{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T17:40:16Z","timestamp":1778694016806,"version":"3.51.4"},"publisher-location":"Berlin, Heidelberg","reference-count":44,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642352881","type":"print"},{"value":"9783642352898","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-35289-8_3","type":"book-chapter","created":{"date-parts":[[2012,11,14]],"date-time":"2012-11-14T12:03:17Z","timestamp":1352894597000},"page":"9-48","source":"Crossref","is-referenced-by-count":1128,"title":["Efficient BackProp"],"prefix":"10.1007","author":[{"given":"Yann A.","family":"LeCun","sequence":"first","affiliation":[]},{"given":"L\u00e9on","family":"Bottou","sequence":"additional","affiliation":[]},{"given":"Genevieve B.","family":"Orr","sequence":"additional","affiliation":[]},{"given":"Klaus-Robert","family":"M\u00fcller","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"3_CR1","unstructured":"Amari, S.: Neural learning in structured parameter spaces \u2014 natural riemannian gradient. In: Mozer, M.C., Jordan, M.I., Petsche, T. (eds.) Advances in Neural Information Processing Systems, vol.\u00a09, p. 127. MIT Press (1997)"},{"issue":"2","key":"3_CR2","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S. Amari","year":"1998","unstructured":"Amari, S.: Natural gradient works efficiently in learning. Neural Computation\u00a010(2), 251\u2013276 (1998)","journal-title":"Neural Computation"},{"key":"3_CR3","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1162\/neco.1992.4.2.141","volume":"4","author":"R. Battiti","year":"1992","unstructured":"Battiti, R.: First- and second-order methods for learning: Between steepest descent and newton\u2019s method. Neural Computation\u00a04, 141\u2013166 (1992)","journal-title":"Neural Computation"},{"key":"3_CR4","unstructured":"Becker, S., LeCun, Y.: Improving the convergence of backbropagation learning with second oder metho ds. In: Touretzky, D., Hinton, G., Sejnowski, T. (eds.) Proceedings of the 1988 Connectionist Models Summer School, pp. 29\u201337. Lawrence Erlbaum Associates (1989)"},{"key":"3_CR5","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198538493.001.0001","volume-title":"Neural Networks for Pattern Recognition","author":"C.M. Bishop","year":"1995","unstructured":"Bishop, C.M.: Neural Networks for Pattern Recognition. Clarendon Press, Oxford (1995)"},{"key":"3_CR6","series-title":"The Newton Institute Series","volume-title":"Online Learning in Neural Networks (1997 Workshop at the Newton Institute)","author":"L. Bottou","year":"1998","unstructured":"Bottou, L.: Online algorithms and stochastic approximations. In: Saad, D. (ed.) Online Learning in Neural Networks (1997 Workshop at the Newton Institute). The Newton Institute Series. Cambridge University Press, Cambridge (1998)"},{"key":"3_CR7","first-page":"321","volume":"2","author":"D.S. Broomhead","year":"1988","unstructured":"Broomhead, D.S., Lowe, D.: Multivariable function interpolation and adaptive networks. Complex Systems\u00a02, 321\u2013355 (1988)","journal-title":"Complex Systems"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Buntine, W.L., Weigend, A.S.: Computing second order derivatives in Feed-Forward networks: A review. IEEE Transactions on Neural Networks (1993) (to appear)","DOI":"10.1109\/72.286919"},{"key":"3_CR9","first-page":"832","volume-title":"Advances in Neural Information Processing Systems","author":"C. Darken","year":"1991","unstructured":"Darken, C., Moody, J.E.: Note on learning rate schedules for stochastic optimization. In: Lippmann, R.P., Moody, J.E., Touretzky, D.S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a03, pp. 832\u2013838. Morgan Kaufmann, San Mateo (1991)"},{"key":"3_CR10","volume-title":"Principal Component Neural Networks","author":"K.I. Diamantaras","year":"1996","unstructured":"Diamantaras, K.I., Kung, S.Y.: Principal Component Neural Networks. Wiley, New York (1996)"},{"key":"3_CR11","first-page":"183","volume-title":"Practical Methods of Optimization, ch. 8.7: Polynomial time algorithms","author":"R. Fletcher","year":"1987","unstructured":"Fletcher, R.: Practical Methods of Optimization, ch. 8.7: Polynomial time algorithms, 2nd edn., pp. 183\u2013188. John Wiley\u00a0&\u00a0Sons, New York (1987)","edition":"2"},{"issue":"1","key":"3_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1162\/neco.1992.4.1.1","volume":"4","author":"S. Geman","year":"1992","unstructured":"Geman, S., Bienenstock, E., Doursat, R.: Neural networks and the bias\/variance dilemma. Neural Computation\u00a04(1), 1\u201358 (1992)","journal-title":"Neural Computation"},{"key":"3_CR13","unstructured":"Goldstein, L.: Mean square optimality in the continuous time Robbins Monro procedure. Technical Report DRB-306, Dept. of Mathematics, University of Southern California, LA (1987)"},{"key":"3_CR14","volume-title":"Matrix Computations","author":"G.H. Golub","year":"1989","unstructured":"Golub, G.H., Van Loan, C.F.: Matrix Computations, 2nd edn. Johns Hopkins University Press, Baltimore (1989)","edition":"2"},{"key":"3_CR15","first-page":"199","volume-title":"Mathematical Approaches to Neural Networks","author":"T.M. Heskes","year":"1993","unstructured":"Heskes, T.M., Kappen, B.: On-line learning processes in artificial neural networks. In: Tayler, J.G. (ed.) Mathematical Approaches to Neural Networks, vol.\u00a051, pp. 199\u2013233. Elsevier, Amsterdam (1993)"},{"key":"3_CR16","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1016\/0893-6080(88)90003-2","volume":"1","author":"R.A. Jacobs","year":"1988","unstructured":"Jacobs, R.A.: Increased rates of convergence through learning rate adaptation. Neural Networks\u00a01, 295\u2013307 (1988)","journal-title":"Neural Networks"},{"key":"3_CR17","first-page":"40","volume-title":"Proceedings of the 1988 Conference on Advances in Neural Information Processing Systems","author":"A.H. Kramer","year":"1989","unstructured":"Kramer, A.H., Sangiovanni-Vincentelli, A.: Efficient parallel learning algorithms for neural networks. In: Touretzky, D.S. (ed.) Proceedings of the 1988 Conference on Advances in Neural Information Processing Systems, pp. 40\u201348. Morgan Kaufmann, San Mateo (1989)"},{"key":"3_CR18","unstructured":"LeCun, Y.: Modeles connexionnistes de l\u2019apprentissage (connectionist learning models). PhD thesis, Universit\u00e8 P. et M. Curie, Paris VI (1987)"},{"key":"3_CR19","volume-title":"Proceedings of the International Conference Connectionism in Perspective","author":"Y. LeCun","year":"1988","unstructured":"LeCun, Y.: Generalization and network design strategies. In: Pfeifer, R., Schreter, Z., Fogelman, F., Steels, L. (eds.) Proceedings of the International Conference Connectionism in Perspective, University of Z\u00fcrich, October 10-13. Elsevier, Amsterdam (1988)"},{"key":"3_CR20","volume-title":"Advances in Neural Information Processing Systems, vol. 2","author":"Y. LeCun","year":"1990","unstructured":"LeCun, Y., Boser, B., Denker, J.S., Henderson, D., Howard, R.E., Hubbard, W., Jackel, L.D.: Handwritten digit recognition with a backpropagation network. In: Touretsky, D.S. (ed.) Advances in Neural Information Processing Systems, vol. 2. Morgan Kaufmann, San Mateo (1990)"},{"key":"3_CR21","unstructured":"LeCun, Y., Denker, J.S., Solla, S.A.: Optimal brain damage. In: Touretsky, D.S. (ed.) Advances in Neural Information Processing Systems, vol.\u00a02, pp. 598\u2013605 (1990)"},{"key":"3_CR22","volume-title":"Advances in Neural Information Processing Systems","author":"Y. LeCun","year":"1991","unstructured":"LeCun, Y., Kanter, I., Solla, S.A.: Second order properties of error surfaces. In: Advances in Neural Information Processing Systems, vol.\u00a03. Morgan Kaufmann, San Mateo (1991)"},{"key":"3_CR23","volume-title":"Advances in Neural Information Processing Systems","author":"Y. LeCun","year":"1993","unstructured":"LeCun, Y., Simard, P.Y., Pearlmutter, B.: Automatic learning rate maximization by on-line estimation of the hessian\u2019s eigenvectors. In: Giles, Hanson, Cowan (eds.) Advances in Neural Information Processing Systems, vol.\u00a05. Morgan Kaufmann, San Mateo (1993)"},{"key":"3_CR24","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1016\/S0893-6080(05)80056-5","volume":"6","author":"M. M\u00f8ller","year":"1993","unstructured":"M\u00f8ller, M.: A scaled conjugate gradient algorithm for fast supervised learning. Neural Networks\u00a06, 525\u2013533 (1993)","journal-title":"Neural Networks"},{"issue":"1","key":"3_CR25","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1142\/S0129065793000031","volume":"4","author":"M. M\u00f8ller","year":"1993","unstructured":"M\u00f8ller, M.: Supervised learning on large redundant training sets. International Journal of Neural Systems\u00a04(1), 15\u201325 (1993)","journal-title":"International Journal of Neural Systems"},{"key":"3_CR26","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1162\/neco.1989.1.2.281","volume":"1","author":"J.E. Moody","year":"1989","unstructured":"Moody, J.E., Darken, C.J.: Fast learning in networks of locally-tuned processing units. Neural Computation\u00a01, 281\u2013294 (1989)","journal-title":"Neural Computation"},{"key":"3_CR27","unstructured":"Murata, N.: PhD thesis, University of Tokyo (1992) (in Japanese)"},{"key":"3_CR28","unstructured":"Murata, N., M\u00fcller, K.-R., Ziehe, A., Amari, S.: Adaptive on-line learning in changing environments. In: Mozer, M.C., Jordan, M.I., Petsche, T. (eds.) Advances in Neural Information Processing Systems, vol. 9, p. 599. MIT Press (1997)"},{"key":"3_CR29","volume-title":"Digital Signal Processing","author":"A.V. Oppenheim","year":"1975","unstructured":"Oppenheim, A.V., Schafer, R.W.: Digital Signal Processing. Prentice-Hall, Englewood Cliffs (1975)"},{"key":"3_CR30","unstructured":"Orr, G.B.: Dynamics and Algorithms for Stochastic learning. PhD thesis, Oregon Graduate Institute (1995)"},{"key":"3_CR31","unstructured":"Orr, G.B.: Removing noise in on-line search using adaptive batch sizes. In: Mozer, M.C., Jordan, M.I., Petsche, T. (eds.) Advances in Neural Information Processing Systems, vol. 9, p. 232. MIT Press (1997)"},{"issue":"3","key":"3_CR32","doi-asserted-by":"publisher","first-page":"606","DOI":"10.1162\/neco.1995.7.3.606","volume":"7","author":"M.J.L. Orr","year":"1995","unstructured":"Orr, M.J.L.: Regularization in the selection of radial basis function centers. Neural Computation\u00a07(3), 606\u2013623 (1995)","journal-title":"Neural Computation"},{"key":"3_CR33","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1162\/neco.1994.6.1.147","volume":"6","author":"B.A. Pearlmutter","year":"1994","unstructured":"Pearlmutter, B.A.: Fast exact multiplication by the hessian. Neural Computation\u00a06, 147\u2013160 (1994)","journal-title":"Neural Computation"},{"key":"3_CR34","volume-title":"Numerical Recipies in C: The art of Scientific Programming","author":"W.H. Press","year":"1988","unstructured":"Press, W.H., Flannery, B.P., Teukolsky, S.A., Vetterling, W.T.: Numerical Recipies in C: The art of Scientific Programming. Cambridge University Press, Cambridge (1988)"},{"key":"3_CR35","series-title":"The Newton Institute Series","volume-title":"Online Learning in Neural Networks (1997 Workshop at the Newton Institute)","year":"1998","unstructured":"Saad, D. (ed.): Online Learning in Neural Networks (1997 Workshop at the Newton Institute). The Newton Institute Series. Cambridge University Press, Cambridge (1998)"},{"key":"3_CR36","doi-asserted-by":"publisher","first-page":"4337","DOI":"10.1103\/PhysRevLett.74.4337","volume":"74","author":"D. Saad","year":"1995","unstructured":"Saad, D., Solla, S.A.: Exact solution for on-line learning in multilayer neural networks. Physical Review Letters\u00a074, 4337\u20134340 (1995)","journal-title":"Physical Review Letters"},{"key":"3_CR37","first-page":"105","volume-title":"Neural Networks: The Statistical Mechanics Perspective","author":"H. Sompolinsky","year":"1995","unstructured":"Sompolinsky, H., Barkai, N., Seung, H.S.: On-line learning of dichotomies: algorithms and learning curves. In: Oh, J.-H., Kwon, C., Cho, S. (eds.) Neural Networks: The Statistical Mechanics Perspective, pp. 105\u2013130. World Scientific, Singapore (1995)"},{"key":"3_CR38","unstructured":"Sutton, R.S.: Adapting bias by gradient descent: An incremental version of delta-bar-delta. In: Swartout, W. (ed.) Proceedings of the 10th National Conference on Artificial Intelligence, pp. 171\u2013176. MIT Press, San Jose (July 1992)"},{"issue":"1","key":"3_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0893-6080(94)90052-3","volume":"7","author":"P. Smagt van der","year":"1994","unstructured":"van der Smagt, P.: Minimisation methods for training feed-forward networks. Neural Networks\u00a07(1), 1\u201311 (1994)","journal-title":"Neural Networks"},{"key":"3_CR40","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2440-0","volume-title":"The Nature of Statistical Learning Theory","author":"V. Vapnik","year":"1995","unstructured":"Vapnik, V.: The Nature of Statistical Learning Theory. Springer, New York (1995)"},{"key":"3_CR41","volume-title":"Statistical Learning Theory","author":"V. Vapnik","year":"1998","unstructured":"Vapnik, V.: Statistical Learning Theory. Wiley, New York (1998)"},{"key":"3_CR42","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/29.21701","volume":"ASSP-37","author":"A. Waibel","year":"1989","unstructured":"Waibel, A., Hanazawa, T., Hinton, G., Shikano, K., Lang, K.J.: Phoneme recognition using time-delay neural networks. IEEE Transactions on Acoustics, Speech, and Signal Processing\u00a0ASSP-37, 328\u2013339 (1989)","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"3_CR43","doi-asserted-by":"publisher","first-page":"4425","DOI":"10.1088\/0305-4470\/27\/13\/017","volume":"27","author":"W. Wiegerinck","year":"1994","unstructured":"Wiegerinck, W., Komoda, A., Heskes, T.: Stochastic dynamics of learning with momentum in neural networks. Journal of Physics A\u00a027, 4425\u20134437 (1994)","journal-title":"Journal of Physics A"},{"key":"3_CR44","doi-asserted-by":"crossref","unstructured":"Yang, H.H., Amari, S.: The efficiency and the robustness of natural gradient descent learning rule. In: Jordan, M.I., Kearns, M.J., Solla, S.A. (eds.) Advances in Neural Information Processing Systems, vol. 10. MIT Press (1998)","DOI":"10.1162\/089976698300017007"}],"container-title":["Lecture Notes in Computer Science","Neural Networks: Tricks of the Trade"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-35289-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T10:52:16Z","timestamp":1714560736000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-35289-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642352881","9783642352898"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-35289-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}