{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T04:53:44Z","timestamp":1764996824262,"version":"3.33.0"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2007,8,3]],"date-time":"2007-08-03T00:00:00Z","timestamp":1186099200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2007,8,22]]},"DOI":"10.1007\/s10994-007-5017-7","type":"journal-article","created":{"date-parts":[[2007,8,2]],"date-time":"2007-08-02T06:45:19Z","timestamp":1186037119000},"page":"201-233","source":"Crossref","is-referenced-by-count":28,"title":["Annealing stochastic approximation Monte Carlo algorithm for\u00a0neural network training"],"prefix":"10.1007","volume":"68","author":[{"given":"Faming","family":"Liang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2007,8,3]]},"reference":[{"key":"5017_CR1","doi-asserted-by":"crossref","first-page":"362","DOI":"10.1117\/12.152635","volume":"1966","author":"A. M. Abunawass","year":"1993","unstructured":"Abunawass, A. M., & Owen, C. B. (1993). A statistical analysis of the effect of noise injection during neural network training. SPIE Proceedings, 1966, 362\u2013371.","journal-title":"SPIE Proceedings"},{"key":"5017_CR2","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1016\/0925-2312(91)90003-T","volume":"3","author":"S. Amato","year":"1991","unstructured":"Amato, S., Apolloni, B., Caporali, G., Madesani, U., & Zanaboni, A. (1991). Simulated annealing approach in back-propagation. Neurocomputing, 3, 207\u2013220.","journal-title":"Neurocomputing"},{"key":"5017_CR3","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1137\/S0363012902417267","volume":"44","author":"C. Andrieu","year":"2005","unstructured":"Andrieu, C., Moulines, E., & Priouret, P. (2005). Stability of Stochastic Approximation Under Verifiable Conditions. SIAM J. Control and Optimization, 44, 283\u2013312.","journal-title":"SIAM J. Control and Optimization"},{"key":"5017_CR4","doi-asserted-by":"crossref","first-page":"930","DOI":"10.1109\/18.256500","volume":"3","author":"A. Barron","year":"1993","unstructured":"Barron, A. (1993). Universal approximation bounds for superposition of a\u00a0sigmoidal function. IEEE Transactions on Information Theory, 3, 930\u2013945.","journal-title":"IEEE Transactions on Information Theory"},{"key":"5017_CR5","first-page":"904","volume-title":"Advances in neural information processing systems","author":"E. B. Baum","year":"1991","unstructured":"Baum, E. B., & Lang, K. J. (1991). Constructing hidden units using examples and queries. In Advances in neural information processing systems (Vol.\u00a03, pp. 904\u2013910). San Mateo: Kaufmann."},{"key":"5017_CR6","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-75894-2","volume-title":"Adaptive algorithms and stochastic approximations","author":"A. Benveniste","year":"1990","unstructured":"Benveniste, A., M\u00e9tivier, M., & Priouret, P. (1990). Adaptive algorithms and stochastic approximations. New York: Springer."},{"key":"5017_CR7","doi-asserted-by":"crossref","first-page":"425","DOI":"10.1007\/BF02823149","volume":"24","author":"B. Bharath","year":"1999","unstructured":"Bharath, B., & Borkar, V. S. (1999). Stochastic approximation algorithms: overview and recent trends. Sadhana, 24, 425\u2013452.","journal-title":"Sadhana"},{"key":"5017_CR8","volume-title":"Probability and measure","author":"P. Billingsley","year":"1986","unstructured":"Billingsley, P. (1986). Probability and measure (2nd ed.). New York: Wiley.","edition":"2"},{"key":"5017_CR9","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198538493.001.0001","volume-title":"Neural networks for pattern recognition","author":"C. M. Bishop","year":"1995","unstructured":"Bishop, C. M. (1995). Neural networks for pattern recognition. Oxford: Oxford University Press."},{"key":"5017_CR10","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1093\/imamat\/6.1.76","volume":"6","author":"C. G. Broyden","year":"1970","unstructured":"Broyden, C. G. (1970a). The convergence of a\u00a0class of double rank minimization algorithms, part I. Journal of the Institute of Mathematics and Applications, 6, 76\u201390.","journal-title":"Journal of the Institute of Mathematics and Applications"},{"key":"5017_CR11","doi-asserted-by":"crossref","first-page":"222","DOI":"10.1093\/imamat\/6.3.222","volume":"6","author":"C. G. Broyden","year":"1970","unstructured":"Broyden, C. G. (1970b). The convergence of a\u00a0class of double rank minimization algorithms, part II. Journal of the Institute of Mathematics and Applications, 6, 222\u2013231.","journal-title":"Journal of the Institute of Mathematics and Applications"},{"key":"5017_CR12","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1016\/j.jcss.2004.01.006","volume":"69","author":"D. Chawla","year":"2004","unstructured":"Chawla, D., Li, L., & Scott, S. (2004). On approximating weighted sums with exponentially many terms. Journal of Computer and System Sciences, 69, 196\u2013234.","journal-title":"Journal of Computer and System Sciences"},{"key":"5017_CR13","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/BF02551274","volume":"3","author":"G. Cybenko","year":"1989","unstructured":"Cybenko, G. (1989). Approximation by superpositions of a\u00a0sigmoidal function. Mathematics of Control, Signals, and Systems, 3, 303\u2013314.","journal-title":"Mathematics of Control, Signals, and Systems"},{"key":"5017_CR14","unstructured":"Davidon, W. C. (1959). Variable metric method for minimization. AEC Res. and Dev. Report ANL-5990."},{"key":"5017_CR15","doi-asserted-by":"crossref","first-page":"955","DOI":"10.1162\/089976600300015664","volume":"12","author":"N. Freitas de","year":"2000","unstructured":"de Freitas, N., Niranjan, M., Gee, A. H., & Doucet, A. (2000). Sequential Monte Carlo methods to train neural network models. Neural Computation, 12, 955\u2013993.","journal-title":"Neural Computation"},{"key":"5017_CR16","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1214\/aos\/1018031103","volume":"27","author":"B. Delyon","year":"1999","unstructured":"Delyon, B., Lavielle, M., & Moulines, E. (1999). Convergence of a\u00a0stochastic approximation version of the EM algorithm. Annals of Statistics, 27, 94\u2013128.","journal-title":"Annals of Statistics"},{"key":"5017_CR17","unstructured":"Erland, S. (2003). Adaptive Markov chain Monte Carlo review. Technical Report, Department of Mathematical Science, Norwegian University of Science and Technology."},{"key":"5017_CR18","first-page":"524","volume-title":"Advances in neural information processing systems","author":"S. E. Fahlman","year":"1990","unstructured":"Fahlman, S. E., & Lebiere, C. (1990). The cascade-correlation learning architecture. In D. S. Touretzky (Ed.), Advances in neural information processing systems (Vol.\u00a02, pp. 524\u2013532). San Mateo: Kaufmann."},{"key":"5017_CR19","doi-asserted-by":"crossref","first-page":"875","DOI":"10.1016\/S0893-6080(96)00114-1","volume":"10","author":"J. A. Flanagan","year":"1997","unstructured":"Flanagan, J. A. (1997). Analyzing a\u00a0self-organizing algorithm. Neural Networks, 10, 875\u2013883.","journal-title":"Neural Networks"},{"key":"5017_CR20","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1093\/comjnl\/13.3.317","volume":"13","author":"R. Fletcher","year":"1970","unstructured":"Fletcher, R. (1970). A new approach to variable metric algorithms. The Computer Journal, 13, 317\u2013322.","journal-title":"The Computer Journal"},{"key":"5017_CR21","volume-title":"Practical methods of optimization","author":"R. Fletcher","year":"1987","unstructured":"Fletcher, R. (1987). Practical methods of optimization (2nd ed.). New York: Wiley.","edition":"2"},{"key":"5017_CR22","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1093\/comjnl\/6.2.163","volume":"6","author":"R. Fletcher","year":"1963","unstructured":"Fletcher, R., & Powell, M. J. D. (1963). A rapidly convergent descent method for minimization. The Computer Journal, 6, 163\u2013168.","journal-title":"The Computer Journal"},{"key":"5017_CR23","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1016\/0893-6080(89)90003-8","volume":"2","author":"K. Funahashi","year":"1989","unstructured":"Funahashi, K. (1989). On the approximate realization of continuous mappings by neural networks. Neural Networks, 2, 183\u2013192.","journal-title":"Neural Networks"},{"key":"5017_CR24","doi-asserted-by":"crossref","first-page":"721","DOI":"10.1109\/TPAMI.1984.4767596","volume":"6","author":"S. Geman","year":"1984","unstructured":"Geman, S., & Geman, D. (1984). Stochastic relaxation, Gibbs distribution and the Bayesian restoration of images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 6, 721\u2013741.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"5017_CR25","unstructured":"Gelfand, A. E., & Banerjee, S. (1998). Computing marginal posterior modes using stochastic approximation. Technical report, Department of Statistics, University of Connecticut."},{"key":"5017_CR26","volume-title":"Genetic algorithms in search, optimization, & machine learning","author":"D. E. Goldberg","year":"1989","unstructured":"Goldberg, D. E. (1989). Genetic algorithms in search, optimization, & machine learning, Reading: Addison\u2013Wesley."},{"key":"5017_CR27","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1090\/S0025-5718-1970-0258249-6","volume":"24","author":"D. Goldfarb","year":"1970","unstructured":"Goldfarb, D. (1970). A family of variable metric methods derived by variational means. Mathematics of Computation, 24, 23\u201326.","journal-title":"Mathematics of Computation"},{"key":"5017_CR28","unstructured":"Goswami, G. R., & Liu, J. S. (2005) On real parameter evolutionary Monte Carlo algorithm. Technical Report, Harvard University."},{"key":"5017_CR29","doi-asserted-by":"crossref","first-page":"7270","DOI":"10.1073\/pnas.95.13.7270","volume":"95","author":"M. G. Gu","year":"1998","unstructured":"Gu, M. G., & Kong, F. H. (1998). A stochastic approximation algorithm with Markov chain Monte Carlo method for incomplete data estimation problems. Proceedings of the National Academy of Sciences USA, 95, 7270\u20137274.","journal-title":"Proceedings of the National Academy of Sciences USA"},{"key":"5017_CR30","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1111\/1467-9868.00289","volume":"63","author":"M. G. Gu","year":"2001","unstructured":"Gu, M. G., & Zhu, H. T. (2001). Maximum likelihood estimation for spatial models by Markov chain Monte Carlo stochastic approximation. Journal of the Royal Statistical Society, Series B, 63, 339\u2013355.","journal-title":"Journal of the Royal Statistical Society, Series B"},{"key":"5017_CR31","first-page":"295","volume-title":"Neural network models of conditioning and action","author":"S. J. Hanson","year":"1991","unstructured":"Hanson, S. J. (1991). Behavioral diversity, search and stochastic connectionist systems. In M. Commons, S. Grossberg, & J. Staddon (Eds.), Neural network models of conditioning and action (pp. 295\u2013345). Mahwah: Erlbaum."},{"key":"5017_CR32","doi-asserted-by":"crossref","first-page":"1475","DOI":"10.1016\/0042-6989(74)90024-8","volume":"14","author":"E. Harth","year":"1974","unstructured":"Harth, E., & Tzanakou, E. (1974). Alopex: a\u00a0stochastic method for determining visual receptive fields. Vision Research, 14, 1475\u20131482.","journal-title":"Vision Research"},{"key":"5017_CR33","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21606-5","volume-title":"The elements of statistical learning","author":"T. Hastie","year":"2001","unstructured":"Hastie, T., Tibshirani, R., & Friedman, J. (2001). The elements of statistical learning. New York: Springer."},{"key":"5017_CR34","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1093\/biomet\/57.1.97","volume":"57","author":"W. K. Hastings","year":"1970","unstructured":"Hastings, W. K. (1970). Monte Carlo sampling methods using Markov chains and their applications. Biometrika, 57, 97\u2013109.","journal-title":"Biometrika"},{"key":"5017_CR35","volume-title":"Neural networks: a\u00a0comprehensive foundation","author":"S. Haykin","year":"1999","unstructured":"Haykin, S. (1999). Neural networks: a\u00a0comprehensive foundation (2nd ed.). New York: Prentice Hall.","edition":"2"},{"key":"5017_CR36","volume-title":"Adaptation in natural and artificial systems","author":"J. H. Holland","year":"1975","unstructured":"Holland, J. H. (1975). Adaptation in natural and artificial systems. Ann Arbor: University of Michigan Press."},{"key":"5017_CR37","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1016\/0022-1236(89)90023-2","volume":"83","author":"R. A. Holley","year":"1989","unstructured":"Holley, R. A., Kusuoka, S., & Stroock, D. (1989). Asymptotic of the spectral gap with applications to the theory of simulated annealing. Journal of Functional Analysis, 83, 333\u2013347.","journal-title":"Journal of Functional Analysis"},{"key":"5017_CR38","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","volume":"2","author":"K. Hornik","year":"1989","unstructured":"Hornik, K., Stinchcombe, M., & White, H. (1989). Multilayer feedforward networks are universal approximators. Neural Networks, 2, 359\u2013366.","journal-title":"Neural Networks"},{"key":"5017_CR39","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1016\/0893-6080(91)90076-H","volume":"4","author":"D. Ingman","year":"1991","unstructured":"Ingman, D., & Merlis, Y. (1991). Local minimization escape using thermodynamic properties of neural networks. Neural Networks, 4, 395\u2013404.","journal-title":"Neural Networks"},{"key":"5017_CR40","first-page":"482","volume-title":"Approximation algorithms for NP-hard problems","author":"M. Jerrum","year":"1997","unstructured":"Jerrum, M., & Sinclair, A. (1997). The Markov chain Monte Carlo method: an approach to approximate counting and integration. In D. S. Hochbaum (Ed.), Approximation algorithms for NP-hard problems (pp. 482\u2013520). Boston: PWS Publishing Company."},{"key":"5017_CR41","doi-asserted-by":"crossref","unstructured":"Jasra, A., Stephens, D. A., & Holmes, C. C. (2007, to appear). On population-based simulation for static inference. Statistics and Computing.","DOI":"10.1007\/s11222-007-9028-9"},{"key":"5017_CR42","volume-title":"An introduction to stochastic modeling","author":"S. Karlin","year":"1998","unstructured":"Karlin, S., & Taylor, H. M. (1998). An introduction to stochastic modeling, (3rd ed.). Orlando: Academic Press.","edition":"3"},{"key":"5017_CR43","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1126\/science.220.4598.671","volume":"220","author":"S. Kirkpatrick","year":"1983","unstructured":"Kirkpatrick, S., Gelatt, C. D., & Vecchi, M. P. (1983). Optimization by simulated annealing. Science, 220, 671\u2013680.","journal-title":"Science"},{"key":"5017_CR44","doi-asserted-by":"crossref","first-page":"1464","DOI":"10.1109\/5.58325","volume":"78","author":"T. Kohonen","year":"1990","unstructured":"Kohonen, T. (1990). The self-organizing map. Proceedings of the Institute of Electrical and Electronics Engineers, 78, 1464\u20131480.","journal-title":"Proceedings of the Institute of Electrical and Electronics Engineers"},{"key":"5017_CR45","first-page":"52","volume-title":"Proceedings of the 1988 connectionist models","author":"K. J. Lang","year":"1989","unstructured":"Lang, K. J., & Witbrock, M. J. (1989). Learning to tell two spirals apart. In D. Touretzky, G. Hinton, & T.\u00a0Sejnowski (Eds.), Proceedings of the 1988 connectionist models (pp. 52\u201359). San Mateo: Kaufmann."},{"key":"5017_CR46","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1090\/qam\/10666","volume":"2","author":"K. Levenberg","year":"1944","unstructured":"Levenberg, K. (1944). A method for the solution of certain non-linear problems in least squares. Quarterly Journal of Applied Mathematics, 2, 164\u2013168.","journal-title":"Quarterly Journal of Applied Mathematics"},{"key":"5017_CR47","doi-asserted-by":"crossref","first-page":"1959","DOI":"10.1162\/08997660360675107","volume":"15","author":"F. Liang","year":"2003","unstructured":"Liang, F. (2003). An effective Bayesian neural network classifier with a\u00a0comparison study to support vector machine. Neural Computation, 15, 1959\u20131989.","journal-title":"Neural Computation"},{"key":"5017_CR48","doi-asserted-by":"crossref","first-page":"1311","DOI":"10.1198\/016214505000000259","volume":"100","author":"F. Liang","year":"2005","unstructured":"Liang, F. (2005a). Generalized Wang-Landau algorithm for Monte Carlo computation. Journal of the American Statistical Association, 100, 1311\u20131327.","journal-title":"Journal of the American Statistical Association"},{"key":"5017_CR49","doi-asserted-by":"crossref","first-page":"1385","DOI":"10.1162\/0899766053630323","volume":"17","author":"F. Liang","year":"2005","unstructured":"Liang, F. (2005b). Evidence evaluation for Bayesian neural networks using contour Monte Carlo. Neural Computation, 17, 1385\u20131410.","journal-title":"Neural Computation"},{"key":"5017_CR50","doi-asserted-by":"crossref","first-page":"653","DOI":"10.1198\/016214501753168325","volume":"96","author":"F. Liang","year":"2001","unstructured":"Liang, F., & Wong, W. H. (2001). Real parameter evolutionary Monte Carlo with applications in Bayesian mixture models. Journal of the American Statistical Association, 96, 653\u2013666.","journal-title":"Journal of the American Statistical Association"},{"key":"5017_CR51","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1198\/016214506000001202","volume":"102","author":"F. Liang","year":"2007","unstructured":"Liang, F., Liu, C., & Carroll, R. J. (2007). Stochastic approximation in Monte Carlo computation. Journal of the American Statistical Association, 102, 305\u2013320.","journal-title":"Journal of the American Statistical Association"},{"key":"5017_CR52","doi-asserted-by":"crossref","first-page":"448","DOI":"10.1162\/neco.1992.4.3.448","volume":"4","author":"D. J. C. MacKay","year":"1992","unstructured":"MacKay, D. J. C. (1992a). A practical Bayesian framework for backprop networks. Neural Computation, 4, 448\u2013472.","journal-title":"Neural Computation"},{"key":"5017_CR53","doi-asserted-by":"crossref","first-page":"720","DOI":"10.1162\/neco.1992.4.5.720","volume":"4","author":"D. J. C. MacKay","year":"1992","unstructured":"MacKay, D. J. C. (1992b). The evidence framework applied to classification problems. Neural Computation, 4, 720\u2013736.","journal-title":"Neural Computation"},{"key":"5017_CR54","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1137\/0111030","volume":"11","author":"D. W. Marquardt","year":"1963","unstructured":"Marquardt, D. W. (1963). An algorithm for least-squares estimation of nonlinear parameters. Journal of the Society of Industrial and Applied Mathematics, 11, 431\u2013441.","journal-title":"Journal of the Society of Industrial and Applied Mathematics"},{"key":"5017_CR55","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1214\/aos\/1033066201","volume":"24","author":"K. L. Mengersen","year":"1996","unstructured":"Mengersen, K. L., & Tweedie, R. L. (1996). Rates of convergence of the Hastings and Metropolis algorithms. The Annals of Statistics, 24, 101\u2013121.","journal-title":"The Annals of Statistics"},{"key":"5017_CR56","doi-asserted-by":"crossref","first-page":"1087","DOI":"10.1063\/1.1699114","volume":"21","author":"N. Metropolis","year":"1953","unstructured":"Metropolis, N., Rosenbluth, A. W., Rosenbluth, M. N., Teller, A. H., & Teller, E. (1953). Equation of state calculations by fast computing machines. Journal of Chemical Physics, 21, 1087\u20131091.","journal-title":"Journal of Chemical Physics"},{"key":"5017_CR57","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-3267-7","volume-title":"Markov chains and stochastic stability","author":"S. P. Meyn","year":"1993","unstructured":"Meyn, S. P., & Tweedie, R. L. (1993). Markov chains and stochastic stability. London: Springer."},{"key":"5017_CR58","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1016\/0893-6080(95)00018-U","volume":"8","author":"F. M. Mulier","year":"1995","unstructured":"Mulier, F. M., & Cherkassky, V. S. (1995). Statistical analysis of self-organization. Neural Networks, 8, 717\u2013727.","journal-title":"Neural Networks"},{"key":"5017_CR59","doi-asserted-by":"crossref","first-page":"749","DOI":"10.1162\/089976698300017737","volume":"10","author":"P. M\u00fcller","year":"1998","unstructured":"M\u00fcller, P., & Insua, D. R. (1998). Issues in Bayesian analysis of neural network models. Neural Computation, 10, 749\u2013770.","journal-title":"Neural Computation"},{"key":"5017_CR60","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-0745-0","volume-title":"Bayesian learning for neural networks","author":"R. M. Neal","year":"1996","unstructured":"Neal, R. M. (1996). Bayesian learning for neural networks. New York: Springer."},{"key":"5017_CR61","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1117\/12.152626","volume":"1966","author":"C. B. Owen","year":"1993","unstructured":"Owen, C. B., & Abunawass, A. M. (1993). Applications of simulated annealing to the back-propagation model improves convergence. SPIE Proceedings, 1966, 269\u2013276.","journal-title":"SPIE Proceedings"},{"key":"5017_CR62","unstructured":"Perrone, M. P. (1993).Improving regression estimation: averaging methods for variance reduction with extension, to general convex measure optimization. PhD thesis, Brown University, Rhode Island."},{"key":"5017_CR63","volume-title":"Computational methods in optimization: a\u00a0unified approach","author":"E. Polak","year":"1971","unstructured":"Polak, E. (1971). Computational methods in optimization: a\u00a0unified approach. New York: Academic Press."},{"key":"5017_CR64","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H. Robbins","year":"1951","unstructured":"Robbins, H., & Monro, S. (1951). A stochastic approximation method. Annals of Mathematical Statistics, 22, 400\u2013407.","journal-title":"Annals of Mathematical Statistics"},{"key":"5017_CR65","doi-asserted-by":"crossref","first-page":"318","DOI":"10.7551\/mitpress\/5236.001.0001","volume-title":"Parallel distributed processing: explorations in the microstructure of cognition","author":"D. E. Rumelhart","year":"1986","unstructured":"Rumelhart, D. E., Hinton, G. E., & Williams, R. J. (1986). Learning internal representations by back-propagating errors. In D. E. Rumelhart & J. L. McClelland (Eds.), Parallel distributed processing: explorations in the microstructure of cognition (Vol.\u00a01, pp. 318\u2013362). Cambridge: MIT Press."},{"key":"5017_CR66","doi-asserted-by":"crossref","first-page":"2729","DOI":"10.1162\/089976602760408044","volume":"14","author":"P. S. Sastry","year":"2002","unstructured":"Sastry, P. S., Magesh, M., & Unnikrishnan, K. P. (2002). Two timescale analysis of the Alopex algorithm for optimization. Neural Computation, 14, 2729\u20132750.","journal-title":"Neural Computation"},{"key":"5017_CR67","doi-asserted-by":"crossref","first-page":"434","DOI":"10.1214\/aoms\/1177730390","volume":"18","author":"H. Scheff\u00e9","year":"1947","unstructured":"Scheff\u00e9, H. (1947). A useful convergence theorem for probability distributions. Annals of Mathematical Statistics, 18, 434\u2013438.","journal-title":"Annals of Mathematical Statistics"},{"key":"5017_CR68","doi-asserted-by":"crossref","first-page":"647","DOI":"10.1090\/S0025-5718-1970-0274029-X","volume":"24","author":"D. F. Shanno","year":"1970","unstructured":"Shanno, D. F. (1970). Conditioning of quasi-Newton methods for function minimization. Mathematics of Computation, 24, 647\u2013656.","journal-title":"Mathematics of Computation"},{"key":"5017_CR69","first-page":"261","volume-title":"Proceedings pf the symposium on computer applications in medical care","author":"J. W. Smith","year":"1988","unstructured":"Smith, J. W., Everhart, J. E., Dickson, W. C., Knowler, W. C., & Johannes, R. S. (1988). Using the ADAP learning algorithm to forecast the onset of diabetes mellitus. In R. A. Greenes (Ed.), Proceedings pf the symposium on computer applications in medical care (pp. 261\u2013265). Los Alamitos: IEEE Computer Society Press."},{"key":"5017_CR70","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1109\/9.119632","volume":"37","author":"J. C. Spall","year":"1992","unstructured":"Spall, J. C. (1992). Multivariate stochastic approximation using a\u00a0simultaneous perturbation gradient approximation. IEEE Transactions on Automatic Control, 37, 332\u2013341.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5017_CR71","doi-asserted-by":"crossref","unstructured":"Szu, H. (1986). Fast simulated annealing. In AIP conference proceedings: Vol. 151. Neural network for computing, Snowbird, UT.","DOI":"10.1063\/1.36250"},{"key":"5017_CR72","doi-asserted-by":"crossref","first-page":"1125","DOI":"10.1162\/089976603765202686","volume":"15","author":"Z. Tang","year":"2003","unstructured":"Tang, Z., Wang, X., Tamura, H., & Ishii, M. (2003). An algorithm of supervised learning for multilayer neural networks. Neural Computation, 15, 1125\u20131142.","journal-title":"Neural Computation"},{"key":"5017_CR73","first-page":"39","volume":"2","author":"G. Tesauro","year":"1988","unstructured":"Tesauro, G., & Janssens, B. (1988). Scaling relations in back-propagation learning. Complex System, 2, 39\u201344.","journal-title":"Complex System"},{"key":"5017_CR74","volume-title":"Neural network training using genetic algorithms","author":"A. J. F. Rooij van","year":"1996","unstructured":"van Rooij, A. J. F., Jain, L. C., & Johnson, R. P. (1996). Neural network training using genetic algorithms. Singapore: World Scientific."},{"key":"5017_CR75","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1109\/ICNN.1988.23865","volume-title":"Proceedings of IEEE international conference on neural networks","author":"A. Lehmen von","year":"1988","unstructured":"von Lehmen, A., Paek, E. G., Liao, P. F., Marrakchi, A., & Patel, J. S. (1988). Factors influencing learning by back-propagation. In Proceedings of IEEE international conference on neural networks (pp. 335\u2013341). New York: IEEE Press."},{"key":"5017_CR76","doi-asserted-by":"crossref","first-page":"2050","DOI":"10.1103\/PhysRevLett.86.2050","volume":"86","author":"F. Wang","year":"2001","unstructured":"Wang, F., & Landau, D. P. (2001). Efficient, multiple-range random walk algorithm to calculate the density of states. Physical Review Letters, 86, 2050\u20132053.","journal-title":"Physical Review Letters"},{"key":"5017_CR77","doi-asserted-by":"crossref","first-page":"1511","DOI":"10.1109\/72.809097","volume":"10","author":"C. Wang","year":"1999","unstructured":"Wang, C., & Principe, J. C. (1999). Training neural networks with additive noise in the desired signal. IEEE Transactions on Neural Networks, 10, 1511\u20131517.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"5017_CR78","doi-asserted-by":"crossref","first-page":"14220","DOI":"10.1073\/pnas.94.26.14220","volume":"94","author":"W. H. Wong","year":"1997","unstructured":"Wong, W. H., & Liang, F. (1997). Dynamic weighting in Monte Carlo and optimization. Proceedings of the National Academy of Sciences USA, 94, 14220\u201314224.","journal-title":"Proceedings of the National Academy of Sciences USA"},{"key":"5017_CR79","doi-asserted-by":"crossref","unstructured":"Wouwer, A. V., Renotte, C., & Remy, M. (1999) On the use of simultaneous perturbation stochastic approximation for neural network training. In Proceedings of the American control conference (pp. 388\u2013392), San Diego, CA.","DOI":"10.1109\/ACC.1999.782807"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-007-5017-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-007-5017-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-007-5017-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T02:18:14Z","timestamp":1737339494000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-007-5017-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,8,3]]},"references-count":79,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2007,8,22]]}},"alternative-id":["5017"],"URL":"https:\/\/doi.org\/10.1007\/s10994-007-5017-7","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2007,8,3]]}}}