{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T16:13:02Z","timestamp":1772122382616,"version":"3.50.1"},"reference-count":146,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,9,6]],"date-time":"2018-09-06T00:00:00Z","timestamp":1536192000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Glob Optim"],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1007\/s10898-018-0701-7","type":"journal-article","created":{"date-parts":[[2018,9,6]],"date-time":"2018-09-06T04:58:17Z","timestamp":1536209897000},"page":"239-277","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Global optimization issues in deep network regression: an overview"],"prefix":"10.1007","volume":"73","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9496-6097","authenticated-orcid":false,"given":"Laura","family":"Palagi","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,6]]},"reference":[{"key":"701_CR1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/S0925-2312(03)00369-2","volume":"56","author":"A Abraham","year":"2004","unstructured":"Abraham, A.: Meta learning evolutionary artificial neural networks. Neurocomputing 56, 1\u201338 (2004)","journal-title":"Neurocomputing"},{"key":"701_CR2","first-page":"1","volume":"17","author":"S Adam","year":"2016","unstructured":"Adam, S., Magoulas, G., Karras, D., Vrahatis, M.: Bounding the search space for global optimization of neural networks learning error: an interval analysis approach. J. Mach. Learn. Res. 17, 1\u201340 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"701_CR3","unstructured":"Adamu, A., Maul, T., Bargiela, A.: On training neural networks with transfer function diversity. In: International Conference on Computational Intelligence and Information Technology (CIIT 2013), Elsevier (2013)"},{"issue":"5","key":"701_CR4","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1016\/0925-2312(91)90003-T","volume":"3","author":"S Amato","year":"1991","unstructured":"Amato, S., Apolloni, B., Caporali, G., Madesani, U., Zanaboni, A.: Simulated annealing approach in backpropagation. Neurocomputing 3(5), 207\u2013220 (1991)","journal-title":"Neurocomputing"},{"issue":"3","key":"701_CR5","doi-asserted-by":"crossref","first-page":"643","DOI":"10.1162\/neco.1996.8.3.643","volume":"8","author":"G An","year":"1996","unstructured":"An, G.: The effects of adding noise during backpropagation training on a generalization performance. Neural Comput. 8(3), 643\u2013674 (1996)","journal-title":"Neural Comput."},{"issue":"1","key":"701_CR6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/BF02578945","volume":"11","author":"A Bagirov","year":"2003","unstructured":"Bagirov, A., Rubinov, A., Soukhoroukova, N., Yearwood, J.: Unsupervised and supervised data classification via nonsmooth and global optimization. Top 11(1), 1\u201375 (2003)","journal-title":"Top"},{"issue":"1","key":"701_CR7","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1016\/0893-6080(89)90014-2","volume":"2","author":"P Baldi","year":"1989","unstructured":"Baldi, P., Hornik, K.: Neural networks and principal component analysis: learning from examples without local minima. Neural Netw. 2(1), 53\u201358 (1989)","journal-title":"Neural Netw."},{"key":"701_CR8","doi-asserted-by":"crossref","first-page":"136","DOI":"10.1016\/j.neunet.2012.04.011","volume":"33","author":"P Baldi","year":"2012","unstructured":"Baldi, P., Lu, Z.: Complex-valued autoencoders. Neural Netw. 33, 136\u2013147 (2012)","journal-title":"Neural Netw."},{"key":"701_CR9","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1016\/j.artint.2014.02.004","volume":"210","author":"P Baldi","year":"2014","unstructured":"Baldi, P., Sadowski, P.: The dropout learning algorithm. Artif. Intell. 210, 78\u2013122 (2014)","journal-title":"Artif. Intell."},{"issue":"5315","key":"701_CR10","doi-asserted-by":"crossref","first-page":"1094","DOI":"10.1126\/science.276.5315.1094","volume":"276","author":"J Barhen","year":"1997","unstructured":"Barhen, J., Protopopescu, V., Reister, D.: TRUST: a deterministic algorithm for global optimization. Science 276(5315), 1094\u20131097 (1997)","journal-title":"Science"},{"key":"701_CR11","volume-title":"Nonlinear Regression Analysis and Its Applications. Wiley Series in Probability and Statistics","author":"DM Bates","year":"2007","unstructured":"Bates, D.M., Watts, D.G.: Nonlinear Regression Analysis and Its Applications. Wiley Series in Probability and Statistics. Wiley, Hoboken (2007)"},{"key":"701_CR12","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th annual international conference on machine learning, pp. 41\u201348. ACM (2009)","DOI":"10.1145\/1553374.1553380"},{"key":"701_CR13","first-page":"281","volume":"13","author":"J Bergstra","year":"2012","unstructured":"Bergstra, J., Bengio, Y.: Random search for hyper-parameter optimization. J. Mach. Learn. Res. 13, 281\u2013305 (2012)","journal-title":"J. Mach. Learn. Res."},{"key":"701_CR14","volume-title":"Nonlinear Programming","author":"DP Bertsekas","year":"1999","unstructured":"Bertsekas, D.P.: Nonlinear Programming. Athena Scientific, Belmont (1999)"},{"issue":"1\u201338","key":"701_CR15","first-page":"3","volume":"2010","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas, D.P.: Incremental gradient, subgradient, and proximal methods for convex optimization: a survey. Optim. Mach. Learn. 2010(1\u201338), 3 (2011)","journal-title":"Optim. Mach. Learn."},{"key":"701_CR16","volume-title":"Parallel and Distributed Computation: Numerical Methods","author":"DP Bertsekas","year":"1989","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Parallel and Distributed Computation: Numerical Methods. Prentice-Hall, Englewood Cliffs (1989)"},{"issue":"3","key":"701_CR17","doi-asserted-by":"crossref","first-page":"627","DOI":"10.1137\/S1052623497331063","volume":"10","author":"DP Bertsekas","year":"2000","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Gradient convergence in gradient methods with errors. SIAM J. Optim. 10(3), 627\u2013642 (2000)","journal-title":"SIAM J. Optim."},{"issue":"7","key":"701_CR18","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1007\/s10994-017-5633-9","volume":"106","author":"D Bertsimas","year":"2017","unstructured":"Bertsimas, D., Dunn, J.: Optimal classification trees. Mach. Learn. 106(7), 1039\u20131082 (2017). https:\/\/doi.org\/10.1007\/s10994-017-5633-9","journal-title":"Mach. Learn."},{"issue":"2","key":"701_CR19","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1287\/opre.1060.0360","volume":"55","author":"D Bertsimas","year":"2007","unstructured":"Bertsimas, D., Shioda, R.: Classification and regression via integer optimization. Oper. Res. 55(2), 252\u2013271 (2007)","journal-title":"Oper. Res."},{"issue":"3","key":"701_CR20","doi-asserted-by":"crossref","first-page":"749","DOI":"10.1109\/72.377979","volume":"6","author":"M Bianchini","year":"1995","unstructured":"Bianchini, M., Frasconi, P., Gori, M.: Learning without local minima in radial basis function networks. IEEE Trans. Neural Netw. 6(3), 749\u2013756 (1995)","journal-title":"IEEE Trans. Neural Netw."},{"issue":"4","key":"701_CR21","doi-asserted-by":"crossref","first-page":"579","DOI":"10.1162\/neco.1991.3.4.579","volume":"3","author":"C Bishop","year":"1991","unstructured":"Bishop, C.: Improving the generalization properties of radial basis function neural networks. Neural Comput. 3(4), 579\u2013588 (1991)","journal-title":"Neural Comput."},{"key":"701_CR22","unstructured":"Bishop, C.: Pattern Recognition and Machine Learning (Information Science and Statistics), 1st edn. 2006. corr. 2nd printing edn (2007)"},{"key":"701_CR23","unstructured":"Blum, A., Rivest, R.L.: Training a 3-node neural network is NP-complete. In: Proceedings of the 1st International Conference on Neural Information Processing Systems, pp. 494\u2013501. MIT Press (1988)"},{"key":"701_CR24","unstructured":"Blundell, C., Cornebise, J., Kavukcuoglu, K., Wierstra, D.: Weight uncertainty in neural networks (2015). arXiv preprint arXiv:1505.05424"},{"key":"701_CR25","unstructured":"Bottou, L., Bousquet, O.: The tradeoffs of large scale learning. In: Proceedings of the 20th International Conference on Neural Information Processing Systems, NIPS\u201907, pp. 161\u2013168. Curran Associates Inc., USA (2007). http:\/\/dl.acm.org\/citation.cfm?id=2981562.2981583"},{"issue":"2","key":"701_CR26","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou, L., Curtis, F.E., Nocedal, J.: Optimization methods for large-scale machine learning. SIAM Rev. 60(2), 223\u2013311 (2018)","journal-title":"SIAM Rev."},{"issue":"10","key":"701_CR27","doi-asserted-by":"crossref","first-page":"3676","DOI":"10.1016\/j.patcog.2012.04.015","volume":"45","author":"A Boubezoul","year":"2012","unstructured":"Boubezoul, A., Paris, S.: Application of global optimization methods to model and feature selection. Pattern Recognit. 45(10), 3676\u20133686 (2012)","journal-title":"Pattern Recognit."},{"key":"701_CR28","unstructured":"Branke, J.: Evolutionary algorithms for neural network design and training. In: Proceedings of the First Nordic Workshop on Genetic Algorithms and its Applications, pp. 145\u2013163 (1995)"},{"issue":"4","key":"701_CR29","doi-asserted-by":"crossref","first-page":"1005","DOI":"10.1109\/TNNLS.2015.2504957","volume":"28","author":"L Bravi","year":"2017","unstructured":"Bravi, L., Piccialli, V., Sciandrone, M.: An optimization-based method for feature ranking in nonlinear regression problems. IEEE Trans. Neural Netw. Learn. Syst. 28(4), 1005\u20131010 (2017)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"15","key":"701_CR30","doi-asserted-by":"crossref","first-page":"150 201","DOI":"10.1103\/PhysRevLett.98.150201","volume":"98","author":"AJ Bray","year":"2007","unstructured":"Bray, A.J., Dean, D.S.: Statistics of critical points of Gaussian fields on large-dimensional spaces. Phys. Rev. Lett. 98(15), 150 201 (2007)","journal-title":"Phys. Rev. Lett."},{"key":"701_CR31","unstructured":"Breuel, T.M.: On the convergence of SGD training of neural networks (2015). arXiv preprint arXiv:1508.02790"},{"issue":"5","key":"701_CR32","doi-asserted-by":"crossref","first-page":"928","DOI":"10.1109\/TSMCB.2005.847743","volume":"35","author":"O Buchtala","year":"2005","unstructured":"Buchtala, O., Klimek, M., Sick, B.: Evolutionary optimization of radial basis function classifiers for data mining applications. IEEE Trans. Syst. Man Cybern. Part B (Cybern.) 35(5), 928\u2013947 (2005)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B (Cybern.)"},{"issue":"2","key":"701_CR33","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1023\/A:1009715923555","volume":"2","author":"CJ Burges","year":"1998","unstructured":"Burges, C.J.: A tutorial on support vector machines for pattern recognition. Data Min. Knowl. Discov. 2(2), 121\u2013167 (1998)","journal-title":"Data Min. Knowl. Discov."},{"issue":"8","key":"701_CR34","doi-asserted-by":"crossref","first-page":"1891","DOI":"10.1162\/08997660152469396","volume":"13","author":"C Buzzi","year":"2001","unstructured":"Buzzi, C., Grippo, L., Sciandrone, M.: Convergent decomposition techniques for training RBF neural networks. Neural Comput. 13(8), 1891\u20131920 (2001)","journal-title":"Neural Comput."},{"key":"701_CR35","doi-asserted-by":"crossref","first-page":"328","DOI":"10.1016\/j.cor.2013.10.002","volume":"43","author":"E Carrizosa","year":"2014","unstructured":"Carrizosa, E., Mart\u00edn-Barrag\u00e1n, B., Morales, D.R.: A nested heuristic for parameter tuning in support vector machines. Comput. Oper. Res. 43, 328\u2013334 (2014)","journal-title":"Comput. Oper. Res."},{"issue":"1","key":"701_CR36","doi-asserted-by":"crossref","first-page":"150","DOI":"10.1016\/j.cor.2012.05.015","volume":"40","author":"E Carrizosa","year":"2013","unstructured":"Carrizosa, E., Morales, D.R.: Supervised classification and mathematical optimization. Comput. Oper. Res. 40(1), 150\u2013165 (2013)","journal-title":"Comput. Oper. Res."},{"issue":"1","key":"701_CR37","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1007\/BF00940781","volume":"77","author":"B Cetin","year":"1993","unstructured":"Cetin, B., Barhen, J., Burdick, J.: Terminal repeller unconstrained subenergy tunneling ( trust) for fast global optimization. J. Optim. Theory Appl. 77(1), 97\u2013126 (1993)","journal-title":"J. Optim. Theory Appl."},{"key":"701_CR38","unstructured":"Cetin, B.C., Burdick, J.W., Barhen, J.: Global descent replaces gradient descent to avoid local minima problem in learning with artificial neural networks. In: IEEE International Conference onNeural Networks, 1993, pp. 836\u2013842. IEEE (1993)"},{"issue":"1","key":"701_CR39","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1016\/j.compeleceng.2013.11.024","volume":"40","author":"G Chandrashekar","year":"2014","unstructured":"Chandrashekar, G., Sahin, F.: A survey on feature selection methods. Comput. Electr. Eng. 40(1), 16\u201328 (2014)","journal-title":"Comput. Electr. Eng."},{"key":"701_CR40","doi-asserted-by":"crossref","unstructured":"Chao, J., Hoshino, M., Kitamura, T., Masuda, T.: A multilayer RBF network and its supervised learning. In: International Joint Conference on Neural Networks, 2001 (IJCNN\u201901), Proceedings, vol.\u00a03, pp. 1995\u20132000. IEEE (2001)","DOI":"10.1109\/IJCNN.2001.938470"},{"key":"701_CR41","first-page":"203","volume":"9","author":"O Chapelle","year":"2008","unstructured":"Chapelle, O., Sindhwani, V., Keerthi, S.S.: Optimization techniques for semi-supervised support vector machines. J. Mach. Learn. Res. 9, 203\u2013233 (2008)","journal-title":"J. Mach. Learn. Res."},{"issue":"5","key":"701_CR42","doi-asserted-by":"crossref","first-page":"1239","DOI":"10.1109\/72.788663","volume":"10","author":"S Chen","year":"1999","unstructured":"Chen, S., Wu, Y., Luk, B.: Combined genetic algorithm optimization and regularized orthogonal least squares learning for radial basis function networks. IEEE Trans. Neural Netw. 10(5), 1239\u20131243 (1999)","journal-title":"IEEE Trans. Neural Netw."},{"key":"701_CR43","unstructured":"Chiang, H.D., Reddy, C.K.: TRUST-TECH based neural network training. In: International Joint Conference on Neural Networks, 2007. (IJCNN 2007), pp. 90\u201395. IEEE (2007)"},{"issue":"1","key":"701_CR44","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1016\/S0925-2312(99)00055-7","volume":"25","author":"Sy Cho","year":"1999","unstructured":"Cho, Sy, Chow, T.W.: Training multilayer neural networks using fast global learning algorithm\u2014least-squares and penalized optimization methods. Neurocomputing 25(1), 115\u2013131 (1999)","journal-title":"Neurocomputing"},{"key":"701_CR45","unstructured":"Choromanska, A., Henaff, M., Mathieu, M., Arous, G.B., LeCun, Y.: The loss surfaces of multilayer networks. In: AISTATS (2015)"},{"key":"701_CR46","unstructured":"Choromanska, A., LeCun, Y., Arous, G.B.: Open problem: the landscape of the loss surfaces of multilayer networks. In: COLT, pp. 1756\u20131760 (2015)"},{"key":"701_CR47","unstructured":"Cohen, S., Intrator, N.: Global optimization of RBF networks (2000). http:\/\/citeseerx.ist.psu.edu\/viewdoc\/summary?doi=10.1.1.31.5955"},{"issue":"2","key":"701_CR48","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1007\/s100440200010","volume":"5","author":"S Cohen","year":"2002","unstructured":"Cohen, S., Intrator, N.: A hybrid projection-based and radial basis function architecture: initial values and global optimisation. Pattern Anal. Appl. 5(2), 113\u2013120 (2002)","journal-title":"Pattern Anal. Appl."},{"key":"701_CR49","doi-asserted-by":"crossref","first-page":"1124","DOI":"10.1016\/j.asoc.2014.08.012","volume":"24","author":"Q Dai","year":"2014","unstructured":"Dai, Q., Ma, Z., Xie, Q.: A two-phased and ensemble scheme integrated backpropagation algorithm. Appl. Soft Comput. 24, 1124\u20131135 (2014)","journal-title":"Appl. Soft Comput."},{"key":"701_CR50","unstructured":"Dauphin, Y.N., Pascanu, R., Gulcehre, C., Cho, K., Ganguli, S., Bengio, Y.: Identifying and attacking the saddle point problem in high-dimensional non-convex optimization. In: Advances in neural information processing systems, pp. 2933\u20132941 (2014)"},{"key":"701_CR51","doi-asserted-by":"crossref","unstructured":"David, O.E., Greental, I.: Genetic algorithms for evolving deep neural networks. In: Proceedings of the Companion Publication of the 2014 Annual Conference on Genetic and Evolutionary Computation, pp. 1451\u20131452. ACM (2014)","DOI":"10.1145\/2598394.2602287"},{"key":"701_CR52","doi-asserted-by":"crossref","unstructured":"Dietterich, T.G.: Ensemble methods in machine learning. In: International workshop on multiple classifier systems, pp. 1\u201315. Springer (2000)","DOI":"10.1007\/3-540-45014-9_1"},{"issue":"2","key":"701_CR53","doi-asserted-by":"crossref","first-page":"772","DOI":"10.1016\/j.ejor.2017.02.020","volume":"261","author":"AP Duarte Silva","year":"2017","unstructured":"Duarte Silva, A.P.: Optimization approaches to supervised classification. Eur. J. Oper. Res. 261(2), 772\u2013788 (2017)","journal-title":"Eur. J. Oper. Res."},{"key":"701_CR54","first-page":"639","volume":"7","author":"W Duch","year":"1997","unstructured":"Duch, W., Jankowski, N.: New neural transfer functions. Appl. Math. Comput. Sci. 7, 639\u2013658 (1997)","journal-title":"Appl. Math. Comput. Sci."},{"issue":"1","key":"701_CR55","first-page":"163","volume":"2","author":"W Duch","year":"1999","unstructured":"Duch, W., Jankowski, N.: Survey of neural transfer functions. Neural Comput. Surv. 2(1), 163\u2013212 (1999)","journal-title":"Neural Comput. Surv."},{"key":"701_CR56","first-page":"163","volume":"2","author":"W Duch","year":"1998","unstructured":"Duch, W., Korczak, J.: Optimization and global minimization methods suitable for neural networks. Neural Comput. Surv. 2, 163\u2013212 (1998)","journal-title":"Neural Comput. Surv."},{"key":"701_CR57","unstructured":"Feng-wen, H., Ai-ping, J.: An improved method of wavelet neural network optimization based on filled function method. In: 16th International Conference on Industrial Engineering and Engineering Management, 2009 (IE&EM\u201909), pp. 1694\u20131697. IEEE (2009)"},{"key":"701_CR58","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1016\/j.disopt.2015.03.002","volume":"22","author":"M Fischetti","year":"2016","unstructured":"Fischetti, M.: Fast training of support vector machines with gaussian kernel. Discrete Optim. 22, 183\u2013194 (2016)","journal-title":"Discrete Optim."},{"key":"701_CR59","volume-title":"Deterministic Global Optimization: Theory, Methods and Applications","author":"CA Floudas","year":"2013","unstructured":"Floudas, C.A.: Deterministic Global Optimization: Theory, Methods and Applications, vol. 37. Springer, Berlin (2013)"},{"issue":"3","key":"701_CR60","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1016\/S0893-6080(00)00009-5","volume":"13","author":"K Fukumizu","year":"2000","unstructured":"Fukumizu, K., Amari, Si: Local minima and plateaus in hierarchical structures of multilayer perceptrons. Neural Netw. 13(3), 317\u2013327 (2000)","journal-title":"Neural Netw."},{"issue":"1\u20133","key":"701_CR61","first-page":"191","volume":"46","author":"R Ge","year":"1990","unstructured":"Ge, R.: A filled function method for finding a global minimizer of a function of several variables. Math. Program. 46(1\u20133), 191\u2013204 (1990)","journal-title":"Math. Program."},{"issue":"6","key":"701_CR62","doi-asserted-by":"crossref","first-page":"1478","DOI":"10.1109\/TNN.2003.820657","volume":"14","author":"J Gonz\u00e1lez","year":"2003","unstructured":"Gonz\u00e1lez, J., Rojas, I., Ortega, J., Pomares, H., Fernandez, F.J., D\u00edaz, A.F.: Multiobjective evolutionary optimization of the size, shape, and position parameters of radial basis function networks for function approximation. IEEE Trans. Neural Netw. 14(6), 1478\u20131495 (2003)","journal-title":"IEEE Trans. Neural Netw."},{"key":"701_CR63","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press, Cambridge (2016)"},{"key":"701_CR64","unstructured":"Goodfellow, I.J., Vinyals, O.: Qualitatively characterizing neural network optimization problems. CoRR (2014). http:\/\/arxiv.org\/abs\/1412.6544"},{"issue":"1","key":"701_CR65","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1109\/34.107014","volume":"14","author":"M Gori","year":"1992","unstructured":"Gori, M., Tesi, A.: On the problem of local minima in backpropagation. IEEE Trans. Pattern Anal. Mach. Intell. 14(1), 76\u201386 (1992)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"701_CR66","doi-asserted-by":"crossref","unstructured":"Gorse, D., Shepherd, A.J., Taylor, J.G.: Avoiding local minima by a classical range expansion algorithm. In: ICANN94, pp. 525\u2013528. Springer, London (1994)","DOI":"10.1007\/978-1-4471-2097-1_122"},{"key":"701_CR67","unstructured":"Gorse, D., Shepherd, A.J., Taylor, J.G.: A classical algorithm for avoiding local minima. In: Proceedings of the World Congress on Neural Networks, pp. 364\u2013369. Citeseer (1994)"},{"issue":"2","key":"701_CR68","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1016\/S0893-6080(96)00090-1","volume":"10","author":"D Gorse","year":"1997","unstructured":"Gorse, D., Shepherd, A.J., Taylor, J.G.: The new ERA in supervised learning. Neural Netw. 10(2), 343\u2013352 (1997)","journal-title":"Neural Netw."},{"key":"701_CR69","unstructured":"Graves, A.: Practical variational inference for neural networks. In: Advances in Neural Information Processing Systems, pp. 2348\u20132356 (2011)"},{"issue":"6","key":"701_CR70","doi-asserted-by":"crossref","first-page":"1284","DOI":"10.1109\/72.883426","volume":"11","author":"L Grippo","year":"2000","unstructured":"Grippo, L.: Convergent on-line algorithms for supervised learning in neural networks. IEEE Trans. Neural Netw. 11(6), 1284\u20131299 (2000)","journal-title":"IEEE Trans. Neural Netw."},{"issue":"11","key":"701_CR71","doi-asserted-by":"crossref","first-page":"2146","DOI":"10.1109\/TNNLS.2015.2475621","volume":"27","author":"L Grippo","year":"2016","unstructured":"Grippo, L., Manno, A., Sciandrone, M.: Decomposition techniques for multilayer perceptron training. IEEE Trans. Neural Netw. Learn. Syst. 27(11), 2146\u20132159 (2016)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"4","key":"701_CR72","doi-asserted-by":"crossref","first-page":"587","DOI":"10.1080\/10556789908805730","volume":"10","author":"L Grippo","year":"1999","unstructured":"Grippo, L., Sciandrone, M.: Globally convergent block-coordinate techniques for unconstrained optimization. Optim. Methods Softw. 10(4), 587\u2013637 (1999)","journal-title":"Optim. Methods Softw."},{"issue":"2","key":"701_CR73","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1023\/A:1020587701058","volume":"23","author":"L Grippo","year":"2002","unstructured":"Grippo, L., Sciandrone, M.: Nonmonotone globalization techniques for the Barzilai\u2013Borwein gradient method. Comput. Optim. Appl. 23(2), 143\u2013169 (2002)","journal-title":"Comput. Optim. Appl."},{"key":"701_CR74","volume-title":"A Distribution-free Theory of Nonparametric Regression","author":"L Gy\u00f6rfi","year":"2006","unstructured":"Gy\u00f6rfi, L., Kohler, M., Krzyzak, A., Walk, H.: A Distribution-free Theory of Nonparametric Regression. Springer, Berlin (2006)"},{"issue":"4","key":"701_CR75","doi-asserted-by":"crossref","first-page":"669","DOI":"10.1016\/S0893-6080(97)00134-2","volume":"11","author":"LG Hamey","year":"1998","unstructured":"Hamey, L.G.: XOR has no local minima: a case study in neural network error surface analysis. Neural Netw. 11(4), 669\u2013681 (1998)","journal-title":"Neural Netw."},{"issue":"3","key":"701_CR76","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1007\/s11063-007-9048-7","volume":"26","author":"L Hamm","year":"2007","unstructured":"Hamm, L., Brorsen, B.W., Hagan, M.T.: Comparison of stochastic global optimization methods to estimate neural network weights. Neural Process. Lett. 26(3), 145\u2013158 (2007)","journal-title":"Neural Process. Lett."},{"key":"701_CR77","volume-title":"Neural Networks and Learning Machines","author":"S Haykin","year":"2009","unstructured":"Haykin, S.: Neural Networks and Learning Machines, vol. 3. Pearson, Upper Saddle River (2009)"},{"issue":"1","key":"701_CR78","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/neco.1997.9.1.1","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Flat minima. Neural Comput. 9(1), 1\u201342 (1997)","journal-title":"Neural Comput."},{"key":"701_CR79","volume-title":"Global Optimization: Deterministic Approaches","author":"R Horst","year":"2013","unstructured":"Horst, R., Tuy, H.: Global Optimization: Deterministic Approaches. Springer, Berlin (2013)"},{"key":"701_CR80","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1016\/j.neunet.2014.10.001","volume":"61","author":"G Huang","year":"2015","unstructured":"Huang, G., Huang, G.B., Song, S., You, K.: Trends in extreme learning machines: a review. Neural Netw. 61, 32\u201348 (2015)","journal-title":"Neural Netw."},{"key":"701_CR81","unstructured":"Huang, G.B., Zhu, Q.Y., Siew, C.K.: Extreme learning machine: a new learning scheme of feedforward neural networks. In: 2004 IEEE International Joint Conference on Neural Networks, 2004. Proceedings, vol.\u00a02, pp. 985\u2013990. IEEE (2004)"},{"issue":"1","key":"701_CR82","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1007\/BF01414103","volume":"5","author":"LCK Hui","year":"1997","unstructured":"Hui, L.C.K., Lam, K.Y., Chea, C.W.: Global optimisation in neural network training. Neural Comput. Appl. 5(1), 58\u201364 (1997)","journal-title":"Neural Comput. Appl."},{"issue":"3","key":"701_CR83","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1109\/TSMCC.2008.919172","volume":"38","author":"Y Jin","year":"2008","unstructured":"Jin, Y., Sendhoff, B.: Pareto-based multiobjective machine learning: an overview and case studies. IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.) 38(3), 397\u2013415 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.)"},{"key":"701_CR84","unstructured":"Kawaguchi, K.: Deep learning without poor local minima. In: Advances In Neural Information Processing Systems, pp. 586\u2013594 (2016)"},{"key":"701_CR85","unstructured":"Keskar, N.S., Mudigere, D., Nocedal, J., Smelyanskiy, M., Tang, P.T.P.: On large-batch training for deep learning: generalization gap and sharp minima. In: ICLR 2017 (2016)"},{"key":"701_CR86","unstructured":"Lang, K.: Learning to tell two spiral apart. In: Proceedings of the 1988 Connectionist Models Summer School, pp. 52\u201359 (1989)"},{"key":"701_CR87","unstructured":"Laurent, T., von Brecht, J.: The multilinear structure of ReLU networks (2017). arXiv preprint arXiv:1712.10132"},{"issue":"7553","key":"701_CR88","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"701_CR89","doi-asserted-by":"crossref","unstructured":"LeCun, Y.A., Bottou, L., Orr, G.B., M\u00fcller, K.R.: Efficient backprop. In: Neural networks: Tricks of the trade, pp. 9\u201348. Springer (2012)","DOI":"10.1007\/978-3-642-35289-8_3"},{"key":"701_CR90","unstructured":"Lee, J.D., Simchowitz, M., Jordan, M.I., Recht, B.: Gradient descent only converges to minimizers. In: Conference on Learning Theory, pp. 1246\u20131257 (2016)"},{"issue":"4","key":"701_CR91","first-page":"519","volume":"20","author":"JS Lee","year":"2010","unstructured":"Lee, J.S., Park, C.H.: Global optimization of radial basis function networks by hybrid simulated annealing. Neural Netw. World 20(4), 519 (2010)","journal-title":"Neural Netw. World"},{"issue":"9","key":"701_CR92","first-page":"1247","volume":"28","author":"HR Li","year":"2007","unstructured":"Li, H.R., Li, H.L.: A global optimization algorithm based on filled-function for neural networks. J. Northeast. Univ. Nat. Sci. 28(9), 1247 (2007)","journal-title":"J. Northeast. Univ. Nat. Sci."},{"issue":"2","key":"701_CR93","doi-asserted-by":"crossref","first-page":"1491","DOI":"10.1016\/j.eswa.2007.01.014","volume":"34","author":"SW Lin","year":"2008","unstructured":"Lin, S.W., Tseng, T.Y., Chou, S.Y., Chen, S.C.: A simulated-annealing-based approach for simultaneous parameter optimization and feature selection of back-propagation networks. Expert Syst. Appl. 34(2), 1491\u20131499 (2008)","journal-title":"Expert Syst. Appl."},{"issue":"1","key":"701_CR94","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1088\/0954-898X_2_1_007","volume":"2","author":"P Lisboa","year":"1991","unstructured":"Lisboa, P., Perantonis, S.: Complete solution of the local minima in the XOR problem. Network: Comput. Neural Syst. 2(1), 119\u2013124 (1991)","journal-title":"Network: Comput. Neural Syst."},{"issue":"12","key":"701_CR95","doi-asserted-by":"crossref","first-page":"2283","DOI":"10.1080\/00207160.2017.1283021","volume":"94","author":"H Liu","year":"2017","unstructured":"Liu, H., Wang, Y., Guan, S., Liu, X.: A new filled function method for unconstrained global optimization. Int. J. Comput. Math. 94(12), 2283\u20132296 (2017)","journal-title":"Int. J. Comput. Math."},{"key":"701_CR96","doi-asserted-by":"publisher","unstructured":"Locatelli, M., Schoen, F.: Global optimization: theory, algorithms, and applications. Society for Industrial and Applied Mathematics, Philadelphia, PA (2013). https:\/\/doi.org\/10.1137\/1.9781611972672","DOI":"10.1137\/1.9781611972672"},{"key":"701_CR97","doi-asserted-by":"crossref","unstructured":"Magoulas, G., Plagianakos, V., Vrahatis, M.: Hybrid methods using evolutionary algorithms for on-line training. In: International Joint Conference on Neural Networks, 2001 (IJCNN\u201901) Proceedings, vol.\u00a03, pp. 2218\u20132223. IEEE (2001)","DOI":"10.1109\/IJCNN.2001.938511"},{"key":"701_CR98","unstructured":"Martin-Guerreo, J., G\u00f3mez-Chova, L., Calpe-Maravilla, J., Camps-Valls, G., Soria-Olivas, E., Moreno, J.: A soft approach to ERA algorithm for hyperspectral image classification. In: Proceedings of the 3rd International Symposium on Image and Signal Processing and Analysis, 2003 (ISPA 2003), vol.\u00a02, pp. 761\u2013765. IEEE (2003)"},{"key":"701_CR99","unstructured":"Neelakantan, A., Vilnis, L., Le, Q.V., Sutskever, I., Kaiser, L., Kurach, K., Martens, J.: Adding gradient noise improves learning for very deep networks (2015). arXiv preprint arXiv:1511.06807"},{"issue":"2","key":"701_CR100","first-page":"372","volume":"27","author":"Y Nesterov","year":"1983","unstructured":"Nesterov, Y.: A method of solving a convex programming problem with convergence rate $$o(1\/k^2)$$ o ( 1 \/ k 2 ) . Sov. Math. Doklady 27(2), 372\u2013376 (1983)","journal-title":"Sov. Math. Doklady"},{"key":"701_CR101","unstructured":"Nguyen, Q., Hein, M.: The loss surface and expressivity of deep convolutional neural networks (2017). arXiv preprint arXiv:1710.10928"},{"key":"701_CR102","unstructured":"Nguyen, Q., Hein, M.: The loss surface of deep and wide neural networks (2017). arXiv preprint arXiv:1704.08045"},{"key":"701_CR103","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/j.engappai.2017.01.013","volume":"60","author":"VK Ojha","year":"2017","unstructured":"Ojha, V.K., Abraham, A., Sn\u00e1\u0161el, V.: Metaheuristic design of feedforward neural networks: a review of two decades of research. Eng. Appl. Artif. Intell. 60, 97\u2013116 (2017)","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"3","key":"701_CR104","doi-asserted-by":"crossref","first-page":"587","DOI":"10.1109\/TNN.2005.844858","volume":"16","author":"PP Palmes","year":"2005","unstructured":"Palmes, P.P., Hayasaka, T., Usui, S.: Mutation-based genetic neural network. IEEE Trans. Neural Netw. 16(3), 587\u2013600 (2005)","journal-title":"IEEE Trans. Neural Netw."},{"key":"701_CR105","unstructured":"Peng, C.C., Magoulas, G.D.: Adaptive nonmonotone conjugate gradient training algorithm for recurrent neural networks. In: 19th IEEE International Conference on Tools with Artificial Intelligence, 2007 (ICTAI 2007), vol.\u00a02, pp. 374\u2013381. IEEE (2007)"},{"issue":"6","key":"701_CR106","doi-asserted-by":"crossref","first-page":"897","DOI":"10.1007\/s00521-010-0493-2","volume":"20","author":"CC Peng","year":"2011","unstructured":"Peng, C.C., Magoulas, G.D.: Nonmonotone Levenberg\u2013Marquardt training of recurrent neural architectures for processing symbolic sequences. Neural Comput. Appl. 20(6), 897\u2013908 (2011)","journal-title":"Neural Comput. Appl."},{"issue":"2","key":"701_CR107","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1007\/s10288-018-0378-2","volume":"16","author":"V Piccialli","year":"2018","unstructured":"Piccialli, V., Sciandrone, M.: Nonlinear optimization and support vector machines. 4OR 16(2), 111\u2013149 (2018)","journal-title":"4OR"},{"issue":"1","key":"701_CR108","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1016\/j.eswa.2011.06.050","volume":"39","author":"JD Pint\u00e9r","year":"2012","unstructured":"Pint\u00e9r, J.D.: Calibrating artificial neural networks by global optimization. Expert Syst. Appl. 39(1), 25\u201332 (2012)","journal-title":"Expert Syst. Appl."},{"issue":"5","key":"701_CR109","doi-asserted-by":"crossref","first-page":"3431","DOI":"10.1016\/S0362-546X(01)00459-X","volume":"47","author":"V Plagianakos","year":"2001","unstructured":"Plagianakos, V., Magoulas, G., Vrahatis, M.: Learning in multilayer perceptrons using global optimization strategies. Nonlinear Anal. Theory Methods Appl. 47(5), 3431\u20133436 (2001)","journal-title":"Nonlinear Anal. Theory Methods Appl."},{"key":"701_CR110","doi-asserted-by":"crossref","unstructured":"Plagianakos, V., Magoulas, G., Vrahatis, M.: Improved learning of neural nets through global search. In: Global Optimization, pp. 361\u2013388. Springer (2006)","DOI":"10.1007\/0-387-30927-6_15"},{"issue":"6","key":"701_CR111","doi-asserted-by":"crossref","first-page":"1268","DOI":"10.1109\/TNN.2002.804225","volume":"13","author":"VP Plagianakos","year":"2002","unstructured":"Plagianakos, V.P., Magoulas, G.D., Vrahatis, M.N.: Deterministic nonmonotone strategies for effective training of multilayer perceptrons. IEEE Transactions on Neural Networks 13(6), 1268\u20131284 (2002)","journal-title":"IEEE Transactions on Neural Networks"},{"issue":"9","key":"701_CR112","doi-asserted-by":"crossref","first-page":"1481","DOI":"10.1109\/5.58326","volume":"78","author":"T Poggio","year":"1990","unstructured":"Poggio, T., Girosi, F.: Networks for approximation and learning. Proc. IEEE 78(9), 1481\u20131497 (1990)","journal-title":"Proc. IEEE"},{"issue":"5","key":"701_CR113","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"BT Polyak","year":"1964","unstructured":"Polyak, B.T.: Some methods of speeding up the convergence of iteration methods. USSR Comput. Math. Math. Phys. 4(5), 1\u201317 (1964)","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"701_CR114","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1016\/j.neucom.2016.06.014","volume":"214","author":"A Prieto","year":"2016","unstructured":"Prieto, A., Prieto, B., Ortigosa, E.M., Ros, E., Pelayo, F., Ortega, J., Rojas, I.: Neural networks: an overview of early research, current frameworks and new challenges. Neurocomputing 214, 242\u2013268 (2016)","journal-title":"Neurocomputing"},{"key":"701_CR115","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1016\/j.procs.2015.12.114","volume":"72","author":"LR Rere","year":"2015","unstructured":"Rere, L.R., Fanany, M.I., Arymurthy, A.M.: Simulated annealing algorithm for deep learning. Proc. Comput. Sci. 72, 137\u2013144 (2015)","journal-title":"Proc. Comput. Sci."},{"issue":"3","key":"701_CR116","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 22(3), 400\u2013407 (1951)","journal-title":"Ann. Math. Stat."},{"issue":"1","key":"701_CR117","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1109\/72.737492","volume":"10","author":"P RoyChowdhury","year":"1999","unstructured":"RoyChowdhury, P., Singh, Y.P., Chansarkar, R.: Dynamic tunneling technique for efficient training of multilayer perceptrons. IEEE Trans. Neural Netw. 10(1), 48\u201355 (1999)","journal-title":"IEEE Trans. Neural Netw."},{"key":"701_CR118","doi-asserted-by":"crossref","unstructured":"Ruppert, D., Wand, M.P., Carroll, R.J.: Semiparametric regression. In: Cambridge Series in Statistical and Probabilistic mathematics, vol. 12. Mathematical Reviews (MathSciNet): MR1998720. Cambridge Univ. Press, Cambridge (2003)","DOI":"10.1017\/CBO9780511755453"},{"key":"701_CR119","doi-asserted-by":"crossref","first-page":"1193","DOI":"10.1214\/09-EJS525","volume":"3","author":"D Ruppert","year":"2009","unstructured":"Ruppert, D., Wand, M.P., Carroll, R.J.: Semiparametric regression during 2003\u20132007. Electron. J. Stat. 3, 1193 (2009)","journal-title":"Electron. J. Stat."},{"key":"701_CR120","volume-title":"On-Line Learning in Neural Networks","author":"D Saad","year":"2009","unstructured":"Saad, D.: On-Line Learning in Neural Networks, vol. 17. Cambridge University Press, Cambridge (2009)"},{"issue":"2","key":"701_CR121","doi-asserted-by":"crossref","first-page":"1200","DOI":"10.1002\/widm.1200","volume":"7","author":"S Scardapane","year":"2017","unstructured":"Scardapane, S., Wang, D.: Randomness in neural networks: an overview. Wiley Interdiscip. Rev. Data Min. Knowl. Discov. 7(2), 1200 (2017)","journal-title":"Wiley Interdiscip. Rev. Data Min. Knowl. Discov."},{"key":"701_CR122","unstructured":"Schaffer, J.D., Whitley, D., Eshelman, L.J.: Combinations of genetic algorithms and neural networks: a survey of the state of the art. In: International Workshop on Combinations of Genetic Algorithms and Neural Networks, 1992 (COGANN-92), pp. 1\u201337. IEEE (1992)"},{"key":"701_CR123","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber, J.: Deep learning in neural networks: an overview. Neural Netw. 61, 85\u2013117 (2015)","journal-title":"Neural Netw."},{"issue":"4","key":"701_CR124","doi-asserted-by":"crossref","first-page":"439","DOI":"10.1016\/S0893-6080(01)00027-2","volume":"14","author":"F Schwenker","year":"2001","unstructured":"Schwenker, F., Kestler, H.A., Palm, G.: Three learning phases for radial-basis-function networks. Neural Netw. 14(4), 439\u2013458 (2001)","journal-title":"Neural Netw."},{"issue":"2","key":"701_CR125","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1016\/S0167-9236(97)00040-7","volume":"22","author":"RS Sexton","year":"1998","unstructured":"Sexton, R.S., Dorsey, R.E., Johnson, J.D.: Toward global optimization of neural networks: a comparison of the genetic algorithm and backpropagation. Decis. Support Syst. 22(2), 171\u2013185 (1998)","journal-title":"Decis. Support Syst."},{"issue":"3","key":"701_CR126","doi-asserted-by":"crossref","first-page":"589","DOI":"10.1016\/S0377-2217(98)00114-3","volume":"114","author":"RS Sexton","year":"1999","unstructured":"Sexton, R.S., Dorsey, R.E., Johnson, J.D.: Optimization of neural networks: a comparative analysis of the genetic algorithm and simulated annealing. Eur. J. Oper. Res. 114(3), 589\u2013601 (1999)","journal-title":"Eur. J. Oper. Res."},{"issue":"3","key":"701_CR127","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1109\/2.485892","volume":"29","author":"Y Shang","year":"1996","unstructured":"Shang, Y., Wah, B.W.: Global optimization for neural network training. Computer 29(3), 45\u201354 (1996)","journal-title":"Computer"},{"issue":"11","key":"701_CR128","doi-asserted-by":"crossref","first-page":"2709","DOI":"10.1162\/089976602760408035","volume":"14","author":"J \u0160\u00edma","year":"2002","unstructured":"\u0160\u00edma, J.: Training a single sigmoidal neuron is hard. Neural Comput. 14(11), 2709\u20132728 (2002)","journal-title":"Neural Comput."},{"key":"701_CR129","unstructured":"Soudry, D., Carmon, Y.: No bad local minima: data independent training error guarantees for multilayer neural networks (2016). arXiv preprint arXiv:1605.08361"},{"issue":"4","key":"701_CR130","doi-asserted-by":"crossref","first-page":"683","DOI":"10.1016\/S0893-6080(98)00014-8","volume":"11","author":"IG Sprinkhuizen-Kuyper","year":"1998","unstructured":"Sprinkhuizen-Kuyper, I.G., Boers, E.J.: The error surface of the 2-2-1 XOR network: The finite stationary points. Neural Netw. 11(4), 683\u2013690 (1998)","journal-title":"Neural Netw."},{"issue":"1","key":"701_CR131","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G.E., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"701_CR132","unstructured":"Steijvers, M., Gr\u00fcnwald, P.: A recurrent network that performs a context-sensitive prediction task. In: Proceedings of the 18th Annual Conference of the Cognitive Science Society, pp. 335\u2013339 (1996)"},{"issue":"28","key":"701_CR133","first-page":"1139","volume":"3","author":"I Sutskever","year":"2013","unstructured":"Sutskever, I., Martens, J., Dahl, G.E., Hinton, G.E.: On the importance of initialization and momentum in deep learning. ICML 3(28), 1139\u20131147 (2013)","journal-title":"ICML"},{"key":"701_CR134","unstructured":"Swirszcz, G., Czarnecki, W.M., Pascanu, R.: Local minima in training of deep networks. CoRR (2016). arXiv:1611.06310v1"},{"key":"701_CR135","first-page":"65","volume":"8","author":"M Teboulle","year":"2007","unstructured":"Teboulle, M.: A unified continuous optimization framework for center-based clustering methods. J. Mach. Learn. Res. 8, 65\u2013102 (2007)","journal-title":"J. Mach. Learn. Res."},{"key":"701_CR136","doi-asserted-by":"crossref","unstructured":"Teo, C.H., Smola, A., Vishwanathan, S., Le, Q.V.: A scalable modular convex solver for regularized risk minimization. In: Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 727\u2013736. ACM (2007)","DOI":"10.1145\/1281192.1281270"},{"key":"701_CR137","unstructured":"Tirumala, S.S., Ali, S., Ramesh, C.P.: Evolving deep neural networks: A new prospect. In: 12th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery (ICNC-FSKD), 2016, pp. 69\u201374. IEEE (2016)"},{"issue":"6","key":"701_CR138","doi-asserted-by":"crossref","first-page":"977","DOI":"10.1109\/TSMCB.2002.804366","volume":"33","author":"KA Toh","year":"2003","unstructured":"Toh, K.A.: Deterministic global optimization for FNN training. IEEE Trans. Syst. Man Cybern. Part B (Cybern.) 33(6), 977\u2013983 (2003)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B (Cybern.)"},{"key":"701_CR139","volume-title":"The Nature of Statistical Learning Theory","author":"V Vapnik","year":"2013","unstructured":"Vapnik, V.: The Nature of Statistical Learning Theory. Springer, Berlin (2013)"},{"issue":"2","key":"701_CR140","first-page":"231","volume":"14","author":"C Voglis","year":"2006","unstructured":"Voglis, C., Lagaris, I.: A global optimization approach to neural network training. Neural Parallel Sci. Comput. 14(2), 231 (2006)","journal-title":"Neural Parallel Sci. Comput."},{"issue":"1","key":"701_CR141","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1016\/j.amc.2009.03.012","volume":"213","author":"C Voglis","year":"2009","unstructured":"Voglis, C., Lagaris, I.E.: Towards ideal multistart: a stochastic approach for locating the minima of a continuous function inside a bounded domain. Appl. Math. Comput. 213(1), 216\u2013229 (2009)","journal-title":"Appl. Math. Comput."},{"key":"701_CR142","doi-asserted-by":"crossref","first-page":"126","DOI":"10.1016\/j.ins.2016.05.021","volume":"364\u2013365","author":"D Wang","year":"2016","unstructured":"Wang, D.: Editorial: Randomized algorithms for training neural networks. Inf. Sci. 364\u2013365, 126\u2013128 (2016)","journal-title":"Inf. Sci."},{"key":"701_CR143","doi-asserted-by":"crossref","unstructured":"Werbos, P.J.: Supervised learning: Can it escape its local minimum? In: Theoretical Advances in Neural Computation and Learning, pp. 449\u2013461. Springer (1994)","DOI":"10.1007\/978-1-4615-2696-4_13"},{"issue":"5","key":"701_CR144","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1109\/TNNLS.2015.2431251","volume":"27","author":"DS Yeung","year":"2016","unstructured":"Yeung, D.S., Li, J.C., Ng, W.W.Y., Chan, P.P.K.: Mlpnn training via a multiobjective optimization of training error and stochastic sensitivity. IEEE Trans. Neural Netw. Learn. Syst. 27(5), 978\u2013992 (2016). https:\/\/doi.org\/10.1109\/TNNLS.2015.2431251","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"701_CR145","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1016\/j.neucom.2014.03.077","volume":"149","author":"W Yu","year":"2015","unstructured":"Yu, W., Zhuang, F., He, Q., Shi, Z.: Learning deep representations via extreme learning machines. Neurocomputing 149, 308\u2013315 (2015)","journal-title":"Neurocomputing"},{"issue":"2","key":"701_CR146","doi-asserted-by":"crossref","first-page":"1026","DOI":"10.1016\/j.amc.2006.07.025","volume":"185","author":"JR Zhang","year":"2007","unstructured":"Zhang, J.R., Zhang, J., Lok, T.M., Lyu, M.R.: A hybrid particle swarm optimization-back-propagation algorithm for feedforward neural network training. Appl. Math. Comput. 185(2), 1026\u20131037 (2007)","journal-title":"Appl. Math. Comput."}],"container-title":["Journal of Global Optimization"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10898-018-0701-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10898-018-0701-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10898-018-0701-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T22:38:58Z","timestamp":1751841538000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10898-018-0701-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,6]]},"references-count":146,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,2]]}},"alternative-id":["701"],"URL":"https:\/\/doi.org\/10.1007\/s10898-018-0701-7","relation":{},"ISSN":["0925-5001","1573-2916"],"issn-type":[{"value":"0925-5001","type":"print"},{"value":"1573-2916","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,9,6]]},"assertion":[{"value":"17 June 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 August 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}