{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T16:24:49Z","timestamp":1772814289621,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2019,12,24]],"date-time":"2019-12-24T00:00:00Z","timestamp":1577145600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,12,24]],"date-time":"2019-12-24T00:00:00Z","timestamp":1577145600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["NRF-2018R1D1A1B07043406"],"award-info":[{"award-number":["NRF-2018R1D1A1B07043406"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Optim Theory Appl"],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1007\/s10957-019-01624-6","type":"journal-article","created":{"date-parts":[[2019,12,24]],"date-time":"2019-12-24T07:30:21Z","timestamp":1577172621000},"page":"953-971","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Combining Stochastic Adaptive Cubic Regularization with Negative Curvature for Nonconvex Optimization"],"prefix":"10.1007","volume":"184","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4726-2441","authenticated-orcid":false,"given":"Seonho","family":"Park","sequence":"first","affiliation":[]},{"given":"Seung Hyun","family":"Jung","sequence":"additional","affiliation":[]},{"given":"Panos M.","family":"Pardalos","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,12,24]]},"reference":[{"key":"1624_CR1","unstructured":"Curtis, F.E., Robinson, D.P.: Exploiting negative curvature in deterministic and stochastic optimization. arXiv preprint arXiv:1703.00412 (2017)"},{"key":"1624_CR2","first-page":"4854","volume-title":"Advances in Neural Information Processing Systems","author":"M Liu","year":"2018","unstructured":"Liu, M., Li, Z., Wang, X., Yi, J., Yang, T.: Adaptive negative curvature descent with applications in non-convex optimization. In: Mozer, M.C., Jordan, M.I., Petsche, T. (eds.) Advances in Neural Information Processing Systems, pp. 4854\u20134863. MIT Press, Cambridge (2018)"},{"issue":"2","key":"1624_CR3","doi-asserted-by":"publisher","first-page":"474","DOI":"10.1007\/s10957-017-1137-9","volume":"174","author":"J Cano","year":"2017","unstructured":"Cano, J., Moguerza, J.M., Prieto, F.J.: Using improved directions of negative curvature for the solution of bound-constrained nonconvex problems. J. Optim. Theory Appl. 174(2), 474\u2013499 (2017)","journal-title":"J. Optim. Theory Appl."},{"key":"1624_CR4","unstructured":"Reddi, S.J., Zaheer, M., Sra, S., Poczos, B., Bach, F., Salakhutdinov, R., Smola, A.J.: A generic approach for escaping saddle points. arXiv preprint arXiv:1709.01434 (2017)"},{"issue":"4","key":"1624_CR5","doi-asserted-by":"publisher","first-page":"1094","DOI":"10.1137\/0613066","volume":"13","author":"J Kuczy\u0144ski","year":"1992","unstructured":"Kuczy\u0144ski, J., Wo\u017aniakowski, H.: Estimating the largest eigenvalue by the power and lanczos algorithms with a random start. SIAM J. Matrix Anal. Appl. 13(4), 1094\u20131122 (1992)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"3","key":"1624_CR6","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/BF00275687","volume":"15","author":"E Oja","year":"1982","unstructured":"Oja, E.: Simplified neuron model as a principal component analyzer. J. Math. Biol. 15(3), 267\u2013273 (1982)","journal-title":"J. Math. Biol."},{"key":"1624_CR7","unstructured":"Martens, J.: Deep learning via hessian-free optimization. In: ICML vol. 27, pp. 735\u2013742 (2010)"},{"key":"1624_CR8","unstructured":"Martens, J., Sutskever, I.: Learning recurrent neural networks with hessian-free optimization. In: Proceedings of the 28th International Conference on Machine Learning (ICML-11), pp. 1033\u20131040. Citeseer (2011)"},{"issue":"1","key":"1624_CR9","first-page":"4148","volume":"18","author":"N Agarwal","year":"2017","unstructured":"Agarwal, N., Bullins, B., Hazan, E.: Second-order stochastic optimization for machine learning in linear time. J. Mach. Learn. Res. 18(1), 4148\u20134187 (2017)","journal-title":"J. Mach. Learn. Res."},{"key":"1624_CR10","first-page":"1261","volume-title":"Artificial Intelligence and Statistics","author":"O Vinyals","year":"2012","unstructured":"Vinyals, O., Povey, D.: Krylov subspace descent for deep learning. In: Gale, W.A. (ed.) Artificial Intelligence and Statistics, pp. 1261\u20131268. Addison-Wesley Pub. Co., Boston (2012)"},{"issue":"2","key":"1624_CR11","doi-asserted-by":"publisher","first-page":"1008","DOI":"10.1137\/140954362","volume":"26","author":"RH Byrd","year":"2016","unstructured":"Byrd, R.H., Hansen, S.L., Nocedal, J., Singer, Y.: A stochastic quasi-newton method for large-scale optimization. SIAM J. Optim. 26(2), 1008\u20131031 (2016)","journal-title":"SIAM J. Optim."},{"issue":"1","key":"1624_CR12","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1162\/neco.1994.6.1.147","volume":"6","author":"BA Pearlmutter","year":"1994","unstructured":"Pearlmutter, B.A.: Fast exact multiplication by the Hessian. Neural Comput. 6(1), 147\u2013160 (1994)","journal-title":"Neural Comput."},{"key":"1624_CR13","unstructured":"Griewank, A.: The modification of Newtons method for unconstrained optimization by bounding cubic terms. Technical report, NA\/12 (1981)"},{"issue":"1","key":"1624_CR14","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s10107-006-0706-8","volume":"108","author":"Y Nesterov","year":"2006","unstructured":"Nesterov, Y., Polyak, B.T.: Cubic regularization of newton method and its global performance. Math. Program. 108(1), 177\u2013205 (2006)","journal-title":"Math. Program."},{"issue":"2","key":"1624_CR15","doi-asserted-by":"publisher","first-page":"927","DOI":"10.1137\/15M1053141","volume":"27","author":"X Wang","year":"2017","unstructured":"Wang, X., Ma, S., Goldfarb, D., Liu, W.: Stochastic quasi-Newton methods for nonconvex stochastic optimization. SIAM J. Optim. 27(2), 927\u2013956 (2017)","journal-title":"SIAM J. Optim."},{"key":"1624_CR16","unstructured":"Wang, Z., Zhou, Y., Liang, Y., Lan, G.: Cubic regularization with momentum for nonconvex optimization. arXiv preprint arXiv:1810.03763 (2018)"},{"issue":"2","key":"1624_CR17","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1007\/s10107-009-0286-5","volume":"127","author":"C Cartis","year":"2011","unstructured":"Cartis, C., Gould, N.I., Toint, P.L.: Adaptive cubic regularisation methods for unconstrained optimization. Part I: motivation, convergence and numerical results. Math. Program. 127(2), 245\u2013295 (2011)","journal-title":"Math. Program."},{"issue":"2","key":"1624_CR18","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/s10107-009-0337-y","volume":"130","author":"C Cartis","year":"2011","unstructured":"Cartis, C., Gould, N.I., Toint, P.L.: Adaptive cubic regularisation methods for unconstrained optimization. Part II: worst-case function-and derivative-evaluation complexity. Math. Program. 130(2), 295\u2013319 (2011)","journal-title":"Math. Program."},{"key":"1624_CR19","unstructured":"Kohler, J.M., Lucchi, A.: Sub-sampled cubic regularization for non-convex optimization. arXiv preprint arXiv:1705.05933 (2017)"},{"issue":"3","key":"1624_CR20","doi-asserted-by":"publisher","first-page":"885","DOI":"10.1007\/s10957-018-1341-2","volume":"178","author":"EH Bergou","year":"2018","unstructured":"Bergou, E.H., Diouane, Y., Gratton, S.: A line-search algorithm inspired by the adaptive cubic regularization framework and complexity analysis. J. Optim. Theory Appl. 178(3), 885\u2013913 (2018)","journal-title":"J. Optim. Theory Appl."},{"issue":"5","key":"1624_CR21","doi-asserted-by":"publisher","first-page":"1013","DOI":"10.1007\/s11590-016-1026-4","volume":"11","author":"X Wang","year":"2017","unstructured":"Wang, X., Fan, N., Pardalos, P.M.: Stochastic subgradient descent method for large-scale robust chance-constrained support vector machines. Optim. Lett. 11(5), 1013\u20131024 (2017)","journal-title":"Optim. Lett."},{"key":"1624_CR22","unstructured":"Carmon, Y., Duchi, J.C.: Gradient descent efficiently finds the cubic-regularized non-convex Newton step. arXiv preprint arXiv:1612.00547 (2016)"},{"issue":"135","key":"1624_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1515\/crll.1909.135.1","volume":"1909","author":"W Ritz","year":"1909","unstructured":"Ritz, W.: \u00dcber eine neue methode zur l\u00f6sung gewisser variationsprobleme der mathematischen physik. Journal f\u00fcr die reine und angewandte Mathematik (Crelles J.) 1909(135), 1\u201361 (1909)","journal-title":"Journal f\u00fcr die reine und angewandte Mathematik (Crelles J.)"},{"key":"1624_CR24","unstructured":"Lee, J.D., Simchowitz, M., Jordan, M.I., Recht, B.: Gradient descent converges to minimizers. arXiv preprint arXiv:1602.04915 (2016)"},{"issue":"3","key":"1624_CR25","doi-asserted-by":"publisher","first-page":"1548","DOI":"10.1109\/TIT.2011.2104999","volume":"57","author":"D Gross","year":"2011","unstructured":"Gross, D.: Recovering low-rank matrices from few coefficients in any basis. IEEE Trans. Inf. Theory 57(3), 1548\u20131566 (2011)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1624_CR26","unstructured":"Ghadimi, S., Liu, H., Zhang, T.: Second-order methods with cubic regularization under inexact information. arXiv preprint arXiv:1710.05782 (2017)"},{"key":"1624_CR27","unstructured":"Roosta-Khorasani, F., Mahoney, M.W.: Sub-sampled newton methods II: local convergence rates. arXiv preprint arXiv:1601.04738 (2016)"},{"key":"1624_CR28","doi-asserted-by":"crossref","unstructured":"Agarwal, N., Allen-Zhu, Z., Bullins, B., Hazan, E., Ma, T.: Finding approximate local minima faster than gradient descent. In: Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing, pp. 1195\u20131199. ACM (2017)","DOI":"10.1145\/3055399.3055464"},{"issue":"2","key":"1624_CR29","doi-asserted-by":"publisher","first-page":"1751","DOI":"10.1137\/17M1114296","volume":"28","author":"Y Carmon","year":"2018","unstructured":"Carmon, Y., Duchi, J.C., Hinder, O., Sidford, A.: Accelerated methods for nonconvex optimization. SIAM J. Optim. 28(2), 1751\u20131772 (2018)","journal-title":"SIAM J. Optim."},{"key":"1624_CR30","first-page":"2675","volume-title":"Advances in Neural Information Processing Systems","author":"Z Allen-Zhu","year":"2018","unstructured":"Allen-Zhu, Z.: Natasha 2: faster non-convex optimization than sgd. In: Mozer, M.C., Jordan, M.I., Petsche, T. (eds.) Advances in Neural Information Processing Systems, pp. 2675\u20132686. MIT Press, Cambridge (2018)"},{"key":"1624_CR31","first-page":"3716","volume-title":"Advances in Neural Information Processing Systems","author":"Z Allen-Zhu","year":"2018","unstructured":"Allen-Zhu, Z., Li, Y.: Neon2: finding local minima via first-order oracles. In: Mozer, M.C., Jordan, M.I., Petsche, T. (eds.) Advances in Neural Information Processing Systems, pp. 3716\u20133726. MIT Press, Cambridge (2018)"},{"key":"1624_CR32","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, pp. 249\u2013256 (2010)"},{"issue":"3","key":"1624_CR33","first-page":"27","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang, C.C., Lin, C.J.: Libsvm: a library for support vector machines. ACM Trans. Intell. Syst. Technol. (TIST) 2(3), 27 (2011)","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"},{"issue":"11","key":"1624_CR34","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"1624_CR35","unstructured":"Krizhevsky, A., Hinton, G.: Learning multiple layers of features from tiny images. Technical report, Citeseer (2009)"}],"container-title":["Journal of Optimization Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-019-01624-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10957-019-01624-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-019-01624-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,12,23]],"date-time":"2020-12-23T00:36:36Z","timestamp":1608683796000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10957-019-01624-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,24]]},"references-count":35,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,3]]}},"alternative-id":["1624"],"URL":"https:\/\/doi.org\/10.1007\/s10957-019-01624-6","relation":{},"ISSN":["0022-3239","1573-2878"],"issn-type":[{"value":"0022-3239","type":"print"},{"value":"1573-2878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,12,24]]},"assertion":[{"value":"17 June 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 December 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}