{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T03:24:48Z","timestamp":1777605888179,"version":"3.51.4"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,3,13]],"date-time":"2023-03-13T00:00:00Z","timestamp":1678665600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,13]],"date-time":"2023-03-13T00:00:00Z","timestamp":1678665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Nonlinear Sci"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s00332-023-09903-3","type":"journal-article","created":{"date-parts":[[2023,3,26]],"date-time":"2023-03-26T22:22:40Z","timestamp":1679869360000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Stochastic Gradient Descent with Noise of Machine Learning Type Part I: Discrete Time Analysis"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3766-5332","authenticated-orcid":false,"given":"Stephan","family":"Wojtowytsch","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,13]]},"reference":[{"key":"9903_CR1","unstructured":"Allen-Zhu, Z.: Natasha 2: faster non-convex optimization than SGD. (2017) arXiv preprint arXiv:1708.08694"},{"key":"9903_CR2","unstructured":"Bernstein, J., Azizzadenesheli, K., Wang, Y.-X., Anandkumar, A.: Convergence rate of sign stochastic gradient descent for non-convex functions. In: International Conference on Machine Learning, pp. 560\u2013569. PMLR (2018)"},{"key":"9903_CR3","unstructured":"Bassily, R., Belkin, M., Ma, S.: On exponential convergence of sgd in non-convex over-parametrized learning. (2018) arXiv preprint arXiv:1811.02564"},{"issue":"2","key":"9903_CR4","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou, L., Curtis, F.E., Nocedal, J.: Optimization methods for large-scale machine learning. SIAM Rev. 60(2), 223\u2013311 (2018)","journal-title":"SIAM Rev."},{"key":"9903_CR5","unstructured":"Bach, F., Moulines, E.: Non-strongly-convex smooth stochastic approximation with convergence rate O(1\/n). (2013) arXiv preprint arXiv:1306.2119"},{"key":"9903_CR6","unstructured":"Cooper, Y.: The loss landscape of overparameterized neural networks. (2018) arXiv:1804.10200 [cs.LG],"},{"key":"9903_CR7","unstructured":"D\u00e9fossez, A., Bottou, L., Bach, F., Usunier, N.: A simple convergence proof of Adam and Adagrad. (2020) arXiv preprint arXiv:2003.02395"},{"key":"9903_CR8","unstructured":"Dieuleveut, A., Durmus, A., Bach, F.: Bridging the gap between constant step size stochastic gradient descent and Markov chains. (2017) arXiv preprint arXiv:1707.06386"},{"key":"9903_CR9","unstructured":"Dereich, S., Kassing, S.: Convergence of stochastic gradient descent schemes for Lojasiewicz-landscapes. (2021) arXiv preprint arXiv:2102.09385"},{"issue":"1","key":"9903_CR10","first-page":"5354","volume":"21","author":"B Fehrman","year":"2020","unstructured":"Fehrman, B., Gess, B., Jentzen, A.: Convergence rates for the stochastic gradient descent method for non-convex objective functions. J. Mach. Learn. Res. 21(1), 5354\u20135401 (2020)","journal-title":"J. Mach. Learn. Res."},{"issue":"4","key":"9903_CR11","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1137\/120880811","volume":"23","author":"S Ghadimi","year":"2013","unstructured":"Ghadimi, S., Lan, G.: Stochastic first-and zeroth-order methods for nonconvex stochastic programming. SIAM J. Optim. 23(4), 2341\u20132368 (2013)","journal-title":"SIAM J. Optim."},{"issue":"1\u20132","key":"9903_CR12","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/s10107-014-0846-1","volume":"155","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G., Zhang, H.: Mini-batch stochastic approximation methods for nonconvex stochastic composite optimization. Math. Program. 155(1\u20132), 267\u2013305 (2016)","journal-title":"Math. Program."},{"key":"9903_CR13","unstructured":"Hsieh, Y.-G., Iutzeler, F., Malick, J., Mertikopoulos, P.: On the convergence of single-call stochastic extra-gradient methods. (2019) arXiv preprint arXiv:1908.08465"},{"key":"9903_CR14","unstructured":"Hsieh, Y.-G., Iutzeler, F., Malick, J., Mertikopoulos, P.: Explore aggressively, update conservatively: Stochastic extragradient methods with variable stepsize scaling. (2020) arXiv preprint arXiv:2003.10162"},{"issue":"1","key":"9903_CR15","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1093\/imanum\/drz055","volume":"41","author":"A Jentzen","year":"2021","unstructured":"Jentzen, A., Kuckuck, B., Neufeld, A., von Wurstemberger, P.: Strong error analysis for stochastic gradient descent optimization algorithms. IMA J. Numer. Anal. 41(1), 455\u2013492 (2021)","journal-title":"IMA J. Numer. Anal."},{"key":"9903_CR16","volume-title":"Wahrscheinlichkeitstheorie","author":"A Klenke","year":"2006","unstructured":"Klenke, A.: Wahrscheinlichkeitstheorie, vol. 1. Springer, Berlin (2006)"},{"key":"9903_CR17","doi-asserted-by":"crossref","unstructured":"Karimi, H., Nutini, J., Schmidt, M.: Linear convergence of gradient and proximal-gradient methods under the Polyak-Lojasiewicz condition. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 795\u2013811. Springer, (2016)","DOI":"10.1007\/978-3-319-46128-1_50"},{"key":"9903_CR18","volume-title":"Analysis 2","author":"K K\u00f6nigsberger","year":"2013","unstructured":"K\u00f6nigsberger, K.: Analysis 2. Springer-Verlag, Berlin (2013)"},{"key":"9903_CR19","volume-title":"Stochastic Approximation and Recursive Algorithms and Applications","author":"H Kushner","year":"2003","unstructured":"Kushner, H., Yin, G.G.: Stochastic Approximation and Recursive Algorithms and Applications, vol. 35. Springer Science and Business Media, Berlin (2003)"},{"key":"9903_CR20","unstructured":"Li, Q., Tai, C., Weinan, E.: Stochastic modified equations and adaptive stochastic gradient algorithms. In: International Conference on Machine Learning, pp. 2101\u20132110. PMLR, (2017)"},{"key":"9903_CR21","first-page":"451","volume":"24","author":"E Moulines","year":"2011","unstructured":"Moulines, E., Bach, F.: Non-asymptotic analysis of stochastic approximation algorithms for machine learning. Adv. Neural. Inf. Process. Syst. 24, 451\u2013459 (2011)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9903_CR22","unstructured":"Mertikopoulos, P., Hallak, N., Kavis, A., Cevher, V.: On the almost sure convergence of stochastic gradient descent in non-convex problems. (2020) arXiv preprint arXiv:2006.11144"},{"key":"9903_CR23","first-page":"1017","volume":"27","author":"D Needell","year":"2014","unstructured":"Needell, D., Ward, R., Srebro, N.: Stochastic gradient descent, weighted sampling, and the randomized Kaczmarz algorithm. Adv. Neural. Inf. Process. Syst. 27, 1017\u20131025 (2014)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9903_CR24","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"9903_CR25","unstructured":"Rakhlin, A., Shamir, O., Sridharan, K.: Making gradient descent optimal for strongly convex stochastic optimization. (2011) arXiv preprint arXiv:1109.5647"},{"issue":"12","key":"9903_CR26","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1090\/S0002-9904-1942-07811-6","volume":"48","author":"A Sard","year":"1942","unstructured":"Sard, A.: The measure of the critical values of differentiable maps. Bull. Am. Math. Soc. 48(12), 883\u2013890 (1942)","journal-title":"Bull. Am. Math. Soc."},{"key":"9903_CR27","unstructured":"Skorokhodov, I., Burtsev, M.: Loss landscape sightseeing with multi-point optimization. (2019) arXiv preprint arXiv:1910.03867"},{"key":"9903_CR28","unstructured":"Vaswani, S., Bach, F., Schmidt, M.: Fast and faster convergence of SGD for over-parameterized models and an accelerated perceptron. In: Chaudhuri, K., Sugiyama, M. (eds.) Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics, volume\u00a089 of Proceedings of Machine Learning Research, pp. 1195\u20131204. PMLR, 16\u201318 (2019)"},{"key":"9903_CR29","unstructured":"Wojtowytsch, S.: Stochastic gradient descent with noise of machine learning type. Part II: Continuous time analysis. (2021) arXiv:2106.02588 [cs.LG]"},{"key":"9903_CR30","unstructured":"Ward, R., Wu, X., Bottou, L.: Adagrad stepsizes: Sharp convergence over nonconvex landscapes. In: International Conference on Machine Learning, pp. 6677\u20136686. PMLR (2019)"},{"key":"9903_CR31","unstructured":"Xie, Y., Wu, X., Ward, R.: Linear convergence of adaptive stochastic gradient descent. In: International Conference on Artificial Intelligence and Statistics, pp. 1475\u20131485. PMLR, (2020)"}],"container-title":["Journal of Nonlinear Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00332-023-09903-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00332-023-09903-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00332-023-09903-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,8]],"date-time":"2023-05-08T13:12:07Z","timestamp":1683551527000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00332-023-09903-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,13]]},"references-count":31,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["9903"],"URL":"https:\/\/doi.org\/10.1007\/s00332-023-09903-3","relation":{},"ISSN":["0938-8974","1432-1467"],"issn-type":[{"value":"0938-8974","type":"print"},{"value":"1432-1467","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,13]]},"assertion":[{"value":"8 October 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There is no conflict of interest to report.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"45"}}