{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T23:15:28Z","timestamp":1771024528105,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,9,8]],"date-time":"2021-09-08T00:00:00Z","timestamp":1631059200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,9,8]],"date-time":"2021-09-08T00:00:00Z","timestamp":1631059200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["SFRH\/BD\/145075\/2019"],"award-info":[{"award-number":["SFRH\/BD\/145075\/2019"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000181","name":"Air Force Office of Scientific Research","doi-asserted-by":"publisher","award":["FA9550-18-1-0167"],"award-info":[{"award-number":["FA9550-18-1-0167"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019217","name":"IVADO","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100019217","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sci Comput"],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1007\/s10915-021-01628-3","type":"journal-article","created":{"date-parts":[[2021,9,8]],"date-time":"2021-09-08T16:02:51Z","timestamp":1631116971000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Stochastic Gradient Descent with Polyak\u2019s Learning Rate"],"prefix":"10.1007","volume":"89","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4422-7875","authenticated-orcid":false,"given":"Mariana","family":"Prazeres","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4214-7364","authenticated-orcid":false,"given":"Adam M.","family":"Oberman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,8]]},"reference":[{"key":"1628_CR1","unstructured":"Agarwal, A., Wainwright, M.J., Bartlett, P.L., Ravikumar, P.K.: Information\u2013theoretic lower bounds on the oracle complexity of convex optimization. In: Advances in Neural Information Processing Systems, pp. 1\u20139 (2009)"},{"key":"1628_CR2","unstructured":"Bottou, L., Curtis, F.E., Nocedal, J.: Optimization methods for large-scale machine learning. arXiv preprint arXiv:1606.04838 (2016)"},{"key":"1628_CR3","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974997","volume-title":"First-Order Methods in Optimization","author":"A Beck","year":"2017","unstructured":"Beck, A.: First-Order Methods in Optimization, vol. 25. SIAM, Philadelphia (2017)"},{"key":"1628_CR4","unstructured":"Boyd, S., Mutapcic, A.: Subgradient methods. Lecture notes of EE364b, Stanford University, Winter Quarter, 2013 (2013)"},{"issue":"8","key":"1628_CR5","first-page":"12","volume":"91","author":"L Bottou","year":"1991","unstructured":"Bottou, L.: Stochastic gradient learning in neural networks. Proc. Neuro-N\u0131mes 91(8), 12 (1991)","journal-title":"Proc. 
Neuro-N\u0131mes"},{"key":"1628_CR6","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12, 2121\u20132159 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"1628_CR7","unstructured":"Hardt, M., Recht, B., Singer, Y.: Train faster, generalize better: stability of stochastic gradient descent. arXiv preprint arXiv:1509.01240 (2015)"},{"key":"1628_CR8","unstructured":"Hinton, G., Srivastava, N., Swersky, K.: RmsProp: divide the gradient by a running average of its recent magnitude. Neural networks for machine learning, Coursera lecture 6e (2012)"},{"key":"1628_CR9","unstructured":"Johnson, R., Zhang, T.: Accelerating stochastic gradient descent using predictive variance reduction. In: Advances in Neural Information Processing Systems, pp. 315\u2013323 (2013)"},{"issue":"1\u20133","key":"1628_CR10","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1007\/BF01588797","volume":"49","author":"S Kim","year":"1990","unstructured":"Kim, S., Ahn, H., Cho, S.-C.: Variable target value subgradient method. Math. Program. 49(1\u20133), 359\u2013369 (1990)","journal-title":"Math. Program."},{"issue":"1","key":"1628_CR11","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1115\/1.3662552","volume":"82","author":"RE Kalman","year":"1960","unstructured":"Kalman, R.E.: A new approach to linear filtering and prediction problems. J. Basic Eng. 82(1), 35\u201345 (1960)","journal-title":"J. Basic Eng."},{"key":"1628_CR12","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"1628_CR13","unstructured":"Lacoste-Julien, S., Schmidt, M., Bach, F.: A simpler approach to obtaining an o(1\/t) convergence rate for the projected stochastic subgradient method. arXiv preprint arXiv:1212.2002 (2012)"},{"key":"1628_CR14","unstructured":"Li, X., Orabona, F.: On the convergence of stochastic gradient descent with adaptive stepsizes. In: Chaudhuri, K., Sugiyama, M. (eds.) Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics, Volume\u00a089 of Proceedings of Machine Learning Research, pp. 983\u2013992. PMLR, 16\u201318 April 2019"},{"key":"1628_CR15","unstructured":"Laborde, M., Oberman, A.: A Lyapunov analysis for accelerated gradient methods: from deterministic to stochastic case. In: International Conference on Artificial Intelligence and Statistics, pp. 602\u2013612 (2020)"},{"key":"1628_CR16","unstructured":"Laborde, M., Oberman, A.M.: Nesterov\u2019s method with decreasing learning rate leads to accelerated stochastic gradient descent. arXiv preprint arXiv:1908.07861 (2019)"},{"key":"1628_CR17","volume-title":"Introductory Lectures on Convex Optimization: A Basic Course","author":"Y Nesterov","year":"2013","unstructured":"Nesterov, Y.: Introductory Lectures on Convex Optimization: A Basic Course, vol. 87. Springer, Berlin (2013)"},{"key":"1628_CR18","unstructured":"Osher, S., Wang, B., Yin, P., Luo, X., Barekat, F., Pham, M., Lin, A.: Laplacian smoothing gradient descent. arXiv preprint arXiv:1806.06317 (2018)"},{"issue":"5","key":"1628_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"BT Polyak","year":"1964","unstructured":"Polyak, B.T.: Some methods of speeding up the convergence of iteration methods. USSR Comput. Math. Math. Phys. 
4(5), 1\u201317 (1964)","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"1628_CR20","doi-asserted-by":"crossref","unstructured":"Polyak, R.A.: Introduction to Continuous Optimization, Springer Optimization and Its Applications Series, vol. 172 (2021)","DOI":"10.1007\/978-3-030-68713-7"},{"key":"1628_CR21","unstructured":"Qian, X., Richtarik, P., Gower, R., Sailanbayev, A., Loizou, N., Shulgin, E.: SGD with arbitrary sampling: general analysis and improved rates. In: International Conference on Machine Learning, pp. 5200\u20135209 (2019)"},{"issue":"3","key":"1628_CR22","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 22(3), 400\u2013407 (1951)","journal-title":"Ann. Math. Stat."},{"key":"1628_CR23","unstructured":"Rolinek, M., Martius, G.: L4: practical loss-based stepsize adaptation for deep learning. In: Advances in Neural Information Processing Systems, pp. 6434\u20136444 (2018)"},{"issue":"10","key":"1628_CR24","doi-asserted-by":"publisher","first-page":"7036","DOI":"10.1109\/TIT.2011.2154375","volume":"57","author":"M Raginsky","year":"2011","unstructured":"Raginsky, M., Rakhlin, A.: Information-based complexity, feedback and dynamics in convex programming. IEEE Trans. Inf. Theory 57(10), 7036\u20137056 (2011)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1628_CR25","unstructured":"Rakhlin, A., Shamir, O., Sridharan, K.: Making gradient descent optimal for strongly convex stochastic optimization. arXiv preprint arXiv:1109.5647 (2011)"},{"key":"1628_CR26","unstructured":"Springenberg, J.T., Dosovitskiy, A., Brox, T., Riedmiller, M.: Striving for simplicity: the all convolutional net. arXiv preprint arXiv:1412.6806 (2014)"},{"key":"1628_CR27","volume-title":"Minimization Methods for Non-differentiable Functions","author":"NZ Shor","year":"2012","unstructured":"Shor, N.Z.: Minimization Methods for Non-differentiable Functions, vol. 3. Springer, Berlin (2012)"},{"key":"1628_CR28","unstructured":"Wilson, A.C., Roelofs, R., Stern, M., Srebro, N., Recht, B.: The marginal value of adaptive gradient methods in machine learning. In: Advances in Neural Information Processing Systems, pp. 4148\u20134158 (2017)"},{"key":"1628_CR29","unstructured":"Wu, X., Ward, R., Bottou, L.: Wngrad: learn the learning rate in gradient descent. arXiv preprint arXiv:1803.02865 (2018)"},{"issue":"1","key":"1628_CR30","doi-asserted-by":"publisher","first-page":"A26","DOI":"10.1137\/19M1294356","volume":"43","author":"B Wang","year":"2021","unstructured":"Wang, B., Zou, D., Gu, Q., Osher, S.J.: Laplacian smoothing stochastic gradient Markov chain Monte Carlo. SIAM J. Sci. Comput. 43(1), A26\u2013A53 (2021)","journal-title":"SIAM J. Sci. Comput."},{"key":"1628_CR31","doi-asserted-by":"crossref","unstructured":"Yan, Y., Yang, Ti., Li, Z., Lin, Q., Yi,\u00a0Y.: A unified analysis of stochastic momentum methods for deep learning. In: Proceedings of the 27th International Joint Conference on Artificial Intelligence, IJCAI18, pp. 2955\u20132961. AAAI Press (2018)","DOI":"10.24963\/ijcai.2018\/410"},{"key":"1628_CR32","unstructured":"Zhou, D., Chen, J., Cao, Y., Tang, Y., Yang, Z., Gu, Q.: On the convergence of adaptive gradient methods for nonconvex optimization (2020)"},{"key":"1628_CR33","unstructured":"Zeiler, M.D.: Adadelta: an adaptive learning rate method. 
arXiv preprint arXiv:1212.5701 (2012)"},{"key":"1628_CR34","unstructured":"Zhang, L., Mahdavi, M., Jin, R.: Linear convergence with condition number independent access of full gradients. In: Advances in Neural Information Processing Systems, pp. 980\u2013988 (2013)"}],"container-title":["Journal of Scientific Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10915-021-01628-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10915-021-01628-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10915-021-01628-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,23]],"date-time":"2021-10-23T04:25:51Z","timestamp":1634963151000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10915-021-01628-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,8]]},"references-count":34,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,10]]}},"alternative-id":["1628"],"URL":"https:\/\/doi.org\/10.1007\/s10915-021-01628-3","relation":{},"ISSN":["0885-7474","1573-7691"],"issn-type":[{"value":"0885-7474","type":"print"},{"value":"1573-7691","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9,8]]},"assertion":[{"value":"3 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 September 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declaration"}},{"value":"All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"25"}}
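
For context: the record above follows the Crossref REST API "work" message schema (the "status"/"message-type"/"message" envelope), and every field read below appears in the record itself. A minimal sketch of fetching and reading the same record, assuming network access and the third-party requests library; this is an illustration of consuming the schema, not part of the record:

import requests

# DOI taken from the "DOI" field of the record above.
doi = "10.1007/s10915-021-01628-3"

# Crossref serves a single work at /works/{doi}; the bibliographic
# fields sit under the "message" key of the JSON envelope.
response = requests.get(f"https://api.crossref.org/works/{doi}", timeout=30)
response.raise_for_status()
work = response.json()["message"]

# "title" and "container-title" are lists (a work may carry variants).
title = work["title"][0]
venue = work["container-title"][0]
authors = ", ".join(f"{a['given']} {a['family']}" for a in work["author"])

print(f"{authors}: {title}. {venue} {work['volume']}({work['issue']}), "
      f"article {work['article-number']} ({work['published']['date-parts'][0][0]})")
print(f"cited by {work['is-referenced-by-count']} works; "
      f"{work['references-count']} references deposited")

Run against this DOI, the script would print the citation line for Prazeres and Oberman's article in Journal of Scientific Computing 89(1) together with its citation and reference counts.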