{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T05:12:42Z","timestamp":1772082762955,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T00:00:00Z","timestamp":1710115200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001665","name":"Agence Nationale de la Recherche","doi-asserted-by":"publisher","award":["ANR-19-PI3A-0004"],"award-info":[{"award-number":["ANR-19-PI3A-0004"]}],"id":[{"id":"10.13039\/501100001665","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Optim Theory Appl"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s10957-024-02408-3","type":"journal-article","created":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T19:02:25Z","timestamp":1710183745000},"page":"699-719","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Nonsmooth Nonconvex Stochastic Heavy Ball"],"prefix":"10.1007","volume":"201","author":[{"given":"Tam","family":"Le","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,11]]},"reference":[{"key":"2408_CR1","volume-title":"Differential Equations with Discontinuous Righthand Sides: Control Systems. Mathematics and its Applications","author":"FM Arscott","year":"1988","unstructured":"Arscott, F.M., Filippov, A.F.: Differential Equations with Discontinuous Righthand Sides: Control Systems. Mathematics and its Applications. Springer, Netherlands (1988)"},{"key":"2408_CR2","unstructured":"Aubin, J.P., Cellina, A.: Differential Inclusions: Set-Valued Maps and Viability Theory. Grundlehren der mathematischen Wissenschaften. Springer, Berlin, Heidelberg (2012)"},{"key":"2408_CR3","unstructured":"Bai, S., Kolter, J.Z., Koltun, V.: Deep equilibrium models. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a032, pp. 690\u2013701. Curran Associates, Inc (2019)"},{"key":"2408_CR4","unstructured":"Bai, S., Kolter, J.Z., Koltun, V.: Deep equilibrium models. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a032. Curran Associates, Inc (2019)"},{"key":"2408_CR5","doi-asserted-by":"crossref","unstructured":"Bena\u00efm, M.: Dynamics of stochastic approximation algorithms. In: Seminaire de probabilites XXXIII, pp. 1\u201368. Springer (1999)","DOI":"10.1007\/BFb0096509"},{"issue":"1","key":"2408_CR6","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1137\/S0363012904439301","volume":"44","author":"M Bena\u00efm","year":"2005","unstructured":"Bena\u00efm, M., Hofbauer, J., Sorin, S.: Stochastic approximations and differential inclusions. SIAM J. Control. Optim. 44(1), 328\u2013348 (2005). https:\/\/doi.org\/10.1137\/S0363012904439301","journal-title":"SIAM J. Control. Optim."},{"key":"2408_CR7","unstructured":"Bertrand, Q., Klopfenstein, Q., Blondel, M., Vaiter, S., Gramfort, A., Salmon, J.: Implicit differentiation of lasso-type models for hyperparameter optimization. In: H.D. III, A.\u00a0Singh (eds.) Proceedings of the 37th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol. 119, pp. 810\u2013821. PMLR (2020)"},{"key":"2408_CR8","unstructured":"Bianchi, P., Rios-Zertuche, R.: A closed-measure approach to stochastic approximation. arXiv preprint arXiv:2112.05482 (2021)"},{"issue":"3","key":"2408_CR9","doi-asserted-by":"publisher","first-page":"1117","DOI":"10.1007\/s11228-022-00638-z","volume":"30","author":"P Bianchi","year":"2022","unstructured":"Bianchi, P., Hachem, W., Schechtman, S.: Convergence of constant step stochastic gradient descent for non-smooth non-convex functions. Set-Valued Var. Anal. 30(3), 1117\u20131147 (2022). https:\/\/doi.org\/10.1007\/s11228-022-00638-z","journal-title":"Set-Valued Var. Anal."},{"key":"2408_CR10","unstructured":"Bolte, J., Le, T., Pauwels, E., Silveti-Falls, T.: Nonsmooth implicit differentiation for machine-learning and optimization. In: Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems, vol.\u00a034, pp. 13537\u201313549. Curran Associates, Inc (2021)"},{"key":"2408_CR11","doi-asserted-by":"crossref","unstructured":"Bolte, J., Le, T., Pauwels, E.: Subgradient sampling for nonsmooth nonconvex minimization. arXiv preprint arXiv:2202.13744 (2022)","DOI":"10.1137\/22M1479178"},{"key":"2408_CR12","unstructured":"Bolte, J., Pauwels, E.: A mathematical model for automatic differentiation in machine learning. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol.\u00a033, pp. 10809\u201310819. Curran Associates, Inc (2020)"},{"issue":"1","key":"2408_CR13","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s10107-020-01501-5","volume":"188","author":"J Bolte","year":"2021","unstructured":"Bolte, J., Pauwels, E.: Conservative set valued fields, automatic differentiation, stochastic gradient methods and deep learning. Math. Program. 188(1), 19\u201351 (2021). https:\/\/doi.org\/10.1007\/s10107-020-01501-5","journal-title":"Math. Program."},{"key":"2408_CR14","doi-asserted-by":"publisher","DOI":"10.4171\/JEMS\/1285","author":"J Bolte","year":"2022","unstructured":"Bolte, J., Pauwels, E., R\u00edos-Zertuche, R.: Long term dynamics of the subgradient method for lipschitz path differentiable functions. J. Eur. Math. Soc. (2022). https:\/\/doi.org\/10.4171\/JEMS\/1285","journal-title":"J. Eur. Math. Soc."},{"issue":"134","key":"2408_CR15","first-page":"1","volume":"22","author":"C Castera","year":"2021","unstructured":"Castera, C., Bolte, J., F\u00e9votte, C., Pauwels, E.: An inertial newton algorithm for deep learning. J. Mach. Learn. Res. 22(134), 1\u201331 (2021)","journal-title":"J. Mach. Learn. Res."},{"key":"2408_CR16","unstructured":"Clarke, F.H.: Optimization and Nonsmooth Analysis. Classics in Applied Mathematics. Society for Industrial and Applied Mathematics (1990)"},{"issue":"2","key":"2408_CR17","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1215\/00127094-2010-213","volume":"156","author":"R Cluckers","year":"2011","unstructured":"Cluckers, R., Miller, D.J.: Stability under integration of sums of products of real globally subanalytic functions and their logarithms. Duke Math. J. 156(2), 311\u2013348 (2011). https:\/\/doi.org\/10.1215\/00127094-2010-213","journal-title":"Duke Math. J."},{"key":"2408_CR18","unstructured":"Coste, M.: An Introduction to O-Minimal Geometry. Institut de Recherche Math\u00e9matique de Rennes (1999)"},{"issue":"1","key":"2408_CR19","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1007\/s10208-018-09409-5","volume":"20","author":"D Davis","year":"2020","unstructured":"Davis, D., Drusvyatskiy, D., Kakade, S., Lee, J.D.: Stochastic subgradient method converges on tame functions. Found. Comput. Math. 20(1), 119\u2013154 (2020). https:\/\/doi.org\/10.1007\/s10208-018-09409-5","journal-title":"Found. Comput. Math."},{"issue":"2","key":"2408_CR20","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1007\/BF02742069","volume":"34","author":"Y Ermoliev","year":"1998","unstructured":"Ermoliev, Y., Norkin, V.: Stochastic generalized gradient method for nonconvex nonsmooth stochastic optimization. Cybern. Syst. Anal. 34(2), 196\u2013215 (1998)","journal-title":"Cybern. Syst. Anal."},{"key":"2408_CR21","unstructured":"Figurnov, M., Mohamed, S., Mnih, A.: Implicit reparameterization gradients. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a031. Curran Associates, Inc (2018)"},{"issue":"1","key":"2408_CR22","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1214\/18-EJS1395","volume":"12","author":"S Gadat","year":"2018","unstructured":"Gadat, S., Panloup, F., Saadane, S.: Stochastic heavy ball. Electron. J. Stat. 12(1), 461\u2013529 (2018)","journal-title":"Electron. J. Stat."},{"key":"2408_CR23","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. CoRR arXiv:1412.6980 (2014)"},{"key":"2408_CR24","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Pereira, F., Burges, C., Bottou, L., Weinberger, K. (eds.) Advances in Neural Information Processing Systems, vol.\u00a025. Curran Associates, Inc (2012)"},{"issue":"7553","key":"2408_CR25","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015). https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Nature"},{"key":"2408_CR26","unstructured":"Majewski, S., Miasojedow, B., Moulines, \u00c9.: Analysis of nonsmooth stochastic approximation: the differential inclusion approach. arXiv Optimization and Control (2018)"},{"key":"2408_CR27","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1007\/BF01099354","volume":"16","author":"V Norkin","year":"1980","unstructured":"Norkin, V.: Generalized-differentiable functions. Cybern. Syst. Anal. 16, 10\u201312 (1980). https:\/\/doi.org\/10.1007\/BF01099354","journal-title":"Cybern. Syst. Anal."},{"key":"2408_CR28","doi-asserted-by":"publisher","first-page":"804","DOI":"10.1007\/BF01068698","volume":"22","author":"V Norkin","year":"1986","unstructured":"Norkin, V.: Stochastic generalized-differentiable functions in the problem of nonconvex nonsmooth stochastic optimization. Cybern. Syst. Anal. 22, 804\u2013809 (1986). https:\/\/doi.org\/10.1007\/BF01068698","journal-title":"Cybern. Syst. Anal."},{"key":"2408_CR29","unstructured":"Orvieto, A., Kohler, J., Lucchi, A.: The role of memory in stochastic optimization. In: Adams, R.P., Gogate, V. (eds.) Proceedings of the 35th Uncertainty in Artificial Intelligence Conference, Proceedings of Machine Learning Research, vol. 115, pp. 356\u2013366. PMLR (2020)"},{"issue":"5","key":"2408_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"BT Polyak","year":"1964","unstructured":"Polyak, B.T.: Some methods of speeding up the convergence of iteration methods. USSR Comput. Math. Math. Phys. 4(5), 1\u201317 (1964). https:\/\/doi.org\/10.1016\/0041-5553(64)90137-5","journal-title":"USSR Comput. Math. Math. Phys."},{"issue":"7","key":"2408_CR31","doi-asserted-by":"publisher","first-page":"1615","DOI":"10.1007\/s11590-020-01537-8","volume":"14","author":"A Ruszczy\u0144ski","year":"2020","unstructured":"Ruszczy\u0144ski, A.: Convergence of a stochastic subgradient method with averaging for nonsmooth nonconvex constrained optimization. Optim. Lett. 14(7), 1615\u20131625 (2020). https:\/\/doi.org\/10.1007\/s11590-020-01537-8","journal-title":"Optim. Lett."},{"key":"2408_CR32","unstructured":"Sebbouh, O., Gower, R.M., Defazio, A.: Almost sure convergence rates for stochastic gradient descent and stochastic heavy ball. In: Belkin, M., Kpotufe, S. (eds.) Proceedings of Thirty Fourth Conference on Learning Theory, Proceedings of Machine Learning Research, vol. 134, pp. 3935\u20133971. PMLR (2021)"},{"key":"2408_CR33","doi-asserted-by":"publisher","unstructured":"Shikhman, V.: Topological Aspects of Nonsmooth Optimization, Nonconvex Optimization and Its Applications, vol.\u00a064. Springer (2012). https:\/\/doi.org\/10.1007\/978-1-4614-1897-9","DOI":"10.1007\/978-1-4614-1897-9"},{"key":"2408_CR34","unstructured":"Sutskever, I., Martens, J., Dahl, G., Hinton, G.: On the importance of initialization and momentum in deep learning. In: Dasgupta, S., McAllester, D. (eds.) Proceedings of the 30th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol.\u00a028, pp. 1139\u20131147. PMLR, Atlanta, Georgia, USA (2013)"},{"issue":"2","key":"2408_CR35","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1215\/S0012-7094-96-08416-1","volume":"84","author":"L van den Dries","year":"1996","unstructured":"van den Dries, L., Miller, C.: Geometric categories and o-minimal structures. Duke Math. J. 84(2), 497\u2013540 (1996). https:\/\/doi.org\/10.1215\/S0012-7094-96-08416-1","journal-title":"Duke Math. J."},{"key":"2408_CR36","unstructured":"Yang, T., Lin, Q., Li, Z.: Unified convergence analysis of stochastic momentum methods for convex and non-convex optimization. arXiv Optimization and Control (2016)"}],"container-title":["Journal of Optimization Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-024-02408-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10957-024-02408-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10957-024-02408-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,9]],"date-time":"2024-05-09T15:14:15Z","timestamp":1715267655000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10957-024-02408-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,11]]},"references-count":36,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["2408"],"URL":"https:\/\/doi.org\/10.1007\/s10957-024-02408-3","relation":{},"ISSN":["0022-3239","1573-2878"],"issn-type":[{"value":"0022-3239","type":"print"},{"value":"1573-2878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,11]]},"assertion":[{"value":"17 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}