{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T12:43:01Z","timestamp":1774528981584,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,1,18]],"date-time":"2023-01-18T00:00:00Z","timestamp":1674000000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,18]],"date-time":"2023-01-18T00:00:00Z","timestamp":1674000000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R &D Program of China","doi-asserted-by":"crossref","award":["2021YFA1003400"],"award-info":[{"award-number":["2021YFA1003400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176051"],"award-info":[{"award-number":["62176051"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities of China","doi-asserted-by":"crossref","award":["2412020FZ024"],"award-info":[{"award-number":["2412020FZ024"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Optim Lett"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s11590-023-01972-3","type":"journal-article","created":{"date-parts":[[2023,1,18]],"date-time":"2023-01-18T04:23:44Z","timestamp":1674015824000},"page":"2113-2131","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Almost sure convergence of stochastic composite objective mirror descent for non-convex non-smooth optimization"],"prefix":"10.1007","volume":"18","author":[{"given":"Yuqing","family":"Liang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9663-9743","authenticated-orcid":false,"given":"Dongpo","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Naimin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Danilo P.","family":"Mandic","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,1,18]]},"reference":[{"key":"1972_CR1","unstructured":"Alacaoglu, A., Malitsky, Y., Cevher, V.: Convergence of adaptive algorithms for weakly convex constrained optimization. arXiv preprint arXiv:2006.06650 (2020)"},{"issue":"1","key":"1972_CR2","first-page":"310","volume":"18","author":"YF Atchad\u00e9","year":"2017","unstructured":"Atchad\u00e9, Y.F., Fort, G., Moulines, E.: On perturbed proximal gradient algorithms. J. Mach. Learn. Res. 18(1), 310\u2013342 (2017)","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"1972_CR3","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou, L., Curtis, F.E., Nocedal, J.: Optimization methods for large-scale machine learning. SIAM Rev. 60(2), 223\u2013311 (2018)","journal-title":"SIAM Rev."},{"issue":"5","key":"1972_CR4","doi-asserted-by":"publisher","first-page":"1177","DOI":"10.1007\/s11590-018-1331-1","volume":"13","author":"V Cevher","year":"2019","unstructured":"Cevher, V., V\u0169, B.C.: On the linear convergence of the stochastic gradient method with constant step-size. Optim. Lett. 13(5), 1177\u20131187 (2019)","journal-title":"Optim. Lett."},{"issue":"1","key":"1972_CR5","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1137\/18M1178244","volume":"29","author":"D Davis","year":"2019","unstructured":"Davis, D., Drusvyatskiy, D.: Stochastic model-based minimization of weakly convex functions. SIAM J. Optim. 29(1), 207\u2013239 (2019)","journal-title":"SIAM J. Optim."},{"key":"1972_CR6","first-page":"24","volume":"23","author":"D Driggs","year":"2022","unstructured":"Driggs, D., Liang, J., Sch\u00f6nlieb, C.B.: On biased stochastic gradient estimation. J. Mach. Learn. Res. 23, 24\u20131 (2022)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"1972_CR7","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1007\/s10107-018-1311-3","volume":"178","author":"D Drusvyatskiy","year":"2019","unstructured":"Drusvyatskiy, D., Paquette, C.: Efficiency of minimizing compositions of convex functions and smooth maps. Math. Program. 178(1), 503\u2013558 (2019)","journal-title":"Math. Program."},{"issue":"7","key":"1972_CR8","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12(7), 2121\u20132159 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"1972_CR9","unstructured":"Duchi, J., Shalev-Shwartz, S., Singer, Y., Tewari, A.: Composite objective mirror descent. In: Conference on Learning Theory, vol. 10, pp. 14\u201326 (2010)"},{"key":"1972_CR10","first-page":"2899","volume":"10","author":"J Duchi","year":"2009","unstructured":"Duchi, J., Singer, Y.: Efficient online and batch learning using forward backward splitting. J. Mach. Learn. Res. 10, 2899\u20132934 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"1972_CR11","unstructured":"Dundar, M., Krishnapuram, B., Bi, J., Rao, R.B.: Learning classifiers when the training data is not iid. In: IJCAI, vol. 2007, pp. 756\u201361 (2007)"},{"issue":"1","key":"1972_CR12","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/s10107-014-0846-1","volume":"155","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G., Zhang, H.: Mini-batch stochastic approximation methods for nonconvex stochastic composite optimization. Math. Program. 155(1), 267\u2013305 (2016)","journal-title":"Math. Program."},{"key":"1972_CR13","unstructured":"Gorbunov, E., Hanzely, F., Richt\u00e1rik, P.: A unified theory of SGD: Variance reduction, sampling, quantization and coordinate descent. In: International Conference on Artificial Intelligence and Statistics, pp. 680\u2013690. PMLR (2020)"},{"key":"1972_CR14","unstructured":"Gower, R., Sebbouh, O., Loizou, N.: SGD for structured nonconvex functions: Learning rates, minibatching and interpolation. In: International Conference on Artificial Intelligence and Statistics, pp. 1315\u20131323. PMLR (2021)"},{"issue":"3","key":"1972_CR15","doi-asserted-by":"publisher","first-page":"989","DOI":"10.1016\/j.patcog.2012.10.010","volume":"46","author":"Y Han","year":"2013","unstructured":"Han, Y., Feng, X., Baciu, G., Wang, W.: Nonconvex sparse regularizer based speckle noise removal. Pattern Recognit. 46(3), 989\u20131001 (2013)","journal-title":"Pattern Recognit."},{"key":"1972_CR16","unstructured":"J.\u00a0Reddi, S., Sra, S., Poczos, B., Smola, A.J.: Proximal stochastic methods for nonsmooth nonconvex finite-sum optimization. In: Advances in Neural Information Processing Systems, vol.\u00a029, pp. 1153\u20131161 (2016)"},{"key":"1972_CR17","unstructured":"Khaled, A., Richt\u00e1rik, P.: Better theory for SGD in the nonconvex world. arXiv preprint arXiv:2002.03329 (2020)"},{"issue":"239","key":"1972_CR18","first-page":"1","volume":"23","author":"Z Li","year":"2022","unstructured":"Li, Z., Li, J.: Simple and optimal stochastic gradient methods for nonsmooth nonconvex optimization. J. Mach. Learn. Res. 23(239), 1\u201361 (2022)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"1972_CR19","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1137\/19M1294277","volume":"31","author":"J Liang","year":"2021","unstructured":"Liang, J., Monteiro, R.D.: An average curvature accelerated composite gradient method for nonconvex smooth composite optimization problems. SIAM J. Optim. 31(1), 217\u2013243 (2021)","journal-title":"SIAM J. Optim."},{"key":"1972_CR20","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1016\/j.neunet.2021.10.026","volume":"145","author":"J Liu","year":"2022","unstructured":"Liu, J., Kong, J., Xu, D., Qi, M., Lu, Y.: Convergence analysis of AdaBound with relaxed bound functions for non-convex optimization. Neural Netw. 145, 300\u2013307 (2022)","journal-title":"Neural Netw."},{"key":"1972_CR21","unstructured":"Liu, J., Yuan, Y.: On almost sure convergence rates of stochastic gradient methods. arXiv preprint arXiv:2202.04295 (2022)"},{"key":"1972_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2022.02.063","volume":"487","author":"J Luo","year":"2022","unstructured":"Luo, J., Liu, J., Xu, D., Zhang, H.: SGD-r$$\\alpha$$: A real-time $$\\alpha$$-suffix averaging method for SGD with biased gradient estimates. Neurocomputing 487, 1\u20138 (2022)","journal-title":"Neurocomputing"},{"key":"1972_CR23","unstructured":"Mai, V., Johansson, M.: Convergence of a stochastic gradient method with momentum for non-smooth non-convex optimization. In: International Conference on Machine Learning, pp. 6630\u20136639. PMLR (2020)"},{"key":"1972_CR24","doi-asserted-by":"publisher","DOI":"10.1002\/047084535X","volume-title":"Recurrent Neural Networks for Prediction: Learning Algorithms, Architectures and Stability","author":"D Mandic","year":"2001","unstructured":"Mandic, D., Chambers, J.: Recurrent Neural Networks for Prediction: Learning Algorithms, Architectures and Stability. Wiley, New York (2001)"},{"key":"1972_CR25","unstructured":"Mertikopoulos, P., Hallak, N., Kavis, A., Cevher, V.: On the almost sure convergence of stochastic gradient descent in non-convex problems. In: Advances in Neural Information Processing Systems, vol.\u00a033, pp. 1117\u20131128 (2020)"},{"key":"1972_CR26","first-page":"115","volume":"22","author":"MR Metel","year":"2021","unstructured":"Metel, M.R., Takeda, A.: Stochastic proximal methods for non-smooth non-convex constrained sparse optimization. J. Mach. Learn. Res. 22, 115\u20131 (2021)","journal-title":"J. Mach. Learn. Res."},{"key":"1972_CR27","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-91578-4","volume-title":"Lectures on Convex Optimization","author":"Y Nesterov","year":"2018","unstructured":"Nesterov, Y.: Lectures on Convex Optimization, vol. 137. Springer, Cham (2018)"},{"issue":"12","key":"1972_CR28","doi-asserted-by":"publisher","first-page":"3073","DOI":"10.1109\/TIP.2010.2052275","volume":"19","author":"M Nikolova","year":"2010","unstructured":"Nikolova, M., Ng, M.K., Tam, C.P.: Fast nonconvex nonsmooth minimization methods for image restoration and reconstruction. IEEE Trans. Image Process. 19(12), 3073\u20133088 (2010)","journal-title":"IEEE Trans. Image Process."},{"key":"1972_CR29","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. pp. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"1972_CR30","doi-asserted-by":"crossref","unstructured":"Robbins, H., Siegmund, D.: A convergence theorem for non-negative almost supermartingales and some applications. In: Optimizing methods in statistics, pp. 233\u2013257. Elsevier (1971)","DOI":"10.1016\/B978-0-12-604550-5.50015-8"},{"key":"1972_CR31","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781107298019","volume-title":"Understanding Machine Learning: from Theory to Algorithms","author":"S Shalev-Shwartz","year":"2014","unstructured":"Shalev-Shwartz, S., Ben-David, S.: Understanding Machine Learning: from Theory to Algorithms. Cambridge University Press, New York (2014)"},{"issue":"2","key":"1972_CR32","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/s40305-020-00309-6","volume":"8","author":"RY Sun","year":"2020","unstructured":"Sun, R.Y.: Optimization for deep learning: An overview. J. Oper. Res. Soc. China 8(2), 249\u2013294 (2020)","journal-title":"J. Oper. Res. Soc. China"},{"issue":"2","key":"1972_CR33","doi-asserted-by":"publisher","first-page":"835","DOI":"10.1109\/TCYB.2018.2874332","volume":"50","author":"W Tao","year":"2018","unstructured":"Tao, W., Pan, Z., Wu, G., Tao, Q.: Primal averaging: A new gradient evaluation step to attain the optimal individual convergence. IEEE T. Cybern. 50(2), 835\u2013845 (2018)","journal-title":"IEEE T. Cybern."},{"issue":"2","key":"1972_CR34","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1287\/moor.8.2.231","volume":"8","author":"JP Vial","year":"1983","unstructured":"Vial, J.P.: Strong and weak convexity of sets and functions. Math. Oper. Res. 8(2), 231\u2013259 (1983)","journal-title":"Math. Oper. Res."},{"key":"1972_CR35","first-page":"1","volume":"21","author":"R Ward","year":"2020","unstructured":"Ward, R., Wu, X., Bottou, L.: AdaGrad stepsizes: Sharp convergence over nonconvex landscapes. J. Mach. Learn. Res. 21, 1\u201330 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"1972_CR36","doi-asserted-by":"publisher","first-page":"1646","DOI":"10.1109\/LCSYS.2021.3124187","volume":"6","author":"K Wood","year":"2022","unstructured":"Wood, K., Bianchin, G., Dall\u2019Anese, E.: Online projected gradient descent for stochastic optimization with decision-dependent distributions. IEEE Control Syst. Lett. 6, 1646\u20131651 (2022)","journal-title":"IEEE Control Syst. Lett."},{"issue":"1","key":"1972_CR37","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1007\/s11590-020-01579-y","volume":"15","author":"H Zhang","year":"2021","unstructured":"Zhang, H., Pan, L., Xiu, N.: Optimality conditions for locally Lipschitz optimization with $$l_{0}$$-regularization. Optim. Lett. 15(1), 189\u2013203 (2021)","journal-title":"Optim. Lett."},{"key":"1972_CR38","unstructured":"Zhou, D., Chen, J., Cao, Y., Tang, Y., Yang, Z., Gu, Q.: On the convergence of adaptive gradient methods for nonconvex optimization. arXiv preprint arXiv:1808.05671 (2018)"},{"key":"1972_CR39","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Wang, Z., Ji, K., Liang, Y., Tarokh, V.: Proximal gradient algorithm with momentum and flexible parameter restart for nonconvex optimization. arXiv preprint arXiv:2002.11582 (2020)","DOI":"10.24963\/ijcai.2020\/201"}],"container-title":["Optimization Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11590-023-01972-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11590-023-01972-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11590-023-01972-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,15]],"date-time":"2024-10-15T10:24:35Z","timestamp":1728987875000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11590-023-01972-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,18]]},"references-count":39,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1972"],"URL":"https:\/\/doi.org\/10.1007\/s11590-023-01972-3","relation":{},"ISSN":["1862-4472","1862-4480"],"issn-type":[{"value":"1862-4472","type":"print"},{"value":"1862-4480","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,1,18]]},"assertion":[{"value":"22 August 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 January 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}