{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:29:06Z","timestamp":1767986946820,"version":"3.49.0"},"reference-count":81,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,1,4]],"date-time":"2021-01-04T00:00:00Z","timestamp":1609718400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,4]],"date-time":"2021-01-04T00:00:00Z","timestamp":1609718400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DMS. 1619884"],"award-info":[{"award-number":["DMS. 1619884"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"crossref","award":["No. N00014-20-1-2088"],"award-info":[{"award-number":["No. N00014-20-1-2088"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Program."],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s10107-020-01583-1","type":"journal-article","created":{"date-parts":[[2021,1,4]],"date-time":"2021-01-04T17:03:07Z","timestamp":1609779787000},"page":"1005-1071","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["A hybrid stochastic optimization framework for composite nonconvex optimization"],"prefix":"10.1007","volume":"191","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5866-0787","authenticated-orcid":false,"given":"Quoc","family":"Tran-Dinh","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4490-8649","authenticated-orcid":false,"given":"Nhan H.","family":"Pham","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1579-7035","authenticated-orcid":false,"given":"Dzung T.","family":"Phan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6083-606X","authenticated-orcid":false,"given":"Lam M.","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,4]]},"reference":[{"key":"1583_CR1","first-page":"1","volume":"99","author":"A Agarwal","year":"2010","unstructured":"Agarwal, A., Bartlett, P.L., Ravikumar, P., Wainwright, M.J.: Information-theoretic lower bounds on the oracle complexity of stochastic convex optimization. IEEE Trans. Inf. Theory 99, 1\u20131 (2010)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1583_CR2","unstructured":"Agarwal, A., Bottou, L.: A lower bound for the optimization of finite sums. In: International Conference on Machine Learning, pp. 78\u201386 (2015)"},{"key":"1583_CR3","doi-asserted-by":"crossref","unstructured":"Allen-Zhu, Z.: Katyusha: The first direct acceleration of stochastic gradient methods. Proceedings of the 49th Annual ACM SIGACT Symposium on Theory of Computing (STOC), pp. 1200\u20131205 (2017). Montreal, Canada","DOI":"10.1145\/3055399.3055448"},{"key":"1583_CR4","unstructured":"Allen-Zhu, Z.: Natasha: Faster non-convex stochastic optimization via strongly non-convex parameter. In: Proceedings of the 34th International Conference on Machine Learning, vol.\u00a070, pp. 89\u201397 (2017)"},{"key":"1583_CR5","unstructured":"Allen-Zhu, Z.: Natasha 2: Faster non-convex optimization than SGD. In: Advances in neural information processing systems, pp. 2675\u20132686 (2018)"},{"key":"1583_CR6","unstructured":"Allen-Zhu, Z., Li. Y.: NEON2: Finding local minima via first-order oracles. In: Advances in Neural Information Processing Systems, pp. 3720\u20133730 (2018)"},{"key":"1583_CR7","unstructured":"Allen-Zhu, Zeyuan, Yuan, Yang: Improved SVRG for Non-Strongly-Convex or Sum-of-Non-Convex Objectives. In: ICML, pp. 1080\u20131089 (2016)"},{"key":"1583_CR8","unstructured":"Arjevani, Y., Carmon, Y., Duchi, J.\u00a0C., Foster, D.\u00a0J., Srebro, N., Woodworth, B.: Lower bounds for non-convex stochastic optimization. arXiv:1912.02365, (2019)"},{"issue":"2","key":"1583_CR9","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s10107-011-0472-0","volume":"129","author":"DP Bertsekas","year":"2011","unstructured":"Bertsekas, D.P.: Incremental proximal methods for large scale convex optimization. Math. Program. 129(2), 163\u2013195 (2011)","journal-title":"Math. Program."},{"issue":"2","key":"1583_CR10","doi-asserted-by":"publisher","first-page":"545","DOI":"10.1093\/imanum\/dry009","volume":"39","author":"R Bollapragada","year":"2019","unstructured":"Bollapragada, R., Byrd, R., Nocedal, J.: Exact and Inexact Subsampled Newton Methods for Optimization. IMA J. Numer. Anal. 39(2), 545\u2013578 (2019)","journal-title":"IMA J. Numer. Anal."},{"key":"1583_CR11","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Proceedings of COMPSTAT\u20192010, pp. 177\u2013186. Springer (2010)","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"1583_CR12","first-page":"9","volume-title":"Online Learning in Neural Networks","author":"L Bottou","year":"1998","unstructured":"Bottou, L.: Online learning and stochastic approximations. In: David, S. (ed.) Online Learning in Neural Networks, pp. 9\u201342. Cambridge University Press, New York (1998)"},{"issue":"2","key":"1583_CR13","doi-asserted-by":"publisher","first-page":"1008","DOI":"10.1137\/140954362","volume":"26","author":"HB Richard","year":"2016","unstructured":"Richard, H.B., Hansen, S.L., Jorge, N., Yoram, S.: A stochastic quasi-Newton method for large-scale optimization. SIAM J. Optim. 26(2), 1008\u20131031 (2016)","journal-title":"SIAM J. Optim."},{"key":"1583_CR14","first-page":"1","volume":"5","author":"Y Carmon","year":"2017","unstructured":"Carmon, Y., Duchi, J., Hinder, O., Sidford, A.: Lower bounds for finding stationary points I. Math. Program. 5, 1\u201350 (2017)","journal-title":"Math. Program."},{"issue":"4","key":"1583_CR15","doi-asserted-by":"publisher","first-page":"2783","DOI":"10.1137\/17M1134834","volume":"28","author":"A Chambolle","year":"2018","unstructured":"Chambolle, A., Ehrhardt, M.J., Richt\u00e1rik, P., Sch\u00f6nlieb, C.-B.: Stochastic primal-dual hybrid gradient algorithm with arbitrary sampling and imaging applications. SIAM J. Optim. 28(4), 2783\u20132808 (2018)","journal-title":"SIAM J. Optim."},{"key":"1583_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1961189.1961199","volume":"2","author":"C-C Chang","year":"2011","unstructured":"Chang, C.-C., Lin, C.-J.: LIBSVM: a library for support vector machines. ACM Trans. Intell. Syst. Technol. 2, 1\u201327 (2011)","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"1583_CR17","unstructured":"Cutkosky, A., Orabona, F.: Momentum-based variance reduction in non-convex SGD. In: Advances in Neural Information Processing Systems, pp. 15210\u201315219 (2019)"},{"issue":"3","key":"1583_CR18","doi-asserted-by":"publisher","first-page":"1908","DOI":"10.1137\/17M1151031","volume":"29","author":"D Davis","year":"2019","unstructured":"Davis, D., Grimmer, B.: Proximally guided stochastic subgradient method for nonsmooth, nonconvex problems. SIAM J. Optim. 29(3), 1908\u20131930 (2019)","journal-title":"SIAM J. Optim."},{"key":"1583_CR19","unstructured":"Defazio, A., Bach, F., Lacoste-Julien, S.: SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives. In: Advances in Neural Information Processing Systems (NIPS), pp. 1646\u20131654 (2014)"},{"key":"1583_CR20","unstructured":"Defazio, A., Caetano, T., Domke. J.: Finito: A faster, permutable incremental gradient method for big data problems. In: International Conference on Machine Learning, pp. 1125\u20131133 (2014)"},{"key":"1583_CR21","unstructured":"Driggs, D., Liang, J., Sch\u00f6nlieb, C.-B.: On the bias-variance tradeoff in stochastic gradient methods. arXiv:1906.01133 (2019)"},{"key":"1583_CR22","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12, 2121\u20132159 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"1583_CR23","unstructured":"Erdogdu, M.A, Montanari, A.: Convergence rates of sub-sampled Newton methods. In: Advances in Neural Information Processing Systems, pp. 3052\u20133060 (2015)"},{"key":"1583_CR24","unstructured":"Fang, C., Li, C.\u00a0J., Lin, Z., Zhang, T.: SPIDER: near-optimal non-convex optimization via stochastic path integrated differential estimator. In: Advances in Neural Information Processing Systems, pp. 689\u2013699 (2018)"},{"key":"1583_CR25","unstructured":"Fang, C., Lin, Z., Zhang, T.: Sharp Analysis for Nonconvex SGD Escaping from Saddle Points. In: Conference on Learning Theory, pp. 1192\u20131234 (2019)"},{"key":"1583_CR26","unstructured":"Foster, D., Sekhari, A., Shamir, O., Srebro, N., Sridharan, K., Woodworth, B.: The complexity of making the gradient small in stochastic convex optimization. In: Conference on Learning Theory, pp. 1319\u20131345 (2019)"},{"key":"1583_CR27","unstructured":"Ge, R., Huang, F., Jin, C., Yuan, Y.: Escaping from saddle points\u2013online stochastic gradient for tensor decomposition. In: Conference on Learning Theory, pp. 797\u2013842 (2015)"},{"key":"1583_CR28","unstructured":"Ge, R., Li, Z., Wang, W., Wang, X.: Stabilized SVRG: Simple variance reduction for nonconvex optimization. In: Conference on Learning Theory, pp. 1394\u20131448 (2019)"},{"issue":"4","key":"1583_CR29","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1137\/120880811","volume":"23","author":"S Ghadimi","year":"2013","unstructured":"Ghadimi, S., Lan, G.: Stochastic first-and zeroth-order methods for nonconvex stochastic programming. SIAM J. Optim. 23(4), 2341\u20132368 (2013)","journal-title":"SIAM J. Optim."},{"issue":"1\u20132","key":"1583_CR30","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s10107-015-0871-8","volume":"156","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G.: Accelerated gradient methods for nonconvex nonlinear and stochastic programming. Math. Program. 156(1\u20132), 59\u201399 (2016)","journal-title":"Math. Program."},{"issue":"1\u20132","key":"1583_CR31","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/s10107-014-0846-1","volume":"155","author":"S Ghadimi","year":"2016","unstructured":"Ghadimi, S., Lan, G., Zhang, H.: Mini-batch stochastic approximation methods for nonconvex stochastic composite optimization. Math. Program. 155(1\u20132), 267\u2013305 (2016)","journal-title":"Math. Program."},{"key":"1583_CR32","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning, vol. 1. MIT Press, Cambridge (2016)"},{"key":"1583_CR33","unstructured":"Hanzely, F., Mishchenko, K., Richt\u00e1rik, P.: SEGA: variance reduction via gradient sketching. In: Advances in Neural Information Processing Systems, pp. 2082\u20132093 (2018)"},{"issue":"1\u20132","key":"1583_CR34","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/s10107-018-1297-x","volume":"174","author":"A Jofr\u00e9","year":"2019","unstructured":"Jofr\u00e9, A., Thompson, P.: On variance reduction for stochastic smooth convex optimization with multiplicative noise. Math. Program. 174(1\u20132), 253\u2013292 (2019)","journal-title":"Math. Program."},{"key":"1583_CR35","unstructured":"Johnson, R., Zhang, T.: Accelerating stochastic gradient descent using predictive variance reduction. In: Advances in Neural Information Processing Systems (NIPS), pp. 315\u2013323 (2013)"},{"key":"1583_CR36","doi-asserted-by":"crossref","unstructured":"Karimi, H., Nutini, J. Schmidt, M.: Linear convergence of gradient and proximal-gradient methods under the Polyak-\u0142ojasiewicz condition. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 795\u2013811. Springer (2016)","DOI":"10.1007\/978-3-319-46128-1_50"},{"key":"1583_CR37","unstructured":"Kingma, D.P., Ba, J.: ADAM: A Method for Stochastic Optimization. In: Proceedings of the 3rd International Conference on Learning Representations (ICLR). arXiv:1412.6980 (2014)"},{"key":"1583_CR38","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1109\/JSTSP.2015.2505682","volume":"10","author":"J Kone\u010dn\u00fd","year":"2016","unstructured":"Kone\u010dn\u00fd, J., Liu, J., Richt\u00e1rik, P., Tak\u00e1\u010d, M.: Mini-batch semi-stochastic gradient descent in the proximal setting. IEEE J. Sel. Top. Signal Process. 10, 242\u2013255 (2016)","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"1583_CR39","unstructured":"Kovalev, D., Horvath, S., Richtarik, P.: Don\u2019t jump through hoops and remove those loops: SVRG and Katyusha are better without the outer loop. Proc 31st Int Conf Algorithmic Learn Theor. 117, 1\u201317 (2020)"},{"issue":"7553","key":"1583_CR40","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"1583_CR41","unstructured":"Li, Z.: SSRGD: simple stochastic recursive gradient descent for escaping saddle points. In: Advances in Neural Information Processing Systems, pp. 1521\u20131531 (2019)"},{"key":"1583_CR42","unstructured":"Li, Z., Li, J.: A simple proximal stochastic gradient method for nonsmooth nonconvex optimization. In: Advances in Neural Information Processing Systems, pp. 5564\u20135574 (2018)"},{"key":"1583_CR43","unstructured":"Lihua, L., Ju, C., Chen, J., Jordan, M.: Non-convex finite-sum optimization via SCSG methods. In: Advances in Neural Information Processing Systems, pp. 2348\u20132358 (2017)"},{"key":"1583_CR44","unstructured":"Lin, H., Mairal, J., Harchaoui, Z.: A universal catalyst for first-order optimization. In: Advances in Neural Information Processing Systems, pp. 3384\u20133392 (2015)"},{"issue":"2","key":"1583_CR45","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1137\/140957639","volume":"25","author":"J Mairal","year":"2015","unstructured":"Mairal, J.: Incremental majorization-minimization optimization with application to large-scale machine learning. SIAM J. Optim. 25(2), 829\u2013855 (2015)","journal-title":"SIAM J. Optim."},{"key":"1583_CR46","unstructured":"Metel, M., Takeda, A.: Simple stochastic gradient methods for non-smooth non-convex regularized optimization. In: International Conference on Machine Learning, pp. 4537\u20134545 (2019)"},{"key":"1583_CR47","unstructured":"Mokhtari, A., Ozdaglar, A., Jadbabaie, A.: Escaping saddle points in constrained optimization. In: Advances in Neural Information Processing Systems, pp. 3629\u20133639 (2018)"},{"key":"1583_CR48","unstructured":"Moulines, Eric, Bach, Francis\u00a0R: Non-asymptotic analysis of stochastic approximation algorithms for machine learning. In: Advances in Neural Information Processing Systems, pp. 451\u2013459 (2011)"},{"issue":"4","key":"1583_CR49","doi-asserted-by":"publisher","first-page":"1574","DOI":"10.1137\/070704277","volume":"19","author":"A Nemirovski","year":"2009","unstructured":"Nemirovski, A., Juditsky, A., Lan, G., Shapiro, A.: Robust stochastic approximation approach to stochastic programming. SIAM J. Optim. 19(4), 1574\u20131609 (2009)","journal-title":"SIAM J. Optim."},{"key":"1583_CR50","volume-title":"Problem Complexity and Method Efficiency in Optimization","author":"A Nemirovskii","year":"1983","unstructured":"Nemirovskii, A., Yudin, D.: Problem Complexity and Method Efficiency in Optimization. Wiley, New York (1983)"},{"key":"1583_CR51","series-title":"Applied Optimization","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-8853-9","volume-title":"Introductory Lectures on Convex Optimization: A Basic Course","author":"Y Nesterov","year":"2004","unstructured":"Nesterov, Y.: Introductory Lectures on Convex Optimization: A Basic Course. Applied Optimization, vol. 87. Kluwer Academic Publishers, London (2004)"},{"issue":"1","key":"1583_CR52","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s10107-006-0706-8","volume":"108","author":"Y Nesterov","year":"2006","unstructured":"Nesterov, Y., Polyak, B.T.: Cubic regularization of Newton method and its global performance. Math. Program. 108(1), 177\u2013205 (2006)","journal-title":"Math. Program."},{"key":"1583_CR53","unstructured":"Nguyen, L.M., Liu, J., Scheinberg, K., Tak\u00e1\u010d, M.: SARAH: a novel method for machine learning problems using stochastic recursive gradient. In: ICML (2017)"},{"key":"1583_CR54","unstructured":"Nguyen, L.M., Nguyen, N.H., Phan, D.T., Kalagnanam, J.R., Scheinberg, K.: When does stochastic gradient algorithm work well? arXiv:1801.06159 (2018)"},{"key":"1583_CR55","doi-asserted-by":"crossref","unstructured":"Nguyen, L.M., Scheinberg, K., Takac, M.: Inexact SARAH Algorithm for Stochastic Optimization. Optim. Methods Softw. (online first) (2020)","DOI":"10.1080\/10556788.2020.1818081"},{"key":"1583_CR56","unstructured":"Nguyen, L.M., van Dijk, M., Phan, D.T., Nguyen, P.H., Weng, T.-W., Kalagnanam, J.R.: Optimal finite-sum smooth non-convex optimization with SARAH. arXiv:1901.07648 (2019)"},{"key":"1583_CR57","unstructured":"Nguyen, L.M., Liu, J., Scheinberg, K., Tak\u00e1c, M.: Stochastic recursive gradient algorithm for nonconvex optimization. arXiv:1705.07261 (2017)"},{"issue":"3","key":"1583_CR58","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/BF00275687","volume":"15","author":"E Oja","year":"1982","unstructured":"Oja, E.: Simplified neuron model as a principal component analyzer. J. Math. Biol. 15(3), 267\u2013273 (1982)","journal-title":"J. Math. Biol."},{"key":"1583_CR59","first-page":"1","volume":"21","author":"HN Pham","year":"2020","unstructured":"Pham, H.N., Nguyen, M.L., Phan, T.D., Tran-Dinh, Q.: ProxSARAH: an efficient algorithmic framework for stochastic composite nonconvex optimization. J. Mach. Learn. Res. 21, 1\u201348 (2020)","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"1583_CR60","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1137\/15M1021106","volume":"27","author":"M Pilanci","year":"2017","unstructured":"Pilanci, M., Wainwright, M.J.: Newton sketch: a linear-time optimization algorithm with linear-quadratic convergence. SIAM J. Optim. 27(1), 205\u2013245 (2017)","journal-title":"SIAM J. Optim."},{"issue":"4","key":"1583_CR61","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"B Polyak","year":"1992","unstructured":"Polyak, B., Juditsky, A.: Acceleration of stochastic approximation by averaging. SIAM J. Control Optim. 30(4), 838\u2013855 (1992)","journal-title":"SIAM J. Control Optim."},{"key":"1583_CR62","doi-asserted-by":"crossref","unstructured":"Reddi, S.J., Sra, S., P\u00f3czos, B., Smola, A.: Stochastic Frank-Wolfe methods for nonconvex optimization. In: 2016 54th Annual Allerton Conference on Communication, Control, and Computing (Allerton), pp. 1244\u20131251. IEEE (2016)","DOI":"10.1109\/ALLERTON.2016.7852377"},{"key":"1583_CR63","unstructured":"Reddi, S.J., Sra, S., P\u00f3czos, B., Smola, A.J.: Proximal stochastic methods for nonsmooth nonconvex finite-sum optimization. In: Advances in Neural Information Processing Systems, pp. 1145\u20131153 (2016)"},{"issue":"3","key":"1583_CR64","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 22(3), 400\u2013407 (1951)","journal-title":"Ann. Math. Stat."},{"key":"1583_CR65","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/s10107-018-1346-5","volume":"174","author":"F Roosta-Khorasani","year":"2019","unstructured":"Roosta-Khorasani, F., Mahoney, M.W.: Sub-sampled Newton methods I: globally convergent algorithms. Math. Program. 174, 293\u2013326 (2019)","journal-title":"Math. Program."},{"issue":"1\u20132","key":"1583_CR66","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/s10107-016-1030-6","volume":"162","author":"M Schmidt","year":"2017","unstructured":"Schmidt, M., Le Roux, N., Bach, F.: Minimizing finite sums with the stochastic average gradient. Math. Program. 162(1\u20132), 83\u2013112 (2017)","journal-title":"Math. Program."},{"key":"1583_CR67","first-page":"567","volume":"14","author":"S Shalev-Shwartz","year":"2013","unstructured":"Shalev-Shwartz, S., Zhang, T.: Stochastic dual coordinate ascent methods for regularized loss minimization. J. Mach. Learn. Res. 14, 567\u2013599 (2013)","journal-title":"J. Mach. Learn. Res."},{"key":"1583_CR68","unstructured":"Tran-Dinh, Q., Liu, D., Nguyen, L.M.: Hybrid variance-reduced SGD algorithms for minimax problems with nonconvex-linear function. Proc. of The Thirty-fourth Conference on Neural In-formation Processing Systems (NeurIPS) (2020)"},{"key":"1583_CR69","first-page":"1","volume":"20","author":"M Unser","year":"2019","unstructured":"Unser, M.: A Representer Theorem for Deep Neural Networks. J. Math. 20, 1\u201330 (2019)","journal-title":"J. Math."},{"issue":"1\u20132","key":"1583_CR70","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/s10107-016-1017-3","volume":"161","author":"M Wang","year":"2017","unstructured":"Wang, M., Fang, E., Liu, L.: Stochastic compositional gradient descent: algorithms for minimizing compositions of expected-value functions. Math. Program. 161(1\u20132), 419\u2013449 (2017)","journal-title":"Math. Program."},{"key":"1583_CR71","unstructured":"Wang, Z., Ji, K., Zhou, Y., Liang, Y., Tarokh, V.: SpiderBoost and Momentum: Faster variance reduction algorithms. Proc. of The Thirty-third Conference on Neural Information Processing Systems (NeurIPS) (2019)"},{"key":"1583_CR72","unstructured":"Wang, Z., Zhou, Y., Liang, Y., Lan, G.: Stochastic variance-reduced cubic regularization for nonconvex optimization. In: The 22nd International Conference on Artificial Intelligence and Statistics, pp. 2731\u20132740 (2019)"},{"key":"1583_CR73","unstructured":"Woodworth, B.E., Srebro, N.: Tight complexity bounds for optimizing composite objectives. In: Advances in Neural Information Processing Systems (NIPS), pp. 3639\u20133647 (2016)"},{"key":"1583_CR74","unstructured":"Zhang, J., Xiao, L., Zhang, S.: Adaptive stochastic variance reduction for subsampled Newton method with cubic regularization. arXiv:1811.11637 (2018)"},{"key":"1583_CR75","doi-asserted-by":"crossref","unstructured":"Zhao, L., Mammadov, M., Yearwood, J.: From convex to nonconvex: a loss function analysis for binary classification. In: IEEE International Conference on Data Mining Workshops (ICDMW), pp. 1281\u20131288. IEEE (2010)","DOI":"10.1109\/ICDMW.2010.57"},{"key":"1583_CR76","unstructured":"Zhao, P., Zhang, T.: Stochastic optimization with importance sampling for regularized loss minimization. In: International Conference on Machine Learning, pp. 1\u20139 (2015)"},{"key":"1583_CR77","unstructured":"Zhou, D., Gu, Q.: Lower bounds for smooth nonconvex finite-sum optimization (2019)"},{"key":"1583_CR78","unstructured":"Zhou, D., Gu, Q.: Stochastic recursive variance-reduced cubic regularization methods. Proc. of The 24th International Conference on Articial Intelligence and Statistics (AISTATS) (2020)"},{"key":"1583_CR79","unstructured":"Zhou, D., Xu, P., Gu, Q.: Stochastic nested variance reduction for nonconvex optimization. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems, pp. 3925\u20133936. Curran Associates Inc. (2018)"},{"key":"1583_CR80","unstructured":"Zhou, K., Shang, F., Cheng, J.: A simple stochastic variance reduced algorithm with fast convergence rates. In: International Conference on Machine Learning, pp. 5975\u20135984 (2018)"},{"key":"1583_CR81","unstructured":"Zhou, Y., Wang, Z., Ji, K., Liang, Y., Tarokh, V.: Momentum schemes with stochastic variance reduction for nonconvex composite optimization. arXiv:1902.02715 (2019)"}],"container-title":["Mathematical Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-020-01583-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10107-020-01583-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-020-01583-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,23]],"date-time":"2022-02-23T02:08:50Z","timestamp":1645582130000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10107-020-01583-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,4]]},"references-count":81,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["1583"],"URL":"https:\/\/doi.org\/10.1007\/s10107-020-01583-1","relation":{},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,4]]},"assertion":[{"value":"4 September 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 October 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}