{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,29]],"date-time":"2026-03-29T20:24:45Z","timestamp":1774815885829,"version":"3.50.1"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T00:00:00Z","timestamp":1654732800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T00:00:00Z","timestamp":1654732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000143","name":"Division of Computing and Communication Foundations","doi-asserted-by":"publisher","award":["1553086"],"award-info":[{"award-number":["1553086"]}],"id":[{"id":"10.13039\/100000143","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007297","name":"Office of Naval Research Global","doi-asserted-by":"publisher","award":["ONR-YIP N00014-19-1-2288"],"award-info":[{"award-number":["ONR-YIP N00014-19-1-2288"]}],"id":[{"id":"10.13039\/100007297","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000879","name":"Alfred P. Sloan Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000879","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000143","name":"Division of Computing and Communication Foundations","doi-asserted-by":"publisher","award":["1740751"],"award-info":[{"award-number":["1740751"]}],"id":[{"id":"10.13039\/100000143","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Program."],"published-print":{"date-parts":[[2023,5]]},"DOI":"10.1007\/s10107-022-01822-7","type":"journal-article","created":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T16:16:37Z","timestamp":1654791397000},"page":"165-214","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":53,"title":["Lower bounds for non-convex stochastic optimization"],"prefix":"10.1007","volume":"199","author":[{"given":"Yossi","family":"Arjevani","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5731-8640","authenticated-orcid":false,"given":"Yair","family":"Carmon","sequence":"additional","affiliation":[]},{"given":"John C.","family":"Duchi","sequence":"additional","affiliation":[]},{"given":"Dylan J.","family":"Foster","sequence":"additional","affiliation":[]},{"given":"Nathan","family":"Srebro","sequence":"additional","affiliation":[]},{"given":"Blake","family":"Woodworth","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,9]]},"reference":[{"issue":"58","key":"1822_CR1","doi-asserted-by":"publisher","first-page":"3235","DOI":"10.1109\/TIT.2011.2182178","volume":"5","author":"A Agarwal","year":"2012","unstructured":"Agarwal, A., Bartlett, P.L., Ravikumar, P., Wainwright, M.J.: Information-theoretic lower bounds on the oracle complexity of stochastic convex optimization. IEEE Trans. Inf. Theory 5(58), 3235\u20133249 (2012)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1822_CR2","unstructured":"Allen-Zhu, Z.: How to make the gradients small stochastically: even faster convex and nonconvex SGD. In: Advances in Neural Information Processing Systems, pp. 1165\u20131175 (2018a)"},{"key":"1822_CR3","unstructured":"Allen-Zhu, Z.: Natasha 2: Faster non-convex optimization than SGD. In: Advances in Neural Information Processing Systems, pp. 2675\u20132686, (2018b)"},{"key":"1822_CR4","unstructured":"Allen-Zhu, Z., Hazan, E.: Variance reduction for faster non-convex optimization. In International conference on machine learning, pp. 699\u2013707 (2016)"},{"key":"1822_CR5","unstructured":"Allen-Zhu, Z., Li. Y.: Neon2: finding local minima via first-order oracles. In: Advances in Neural Information Processing Systems, pp. 3716\u20133726 (2018)"},{"key":"1822_CR6","unstructured":"Arjevani, Y.: Limitations on variance-reduction and acceleration schemes for finite sums optimization. In: Advances in Neural Information Processing Systems, pp. 3540\u20133549 (2017)"},{"key":"1822_CR7","unstructured":"Arjevani, Y., Shamir, O.: Dimension-free iteration complexity of finite sum optimization problems. In: Advances in Neural Information Processing Systems, pp. 3540\u20133548 (2016)"},{"key":"1822_CR8","unstructured":"Arjevani, Y., Carmon, Y., Duchi, J.C., Foster, D.J., Sekhari, A., Sridharan, K.: Second-order information in non-convex stochastic optimization: power and limitations. In: Conference on Learning Theory, pp. 242\u2013299. PMLR (2020)"},{"key":"1822_CR9","unstructured":"Ball, K.: An elementary introduction to modern convex geometry. In Levy, S. (ed Flavors of Geometry, pp. 1\u201358. MSRI Publications (1997)"},{"key":"1822_CR10","unstructured":"Bottou, L., Bousquet, O.: The tradeoffs of large scale learning. In Advances in neural information processing systems, pp. 161\u2013168 (2008)"},{"issue":"2","key":"1822_CR11","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou, L., Curtis, F., Nocedal, J.: Optimization methods for large-scale learning. SIAM Rev. 60(2), 223\u2013311 (2018)","journal-title":"SIAM Rev."},{"issue":"7","key":"1822_CR12","doi-asserted-by":"publisher","first-page":"4709","DOI":"10.1109\/TIT.2017.2701343","volume":"63","author":"G Braun","year":"2017","unstructured":"Braun, G., Guzm\u00e1n, C., Pokutta, S.: Lower bounds on the oracle complexity of nonsmooth convex optimization via information theory. IEEE Trans. Inf. Theory 63(7), 4709\u20134724 (2017)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1822_CR13","unstructured":"Bubeck, S., Jiang, Q., Lee, Y.T., Li, Y., Sidford, A.: Complexity of highly parallel non-smooth convex optimization. In: Advances in Neural Information Processing Systems 32 (2019)"},{"key":"1822_CR14","unstructured":"Carmon, Y., Duchi, J.C., Hinder, O., Sidford, A.: Convex until proven guilty: Dimension-free acceleration of gradient descent on non-convex functions. In Proceedings of the 34th International Conference on Machine Learning, pp. 654\u2013663 (2017)"},{"issue":"1","key":"1822_CR15","first-page":"71","volume":"184","author":"Y Carmon","year":"2019","unstructured":"Carmon, Y., Duchi, J.C., Hinder, O., Sidford, A.: Lower bounds for finding stationary points I. Math. Progr. 184(1), 71\u2013120 (2019)","journal-title":"Math. Progr."},{"issue":"1","key":"1822_CR16","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1007\/s10107-019-01431-x","volume":"185","author":"Y Carmon","year":"2021","unstructured":"Carmon, Y., Duchi, J.C., Hinder, O., Sidford, A.: Lower bounds for finding stationary points II: First-order methods. Math. Progr. 185(1), 315\u2013355 (2021)","journal-title":"Math. Progr."},{"issue":"6","key":"1822_CR17","doi-asserted-by":"publisher","first-page":"2833","DOI":"10.1137\/090774100","volume":"20","author":"C Cartis","year":"2010","unstructured":"Cartis, C., Gould, N.I., Toint, P.L.: On the complexity of steepest descent, newton\u2019s and regularized newton\u2019s methods for nonconvex unconstrained optimization problems. Siam J. Opt. 20(6), 2833\u20132852 (2010)","journal-title":"Siam J. Opt."},{"issue":"1","key":"1822_CR18","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/j.jco.2011.06.001","volume":"28","author":"C Cartis","year":"2012","unstructured":"Cartis, C., Gould, N.I., Toint, P.L.: Complexity bounds for second-order optimality in unconstrained optimization. J. Complex. 28(1), 93\u2013108 (2012)","journal-title":"J. Complex."},{"key":"1822_CR19","first-page":"1","volume":"88","author":"C Cartis","year":"2012","unstructured":"Cartis, C., Gould, N.I., Toint, P.L.: How much patience to you have?: a worst-case perspective on smooth noncovex optimization. Optima 88, 1\u201310 (2012)","journal-title":"Optima"},{"key":"1822_CR20","unstructured":"Cartis, C., Gould, N.I., Toint, P.L.: Worst-case evaluation complexity and optimality of second-order methods for nonconvex smooth optimization. arXiv preprint arXiv:1709.07180, (2017)"},{"key":"1822_CR21","unstructured":"Cutkosky, A., Orabona, F.: Momentum-based variance reduction in non-convex SGD. Adv. Neural Inf. Process. Syst. (2019)"},{"key":"1822_CR22","unstructured":"Defazio, A., Bach, F., Lacoste-Julien, S.: SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives. In: Advances in Neural Information Processing Systems 27, (2014)"},{"key":"1822_CR23","unstructured":"Diakonikolas, J., Guzm\u00e1n, C.: Lower bounds for parallel and randomized convex optimization. In: Proceedings of the Thirty Second Annual Conference on Computational Learning Theory (2019)"},{"key":"1822_CR24","unstructured":"Drori, Y., Shamir, O.: The complexity of finding stationary points with stochastic gradient descent. arXiv preprint arXiv:1910.01845 (2019)"},{"key":"1822_CR25","unstructured":"Fang, C., Li, C.J., Lin, Z., Zhang, T.: Spider: near-optimal non-convex optimization via stochastic path-integrated differential estimator. In: Advances in Neural Information Processing Systems, pp. 689\u2013699 (2018)"},{"key":"1822_CR26","unstructured":"Fang, C., Lin, Z., Zhang, T.: Sharp analysis for nonconvex SGD escaping from saddle points. In: Beygelzimer, A., Hsu, D., (eds) Proceedings of the Thirty-Second Conference on Learning Theory, vol. 99, pp. 1192\u20131234. PMLR (2019)"},{"key":"1822_CR27","unstructured":"Foster, D.J., Sekhari, A., Shamir, O., Srebro, N., Sridharan, K., Woodworth, B.: The complexity of making the gradient small in stochastic convex optimization. In: Proceedings of the Thirty-Second Conference on Learning Theory, pp. 1319\u20131345 (2019)"},{"key":"1822_CR28","unstructured":"Ge, R., Huang, F., Jin, C., Yuan, Y.: Escaping from saddle points: online stochastic gradient for tensor decomposition. In: Conference on Learning Theory, pp. 797\u2013842 (2015)"},{"key":"1822_CR29","unstructured":"Ge, R., Lee, J.D., Ma, T.: Matrix completion has no spurious local minimum. In: Advances in Neural Information Processing Systems, pp. 2973\u20132981 (2016)"},{"issue":"4","key":"1822_CR30","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1137\/120880811","volume":"23","author":"S Ghadimi","year":"2013","unstructured":"Ghadimi, S., Lan, G.: Stochastic first-and zeroth-order methods for nonconvex stochastic programming. SIAM J. Opt. 23(4), 2341\u20132368 (2013)","journal-title":"SIAM J. Opt."},{"issue":"1","key":"1822_CR31","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1214\/aos\/1193342380","volume":"1","author":"L LeCam","year":"1973","unstructured":"LeCam, L.: Convergence of estimates under dimensionality restrictions. Ann. Stat. 1(1), 38\u201353 (1973)","journal-title":"Ann. Stat."},{"key":"1822_CR32","unstructured":"Lei, L., Ju, C., Chen, J., Jordan, M.I.: Non-convex finite-sum optimization via SCSG methods. In: Advances in Neural Information Processing Systems, pp. 2348\u20132358 (2017)"},{"key":"1822_CR33","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-019-09429-9","author":"C Ma","year":"2019","unstructured":"Ma, C., Wang, K., Chi, Y., Chen, Y.: Implicit regularization in nonconvex statistical estimation: gradient descent converges linearly for phase retrieval, matrix completion and blind deconvolution. Found. Comput. Math. (2019). https:\/\/doi.org\/10.1007\/s10208-019-09429-9","journal-title":"Found. Comput. Math."},{"issue":"2","key":"1822_CR34","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1007\/BF02592948","volume":"39","author":"KG Murty","year":"1987","unstructured":"Murty, K.G., Kabadi, S.N.: Some np-complete problems in quadratic and nonlinear programming. Math. progr. 39(2), 117\u2013129 (1987)","journal-title":"Math. progr."},{"issue":"4","key":"1822_CR35","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1006\/jcom.1994.1025","volume":"10","author":"A Nemirovski","year":"1994","unstructured":"Nemirovski, A.: On parallel complexity of nonsmooth convex optimization. J. Complex. 10(4), 451\u2013463 (1994)","journal-title":"J. Complex."},{"key":"1822_CR36","unstructured":"Nemirovski, A., Yudin, D.B.: Problem Complexity and Method Efficiency in Optimization. Wiley (1983)"},{"key":"1822_CR37","doi-asserted-by":"crossref","unstructured":"Nesterov, Y.: Introductory Lectures of Convex Optimization. Kluwer Academic Publishers (2004)","DOI":"10.1007\/978-1-4419-8853-9"},{"issue":"1","key":"1822_CR38","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s10107-006-0706-8","volume":"108","author":"Y Nesterov","year":"2006","unstructured":"Nesterov, Y., Polyak, B.T.: Cubic regularization of newton method and its global performance. Math. Progr. 108(1), 177\u2013205 (2006)","journal-title":"Math. Progr."},{"issue":"2","key":"1822_CR39","first-page":"372","volume":"27","author":"YE Nesterov","year":"1983","unstructured":"Nesterov, Y.E.: A method for solving the convex programming problem with convergence rate $$o (1\/k^2)$$. Sov. Math. Dokl. 27(2), 372\u2013376 (1983)","journal-title":"Sov. Math. Dokl."},{"key":"1822_CR40","unstructured":"Nocedal, J., Wright, S.: Numerical Optimization. Springer Science & Business Media (2006)"},{"issue":"10","key":"1822_CR41","doi-asserted-by":"publisher","first-page":"7036","DOI":"10.1109\/TIT.2011.2154375","volume":"57","author":"M Raginsky","year":"2011","unstructured":"Raginsky, M., Rakhlin, A.: Information-based complexity, feedback and dynamics in convex programming. IEEE Trans. Inf. Theory 57(10), 7036\u20137056 (2011)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"1822_CR42","doi-asserted-by":"crossref","unstructured":"Reddi, S.J., Hefny, A., Sra, S., Poczos, B., Smola, A.: Stochastic variance reduction for nonconvex optimization. In: International Conference on Machine Learning, pp. 314\u2013323 (2016)","DOI":"10.1109\/ALLERTON.2016.7852377"},{"key":"1822_CR43","unstructured":"Schmidt, M., Roux, N.L., Bach, F.: Convergence rates of inexact proximal-gradient methods for convex optimization. In: Advances in Neural Information Processing Systems 24 (2011)"},{"key":"1822_CR44","first-page":"567","volume":"14","author":"S Shalev-Shwartz","year":"2013","unstructured":"Shalev-Shwartz, S., Zhang, T.: Stochastic dual coordinate ascent methods for regularized loss minimization. J. Mach. Learn. Res. 14, 567\u2013599 (2013)","journal-title":"J. Mach. Learn. Res."},{"issue":"5","key":"1822_CR45","doi-asserted-by":"publisher","first-page":"1131","DOI":"10.1007\/s10208-017-9365-9","volume":"18","author":"J Sun","year":"2018","unstructured":"Sun, J., Qu, Q., Wright, J.: A geometric analysis of phase retrieval. Found. Comput. Math. 18(5), 1131\u20131198 (2018)","journal-title":"Found. Comput. Math."},{"key":"1822_CR46","unstructured":"Traub, J.F., Wasilkowski, G.W., Wo\u017aniakowski H.: Information-Based Complexity. (1988)"},{"key":"1822_CR47","unstructured":"Tripuraneni, N., Stern, M., Jin, C., Regier, J., Jordan, M.I.: Stochastic cubic regularization for fast nonconvex optimization. In: Advances in Neural Information Processing Systems, pp. 2899\u20132908 (2018)"},{"issue":"1","key":"1822_CR48","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1137\/0803004","volume":"3","author":"SA Vavasis","year":"1993","unstructured":"Vavasis, S.A.: Black-box complexity of local minimization. SIAM J. Opt. 3(1), 60\u201380 (1993)","journal-title":"SIAM J. Opt."},{"key":"1822_CR49","unstructured":"Wang, Z., Ji, K., Zhou, Y., Liang, Y., Tarokh, V.: Spiderboost: a class of faster variance-reduced algorithms for nonconvex optimization. arXiv preprint arXiv:1810.10690, (2018)"},{"key":"1822_CR50","unstructured":"Woodworth, B., Srebro, N.: Tight complexity bounds for optimizing composite objectives. In: Advances in Neural Information Processing Systems, pp. 3639\u20133647 (2016)"},{"key":"1822_CR51","unstructured":"Woodworth, B., Srebro, N.: Lower bound for randomized first order convex optimization. arXiv preprint, arXiv:1709.03594 (2017)"},{"key":"1822_CR52","unstructured":"Xu, Y., Rong, J., Yang, T.: First-order stochastic algorithms for escaping from saddle points in almost linear time. In: Advances in Neural Information Processing Systems, pp. 5530\u20135540 (2018)"},{"key":"1822_CR53","doi-asserted-by":"crossref","unstructured":"Yao, A.C.-C.: Probabilistic computations: toward a unified measure of complexity. In 18th Annual Symposium on Foundations of Computer Science, pp. 222\u2013227. IEEE (1977)","DOI":"10.1109\/SFCS.1977.24"},{"key":"1822_CR54","doi-asserted-by":"crossref","unstructured":"Yu, B.: Assouad, Fano, and Le Cam. In Festschrift for Lucien Le Cam, pp. 423\u2013435. Springer (1997)","DOI":"10.1007\/978-1-4612-1880-7_29"},{"key":"1822_CR55","unstructured":"Zhou, D., Gu, Q.: Lower bounds for smooth nonconvex finite-sum optimization. In: International Conference on Machine Learning (2019)"},{"key":"1822_CR56","unstructured":"Zhou, D., Xu, P., Gu, Q.: Stochastic nested variance reduction for nonconvex optimization. In: Advances in Neural Information Processing Systems, pp. 3925\u20133936. Curran Associates Inc. (2018)"},{"key":"1822_CR57","unstructured":"Zhou, D., Xu, P., Gu, Q.: Stochastic nested variance reduction for nonconvex optimization. J. Mach. Learn. Res. (2020)"}],"container-title":["Mathematical Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-022-01822-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10107-022-01822-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-022-01822-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,21]],"date-time":"2023-04-21T17:32:18Z","timestamp":1682098338000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10107-022-01822-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,9]]},"references-count":57,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2023,5]]}},"alternative-id":["1822"],"URL":"https:\/\/doi.org\/10.1007\/s10107-022-01822-7","relation":{},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,9]]},"assertion":[{"value":"26 May 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 April 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 June 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}