{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T00:41:51Z","timestamp":1773276111614,"version":"3.50.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2019,10,29]],"date-time":"2019-10-29T00:00:00Z","timestamp":1572307200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,10,29]],"date-time":"2019-10-29T00:00:00Z","timestamp":1572307200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DMS-1723085"],"award-info":[{"award-number":["DMS-1723085"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Math. Program."],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s10107-019-01440-w","type":"journal-article","created":{"date-parts":[[2019,10,30]],"date-time":"2019-10-30T20:56:45Z","timestamp":1572469005000},"page":"49-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":42,"title":["Why random reshuffling beats stochastic gradient descent"],"prefix":"10.1007","volume":"186","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0575-2450","authenticated-orcid":false,"given":"M.","family":"G\u00fcrb\u00fczbalaban","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"A.","family":"Ozdaglar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"P. A.","family":"Parrilo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,10,29]]},"reference":[{"issue":"5","key":"1440_CR1","doi-asserted-by":"publisher","first-page":"3235","DOI":"10.1109\/TIT.2011.2182178","volume":"58","author":"A Agarwal","year":"2012","unstructured":"Agarwal, A., Bartlett, P.L., Ravikumar, P., Wainwright, M.J.: Information-theoretic lower bounds on the oracle complexity of stochastic convex optimization. IEEE Trans. Inf. Theory 58(5), 3235\u20133249 (2012)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"3","key":"1440_CR2","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1137\/S1052623494268522","volume":"6","author":"D Bertsekas","year":"1996","unstructured":"Bertsekas, D.: Incremental least squares methods and the extended Kalman filter. SIAM J. Optim. 6(3), 807\u2013822 (1996)","journal-title":"SIAM J. Optim."},{"key":"1440_CR3","doi-asserted-by":"publisher","first-page":"913","DOI":"10.1137\/S1052623495287022","volume":"7","author":"D Bertsekas","year":"1997","unstructured":"Bertsekas, D.: A hybrid incremental gradient method for least squares. SIAM J. Optim. 7, 913\u2013926 (1997)","journal-title":"SIAM J. Optim."},{"key":"1440_CR4","volume-title":"Nonlinear Programming","author":"D Bertsekas","year":"1999","unstructured":"Bertsekas, D.: Nonlinear Programming. Athena Scientific, Belmont (1999)"},{"key":"1440_CR5","first-page":"2011","volume":"1\u201338","author":"D Bertsekas","year":"2010","unstructured":"Bertsekas, D.: Incremental gradient, subgradient, and proximal methods for convex optimization: a survey. Optim. Mach. Learn. 1\u201338, 2011 (2010)","journal-title":"Optim. Mach. Learn."},{"key":"1440_CR6","volume-title":"Convex Optimization Algorithms","author":"D Bertsekas","year":"2015","unstructured":"Bertsekas, D.: Convex Optimization Algorithms. Athena Scientific, Belmont (2015)"},{"key":"1440_CR7","unstructured":"Bottou, L.: Curiously fast convergence of some stochastic gradient descent algorithms. In: Proceedings of the Symposium on Learning and Data Science, Paris (2009)"},{"key":"1440_CR8","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Lechevallier, Y., Saporta, G. (eds.) Proceedings of COMPSTAT\u20192010, pp. 177\u2013186. Physica-Verlag, HD (2010)","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"1440_CR9","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Stochastic gradient descent tricks. In: Neural Networks: Tricks of the Trade, pp. 421\u2013436. Springer, Heidelberg (2012)","DOI":"10.1007\/978-3-642-35289-8_25"},{"key":"1440_CR10","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Stochastic gradient descent tricks. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. Lecture Notes in Computer Science, vol. 7700, pp. 421\u2013436. Springer, Berlin (2012)","DOI":"10.1007\/978-3-642-35289-8_25"},{"issue":"2","key":"1440_CR11","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1002\/asmb.538","volume":"21","author":"L Bottou","year":"2005","unstructured":"Bottou, L., Le Cun, Y.: On-line learning for very large data sets. Appl. Stoch. Models Bus. Ind. 21(2), 137\u2013151 (2005)","journal-title":"Appl. Stoch. Models Bus. Ind."},{"key":"1440_CR12","unstructured":"Boyd, S., Parikh, N., Chu, E., Peleato, B., Eckstein, J.: Distributed optimization and statistical learning via the alternating direction method of multipliers. Foundations and Trends$$\\textregistered $$ in Machine Learning 3(1), 1\u2013122 (2011)"},{"issue":"3","key":"1440_CR13","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1214\/aoms\/1177728716","volume":"25","author":"KL Chung","year":"1954","unstructured":"Chung, K.L.: On a stochastic approximation method. Ann. Math. Stat. 25(3), 463\u2013483 (1954)","journal-title":"Ann. Math. Stat."},{"key":"1440_CR14","unstructured":"Defazio, A., Bach, F., Lacoste-Julien, S.: Saga: A fast incremental gradient method with support for non-strongly convex composite objectives. In: Advances in Neural Information Processing Systems, pp. 1646\u20131654 (2014)"},{"issue":"9","key":"1440_CR15","doi-asserted-by":"publisher","first-page":"2739","DOI":"10.1090\/S0002-9939-06-08296-7","volume":"134","author":"N Etemadi","year":"2006","unstructured":"Etemadi, N.: Convergence of weighted averages of random variables revisited. Proc. Am. Math. Soc. 134(9), 2739\u20132744 (2006)","journal-title":"Proc. Am. Math. Soc."},{"issue":"1","key":"1440_CR16","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1214\/aoms\/1177699070","volume":"38","author":"V Fabian","year":"1967","unstructured":"Fabian, V.: Stochastic approximation of minima with improved asymptotic speed. Ann. Math. Stat. 38(1), 191\u2013200 (1967)","journal-title":"Ann. Math. Stat."},{"key":"1440_CR17","doi-asserted-by":"publisher","first-page":"1327","DOI":"10.1214\/aoms\/1177698258","volume":"39","author":"V Fabian","year":"1968","unstructured":"Fabian, V.: On asymptotic normality in stochastic approximation. Ann. Math. Stat. 39, 1327\u20131332 (1968)","journal-title":"Ann. Math. Stat."},{"key":"1440_CR18","doi-asserted-by":"crossref","unstructured":"Feng, X., Kumar, A., Recht, B., R\u00e9, C.: Towards a unified architecture for in-RDBMS analytics. In: Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data, pp. 325\u2013336. ACM (2012)","DOI":"10.1145\/2213836.2213874"},{"key":"1440_CR19","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/978-3-642-55692-0_4","volume-title":"Frontiers in Numerical Analysis. Universitext","author":"NIM Gould","year":"2003","unstructured":"Gould, N.I.M., Leyffer, S.: An introduction to algorithms for nonlinear optimization. In: Blowey, J.F., Craig, A.W., Shardlow, T. (eds.) Frontiers in Numerical Analysis. Universitext, pp. 109\u2013197. Springer, Berlin (2003)"},{"key":"1440_CR20","unstructured":"Goyal, P., Doll\u00e1r, P., Girshick, R., Noordhuis, P., Wesolowski, L., Kyrola, A., Tulloch, A., Jia, Y., He, K.: Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour. arXiv e-prints, page arXiv:1706.02677, (Jun 2017)"},{"key":"1440_CR21","unstructured":"G\u00fcrb\u00fczbalaban, M., Ozdaglar, A., Parrilo, P.: Convergence rate of incremental gradient and Newton methods. arXiv preprint arXiv:1510.08562, (October 2015)"},{"key":"1440_CR22","unstructured":"Harikandeh, R., Ahmed, M.O., Virani, A., Schmidt, M., Kone\u010dn\u1ef3, J., Sallinen, S.: Stopwasting my gradients: Practical SVRG. In: Advances in Neural Information Processing Systems, pp. 2251\u20132259 (2015)"},{"key":"1440_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.laa.2015.09.013","volume":"488","author":"A Israel","year":"2016","unstructured":"Israel, A., Krahmer, F., Ward, R.: An arithmetic-geometric mean inequality for products of three matrices. Linear Algebra Appl. 488, 1\u201312 (2016)","journal-title":"Linear Algebra Appl."},{"key":"1440_CR24","unstructured":"Johnson, R., Zhang, T.: Accelerating stochastic gradient descent using predictive variance reduction. In: Advances in Neural Information Processing Systems, pp. 315\u2013323 (2013)"},{"key":"1440_CR25","volume-title":"Stochastic Approximation and Recursive Algorithms and Applications","author":"HJ Kushner","year":"2003","unstructured":"Kushner, H.J., Yin, G.: Stochastic Approximation and Recursive Algorithms and Applications, vol. 35. Springer, Berlin (2003)"},{"key":"1440_CR26","unstructured":"Moulines, E., Bach, F.R.: Non-asymptotic analysis of stochastic approximation algorithms for machine learning. In: Advances in Neural Information Processing, pp. 451\u2013459 (2011)"},{"key":"1440_CR27","doi-asserted-by":"crossref","unstructured":"Nedi\u0107, A., Ozdaglar, A.: On the rate of convergence of distributed subgradient methods for multi-agent optimization. In: Proceedings of the 46th IEEE Conference on Decision and Control (CDC), pp. 4711\u20134716 (2007)","DOI":"10.1109\/CDC.2007.4434693"},{"issue":"1","key":"1440_CR28","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/TAC.2008.2009515","volume":"54","author":"A Nedi\u0107","year":"2009","unstructured":"Nedi\u0107, A., Ozdaglar, A.: Distributed subgradient methods for multi-agent optimization. IEEE Trans. Autom. Control 54(1), 48\u201361 (2009)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"4","key":"1440_CR29","doi-asserted-by":"publisher","first-page":"1574","DOI":"10.1137\/070704277","volume":"19","author":"A Nemirovski","year":"2009","unstructured":"Nemirovski, A., Juditsky, A., Lan, G., Shapiro, A.: Robust stochastic approximation approach to stochastic programming. SIAM J. Optim. 19(4), 1574\u20131609 (2009)","journal-title":"SIAM J. Optim."},{"key":"1440_CR30","volume-title":"Problem complexity and method efficiency in optimization","author":"AS Nemirovskii","year":"1983","unstructured":"Nemirovskii, A.S., Yudin, D.B., Dawson, E.R.: Problem complexity and method efficiency in optimization. Wiley-Interscience Series in Discrete Mathematics. Wiley, Chichester (1983)"},{"issue":"4","key":"1440_CR31","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"BT Polyak","year":"1992","unstructured":"Polyak, B.T., Juditsky, A.B.: Acceleration of stochastic approximation by averaging. SIAM J. Control Optim. 30(4), 838\u2013855 (1992)","journal-title":"SIAM J. Control Optim."},{"key":"1440_CR32","unstructured":"Rakhlin, A., Shamir, O., Sridharan, K.: Making gradient descent optimal for strongly convex stochastic optimization. In: Proceedings of the 29th International Conference on Machine Learning (ICML-12), pp. 449\u2013456 (2012)"},{"key":"1440_CR33","first-page":"582","volume":"2007","author":"SS Ram","year":"2007","unstructured":"Ram, S.S., Nedic, A., Veeravalli, V.V.: Stochastic incremental gradient descent for estimation in sensor networks. Signals Syst. Comput. ACSSC 2007, 582\u2013586 (2007)","journal-title":"Signals Syst. Comput. ACSSC"},{"key":"1440_CR34","first-page":"11.1","volume":"23","author":"B Recht","year":"2012","unstructured":"Recht, B., R\u00e9, C.: Toward a noncommutative arithmetic-geometric mean inequality: conjectures, case-studies, and consequences. JMLR Workshop Conf. Proc. 23, 11.1\u201311.24 (2012)","journal-title":"JMLR Workshop Conf. Proc."},{"issue":"2","key":"1440_CR35","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1007\/s12532-013-0053-8","volume":"5","author":"B Recht","year":"2013","unstructured":"Recht, B., R\u00e9, C.: Parallel stochastic gradient algorithms for large-scale matrix completion. Math. Program. Comput. 5(2), 201\u2013226 (2013)","journal-title":"Math. Program. Comput."},{"key":"1440_CR36","unstructured":"Recht, B., R\u00e9, C., Wright, S., Feng, N. Hogwild: A lock-free approach to parallelizing stochastic gradient descent. In: Shawe-Taylor, J., Zemel, R.S., Bartlett, P.L., Pereira, F., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems 24, pp. 693\u2013701. Curran Associates Inc (2011)"},{"key":"1440_CR37","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. The Annals of Mathematical Statistics, pp. 400\u2013407, (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"1440_CR38","first-page":"2663","volume-title":"Advances in Neural Information Processing Systems 25","author":"NL Roux","year":"2012","unstructured":"Roux, N.L., Schmidt, M., Bach, F.R.: A stochastic gradient method with an exponential convergence rate for finite training sets. In: Pereira, F., Burges, C.J.C., Bottou, L., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems 25, pp. 2663\u20132671. Curran Associates Inc, Red Hook (2012)"},{"key":"1440_CR39","unstructured":"Roux, N.L., Schmidt, M., Bach, F.R.: A stochastic gradient method with an exponential convergence \\_rate for finite training sets. In: Advances in Neural Information Processing Systems, pp. 2663\u20132671 (2012)"},{"key":"1440_CR40","unstructured":"Shamir, O.: Open Problem: Is Averaging Needed for Strongly Convex Stochastic Gradient Descent? COLT, (2012)"},{"key":"1440_CR41","unstructured":"Sohl-Dickstein, J., Poole, B., Ganguli, S.: Fast large-scale optimization by unifying stochastic gradient and quasi-Newton methods. In: Jebara, T., Xing, E.P. (eds), JMLR Workshop and Conference Proceedings ICML, pp. 604\u2013612 (2014)"},{"key":"1440_CR42","doi-asserted-by":"crossref","unstructured":"Sparks, E.R., Talwalkar, A., Smith, V., Kottalam, J., Xinghao, P., Gonzalez, J., Franklin, M.J., Jordan, M.I., Kraska, T.: MLI: An API for distributed machine learning. In: IEEE 13th International Conference on Data Mining (ICDM), pp. 1187\u20131192 (2013)","DOI":"10.1109\/ICDM.2013.158"},{"key":"1440_CR43","unstructured":"Zhang, T.: Solving large scale linear prediction problems using stochastic gradient descent algorithms. In: Proceedings of the Twenty-first International Conference on Machine Learning, ICML, p. 116, New York, NY, USA, (2004). ACM"},{"key":"1440_CR44","unstructured":"Zhang, T.: A note on the non-commutative arithmetic-geometric mean inequality. arXiv preprint arXiv:1411.5058, (November 2014)"}],"container-title":["Mathematical Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-019-01440-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10107-019-01440-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10107-019-01440-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,11]],"date-time":"2021-02-11T09:40:06Z","timestamp":1613036406000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10107-019-01440-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,29]]},"references-count":44,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["1440"],"URL":"https:\/\/doi.org\/10.1007\/s10107-019-01440-w","relation":{},"ISSN":["0025-5610","1436-4646"],"issn-type":[{"value":"0025-5610","type":"print"},{"value":"1436-4646","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,10,29]]},"assertion":[{"value":"28 February 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 October 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}