{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:17:57Z","timestamp":1775837877582,"version":"3.50.1"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"publisher","award":["1766\/22"],"award-info":[{"award-number":["1766\/22"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"publisher","award":["819\/20"],"award-info":[{"award-number":["819\/20"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Inform. Theory"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tit.2024.3422837","type":"journal-article","created":{"date-parts":[[2024,7,3]],"date-time":"2024-07-03T17:28:25Z","timestamp":1720027705000},"page":"4485-4514","source":"Crossref","is-referenced-by-count":1,"title":["Batches Stabilize the Minimum Norm Risk in High-Dimensional Overparametrized Linear Regression"],"prefix":"10.1109","volume":"71","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8865-3791","authenticated-orcid":false,"given":"Shahar","family":"Stein Ioushua","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering-Systems, Tel Aviv University, Tel Aviv, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Inbar","family":"Hasidim","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering-Systems, Tel Aviv University, Tel Aviv, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4321-0318","authenticated-orcid":false,"given":"Ofer","family":"Shayevitz","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering-Systems, Tel Aviv University, Tel Aviv, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1290-0482","authenticated-orcid":false,"given":"Meir","family":"Feder","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering-Systems, Tel Aviv University, Tel Aviv, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"3321","article-title":"Communication-efficient algorithms for statistical optimization","volume":"14","author":"Zhang","year":"2013","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"ref2","first-page":"3299","article-title":"Divide and conquer kernel ridge regression: A distributed algorithm with minimax optimal rates","volume":"16","author":"Zhang","year":"2015","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"ref3","first-page":"2657","article-title":"Distributed coordinate descent method for learning with big data","volume":"17","author":"Richt\u00e1rik","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref4","first-page":"1223","article-title":"Large scale distributed deep networks","volume-title":"Proc. 25th Int. Conf. Neural Inf. Process. Syst.","volume":"1","author":"Dean"},{"key":"ref5","first-page":"1801","article-title":"Distributed algorithms for topic models","volume":"10","author":"Newman","year":"2009","journal-title":"J. Mach. Learn. Res."},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3377454"},{"issue":"1","key":"ref7","first-page":"2483","article-title":"WONDER: Weighted one-shot distributed ridge regression in high dimensions","volume":"21","author":"Dobriban","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1214\/20-AOS1984"},{"key":"ref9","first-page":"10322","article-title":"Data-splitting improves statistical performance in overparameterized regimes","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"M\u00fccke"},{"key":"ref10","article-title":"Communication-efficient sparse regression: A one-shot approach","author":"Lee","year":"2015","journal-title":"arXiv:1503.04337"},{"key":"ref11","article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","author":"Goyal","year":"2017","journal-title":"arXiv:1706.02677"},{"key":"ref12","first-page":"1729","article-title":"Train longer, generalize better: Closing the generalization gap in large batch training of neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Hoffer"},{"issue":"4","key":"ref13","first-page":"507","article-title":"Distribution of eigenvalues for some sets of random matrices","volume":"114","author":"Marchenko","year":"1967","journal-title":"Matematicheskii Sbornik"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1214\/21-AOS2133"},{"key":"ref15","article-title":"Optimal regularization can mitigate double descent","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Nakkiran"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/18.720540"},{"key":"ref17","first-page":"2328","article-title":"Information-theoretic lower bounds for distributed statistical estimation with communication constraints","volume-title":"Proc. 26th Int. Conf. Neural Inf. Process. Syst.","volume":"2","author":"Zhang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2897518.2897582"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/tit.2021.3108952"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/18.2629"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3313276.3316332"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2019.2913384"},{"key":"ref23","first-page":"8580","article-title":"Neural tangent kernel: Convergence and generalization in neural networks","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst. (NIPS)","author":"Jacot-Guillarmod"},{"key":"ref24","article-title":"Gradient descent provably optimizes over-parameterized neural networks","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Du"},{"key":"ref25","first-page":"1675","article-title":"Gradient descent finds global minima of deep neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Du"},{"key":"ref26","first-page":"242","article-title":"A convergence theory for deep learning via over-parameterization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Allen-Zhu"},{"key":"ref27","first-page":"2937","article-title":"On lazy training in differentiable programming","volume-title":"Proc. 33rd Conf. Neural Inf. Process. Syst. (NeurIPS)","author":"Chizat"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref29","article-title":"On large-batch training for deep learning: Generalization gap and sharp minima","volume-title":"Proc. 5th Int. Conf. Learn. Represent.","author":"Keskar"},{"key":"ref30","article-title":"Revisiting small batch training for deep neural networks","author":"Masters","year":"2018","journal-title":"arXiv:1804.07612"},{"key":"ref31","article-title":"Don\u2019t use large mini-batches, use local SGD","volume-title":"Proc. 8th Int. Conf. Learn. Represent.","author":"Lin"},{"key":"ref32","first-page":"1143","article-title":"Control batch size and learning rate to generalize well: Theoretical and empirical evidence","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"He"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.icte.2020.04.010"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/MLKE55170.2022.00026"},{"key":"ref35","first-page":"3548","article-title":"SGD in the large: Average-case analysis, asymptotics, and stepsize criticality","volume-title":"Proc. Conf. Learn. Theory","author":"Paquette"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1137\/23M1594388"},{"key":"ref37","first-page":"52","article-title":"On a space of totally additive functions","volume":"13","author":"Kantorovich","year":"1958","journal-title":"Vestnik Leningrad. Univ"},{"key":"ref38","first-page":"257","article-title":"Multivariate normal approximation using exchange-able pairs","volume":"4","author":"Chatterjee","year":"2008","journal-title":"Alea"},{"key":"ref39","volume-title":"The Schur Complement and Its Applications","volume":"4","author":"Zhang","year":"2006"},{"key":"ref40","article-title":"Problems in information theory and high dimensional statistics","author":"Ioushua","year":"2022"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511794308.006"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1002\/rsa.10073"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1214\/aop\/1176992819"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1137\/0609045"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1017\/9781108591034"},{"issue":"2","key":"ref46","first-page":"176","article-title":"Multivariate analysis","volume":"9","author":"Mardia","year":"1979","journal-title":"Probab. Math. Statist."},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"}],"container-title":["IEEE Transactions on Information Theory"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/18\/11011509\/10584422.pdf?arnumber=10584422","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T17:04:07Z","timestamp":1748019847000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10584422\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":47,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tit.2024.3422837","relation":{},"ISSN":["0018-9448","1557-9654"],"issn-type":[{"value":"0018-9448","type":"print"},{"value":"1557-9654","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}