{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T01:18:35Z","timestamp":1760577515577,"version":"3.37.3"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62101134","62231010","62071126"],"award-info":[{"award-number":["62101134","62231010","62071126"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Signal Process."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/tsp.2024.3452035","type":"journal-article","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T18:03:55Z","timestamp":1725041035000},"page":"4050-4064","source":"Crossref","is-referenced-by-count":2,"title":["STSyn: Speeding Up Local SGD With Straggler-Tolerant Synchronization"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4661-0686","authenticated-orcid":false,"given":"Feng","family":"Zhu","sequence":"first","affiliation":[{"name":"Key Laboratory for Information Science of Electromagnetic Waves (MoE), Department of Communication Science and Engineering, School of Information Science and Technology, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1498-4912","authenticated-orcid":false,"given":"Jingjing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Key Laboratory for Information Science of Electromagnetic Waves (MoE), Department of Communication Science and Engineering, School of Information Science and Technology, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2292-3845","authenticated-orcid":false,"given":"Xin","family":"Wang","sequence":"additional","affiliation":[{"name":"Key Laboratory for Information Science of Electromagnetic Waves (MoE), Department of Communication Science and Engineering, School of Information Science and Technology, Fudan University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"ref2","first-page":"5336","article-title":"Can decentralized algorithms outperform centralized algorithms? A case study for decentralized parallel stochastic gradient descent","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"30","author":"Lian","year":"2017"},{"key":"ref3","first-page":"5906","article-title":"Collaborative deep learning in fixed topology networks","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"30","author":"Jiang","year":"2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685095"},{"key":"ref5","first-page":"1223","article-title":"Large scale distributed deep networks","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"25","author":"Dean","year":"2012"},{"key":"ref6","first-page":"2595","article-title":"Parallelized stochastic gradient descent","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"23","author":"Zinkevich","year":"2010"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2944481"},{"key":"ref8","first-page":"37","article-title":"Exploiting bounded staleness to speed up big data analytics","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Cui","year":"2014"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0028"},{"article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","year":"2017","author":"Goyal","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PAD.2019.00035"},{"key":"ref12","first-page":"4427","article-title":"Federated multi-task learning","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"30","author":"Smith","year":"2017"},{"key":"ref13","first-page":"1756","article-title":"Communication complexity of distributed convex learning and optimization","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"28","author":"Arjevani","year":"2015"},{"key":"ref14","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Statist.","author":"Mcmahan","year":"2017"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/447"},{"key":"ref16","article-title":"Local SGD converges fast and communicates little","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Stich","year":"2019"},{"key":"ref17","article-title":"Cooperative SGD: A unified framework for the design and analysis of communication-efficient SGD algorithms","volume-title":"Proc. ICML Workshop Coding Theory Mach. Learn.","author":"Wang","year":"2019"},{"key":"ref18","first-page":"8505","article-title":"Graph oracle models, lower bounds, and gaps for parallel stochastic optimization","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"31","author":"Woodworth","year":"2018"},{"key":"ref19","article-title":"On the convergence of FedAvg on non-IID data","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Li","year":"2019"},{"article-title":"First analysis of local GD on heterogeneous data","year":"2019","author":"Khaled","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015693"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904348"},{"article-title":"On the convergence of local descent methods in federated learning","year":"2019","author":"Haddadpour","key":"ref23"},{"key":"ref24","article-title":"Don\u2019t use large mini-batches, use local SGD","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tao","year":"2019"},{"key":"ref25","first-page":"11082","article-title":"Local SGD with periodic averaging: Tighter analysis and adaptive synchronization","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"32","author":"Haddadpour","year":"2019"},{"key":"ref26","first-page":"212","article-title":"Adaptive communication strategies to achieve the best error-runtime trade-off in local-update SGD","volume-title":"Proc. Mach. Learn. Syst.","volume":"1","author":"Wang","year":"2019"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2022.3205475"},{"article-title":"Adaptive stochastic gradient descent for fast and communication-efficient distributed learning","year":"2022","author":"Hanna","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i11.17153"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"key":"ref31","article-title":"Optimal time complexities of parallel stochastic optimization methods under a fixed computation model","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Tyurin","year":"2024"},{"key":"ref32","first-page":"3368","article-title":"Gradient coding: Avoiding stragglers in distributed learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tandon","year":"2017"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2017.2756959"},{"article-title":"ErasureHead: Distributed gradient descent without delays using approximate gradient coding","year":"2019","author":"Wang","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2979762"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/DSW.2019.8755563"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2023.3244084"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2987550.2987554"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2022.3182221"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3121632"},{"key":"ref41","first-page":"803","article-title":"Slow and stale gradients can win the race: Error-runtime trade-offs in distributed SGD","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Dutta","year":"2018"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/SPAWC48557.2020.9154327"},{"key":"ref43","first-page":"7611","article-title":"Tackling the objective inconsistency problem in heterogeneous federated optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"33","author":"Wang","year":"2020"},{"key":"ref44","first-page":"3403","article-title":"Towards flexible device participation in federated learning","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Ruan","year":"2021"},{"key":"ref45","article-title":"Anytime minibatch: Exploiting stragglers in online distributed optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ferdinand","year":"2018"},{"key":"ref46","first-page":"873","article-title":"Distributed delayed stochastic optimization","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"24","author":"Agarwal","year":"2011"},{"key":"ref47","article-title":"Hogwild!: A lock-free approach to parallelizing stochastic gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"24","author":"Recht","year":"2011"},{"key":"ref48","article-title":"Cyclades: Conflict-free asynchronous machine learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Pan","year":"2016"},{"key":"ref49","first-page":"2737","article-title":"Asynchronous parallel stochastic gradient for nonconvex optimization","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"28","author":"Lian","year":"2015"},{"key":"ref50","first-page":"957","article-title":"AdaDelay: Delay adaptive distributed stochastic optimization","volume-title":"Proc. Artif. Intell. Statist.","author":"Sra","year":"2016"},{"key":"ref51","first-page":"4120","article-title":"Asynchronous stochastic gradient descent with delay compensation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zheng","year":"2017"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.2994391"},{"key":"ref53","first-page":"3581","article-title":"Federated learning with buffered asynchronous aggregation","volume-title":"Proc. 25th Int. Conf. Artif. Intell. Statist.","volume":"151","author":"Nguyen","year":"2022"},{"article-title":"Freya page: First optimal time complexity for large-scale nonconvex finite-sum optimization with heterogeneous asynchronous computations","year":"2024","author":"Tyurin","key":"ref54"},{"key":"ref55","article-title":"Communication-efficient distributed SGD with sketching","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Ivkin","year":"2019"},{"key":"ref56","first-page":"8253","article-title":"FetchSGD: Communication-efficient federated learning with sketching","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rothchild","year":"2020"},{"key":"ref57","article-title":"Sparsified SGD with memory","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Stich","year":"2018"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/j.spl.2007.05.022"},{"article-title":"Federated learning with non-IID data","year":"2018","author":"Zhao","key":"ref59"},{"key":"ref60","article-title":"Adaptive federated optimization","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Reddi","year":"2020"},{"key":"ref61","article-title":"Achieving linear speedup with partial worker participation in non-IID federated learning","volume-title":"Proc. Int. Conf. Learn. Representations (ICLR)","author":"Yang","year":"2020"},{"key":"ref62","first-page":"5132","article-title":"SCAFFOLD: Stochastic controlled averaging for federated learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Karimireddy","year":"2020"}],"container-title":["IEEE Transactions on Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/78\/10347386\/10659740.pdf?arnumber=10659740","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T17:32:24Z","timestamp":1727112744000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10659740\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":62,"URL":"https:\/\/doi.org\/10.1109\/tsp.2024.3452035","relation":{},"ISSN":["1053-587X","1941-0476"],"issn-type":[{"type":"print","value":"1053-587X"},{"type":"electronic","value":"1941-0476"}],"subject":[],"published":{"date-parts":[[2024]]}}}