{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T13:49:54Z","timestamp":1774964994640,"version":"3.50.1"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["62132022"],"award-info":[{"award-number":["62132022"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["62302524"],"award-info":[{"award-number":["62302524"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Innovation Program of Hunan Province","award":["2024JJ6531"],"award-info":[{"award-number":["2024JJ6531"]}]},{"name":"National Science Foundation of Hunan Province","award":["2024JJ5050"],"award-info":[{"award-number":["2024JJ5050"]}]},{"name":"National Science Foundation of Hunan Province","award":["2022JJ20078"],"award-info":[{"award-number":["2022JJ20078"]}]},{"DOI":"10.13039\/100016760","name":"Using Computing Resources at the High Performance Computing Center of Central South University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016760","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Networking"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tnet.2024.3441039","type":"journal-article","created":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T17:40:13Z","timestamp":1724089213000},"page":"4917-4930","source":"Crossref","is-referenced-by-count":6,"title":["Straggler-Aware Gradient Aggregation for Large-Scale Distributed Deep Learning System"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4335-8742","authenticated-orcid":false,"given":"Yijun","family":"Li","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7578-4490","authenticated-orcid":false,"given":"Jiawei","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9677-2368","authenticated-orcid":false,"given":"Zhaoyi","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8743-0270","authenticated-orcid":false,"given":"Jingling","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]},{"given":"Shengwen","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1559-406X","authenticated-orcid":false,"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hunan Province Key Laboratory of Industrial Internet Technology and Security, Changsha University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5067-321X","authenticated-orcid":false,"given":"Wanchun","family":"Jiang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1516-0480","authenticated-orcid":false,"given":"Jianxin","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref4","first-page":"2493","article-title":"Natural language processing (almost) from scratch","volume":"12","author":"Collobert","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"ref5","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","volume-title":"Proc. 12th USENIX Symp. Oper. Syst. Design Implement.","author":"Abadi"},{"key":"ref6","article-title":"Horovod: Fast and easy distributed deep learning in TensorFlow","author":"Sergeev","year":"2018","journal-title":"arXiv:1802.05799"},{"key":"ref7","first-page":"463","article-title":"A unified architecture for accelerating distributed DNN training in heterogeneous GPU\/CPU clusters","volume-title":"Proc. 14th USENIX Symp. Operating Syst. Design Implement. (OSDI)","author":"Jiang"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref9","volume-title":"Nvidia Collective Communication Library (NCCL)","year":"2024"},{"key":"ref10","first-page":"785","article-title":"Scaling distributed machine learning with in-network aggregation","volume-title":"Proc. 18th USENIX Symp. Netw. Syst. Design Implement. (NSDI)","author":"Sapio"},{"key":"ref11","first-page":"829","article-title":"In-network aggregation for shared machine learning clusters","volume-title":"Proc. MLSys","author":"Gebara"},{"key":"ref12","first-page":"741","article-title":"ATP: In-network aggregation for multi-tenant learning","volume-title":"Proc. 18th USENIX Symp. Networked Syst. Design Implement. (NSDI)","author":"Lao"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132764"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3587436"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544262"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3131622"},{"key":"ref17","first-page":"443","article-title":"Serving DNNs like clockwork: Performance predictability from the bottom up","volume-title":"Proc. USENIX OSDI","author":"Gujarati"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2847220.2847223"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737587"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3228733"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.2974461"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2021.3102593"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2987550.2987554"},{"key":"ref24","first-page":"283","article-title":"Quiver: An informed storage cache for deep learning","volume-title":"Proc. USENIX FAST","author":"Kumar"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685095"},{"key":"ref26","volume-title":"Barefoot Networks","year":"2024"},{"key":"ref27","first-page":"307","article-title":"HetPipe: Enabling large DNN training on (Whimpy) heterogeneous GPU clusters through integration of pipelined model parallelism and data parallelism","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Park"},{"key":"ref28","first-page":"947","article-title":"Analysis of large-scale multi-tenant gpu clusters for dnn training workloads","volume-title":"Proc. USENIX ATC","author":"Jeon"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3342195.3387555"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"ref31","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"Proc. 13th USENIX Symp. Oper. Syst. Design Implement. (OSDI)","author":"Xiao"},{"key":"ref32","first-page":"485","article-title":"Tiresias: A GPU cluster manager for distributed deep learning","volume-title":"Proc. USENIX NSDI","author":"Gu"},{"key":"ref33","article-title":"DS-sync: Addressing network bottlenecks with Divide-and-Shuffle synchronization for distributed DNN training","author":"Wang","year":"2020","journal-title":"arXiv:2007.03298"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2024.3366336"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2017.2736066"},{"key":"ref36","first-page":"1","article-title":"Short-dot: Computing large linear transforms distributedly using coded short dot products","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Dutta"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2022.3230779"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2021.3103770"},{"key":"ref39","first-page":"1223","article-title":"More effective distributed ML via a stale synchronous parallel parameter server","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Ho"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICNP59255.2023.10355574"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref42","first-page":"82","article-title":"Plink: Discovering and exploiting locality for accelerated distributed training on the public cloud","volume-title":"Proc. Mach. Learn. Syst.","volume":"2","author":"Luo"},{"key":"ref43","first-page":"1403","article-title":"CASSINI: Network-aware job scheduling in machine learning clusters","volume-title":"Proc. USENIX NSDI","author":"Rajasekaran"},{"key":"ref44","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv:1409.1556"},{"key":"ref45","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-3210"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00150"},{"issue":"14","key":"ref48","first-page":"527","article-title":"Network simulations with the Ns-3 simulator","volume":"14","author":"Henderson","year":"2008","journal-title":"SIGCOMM Demonstration"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3152434.3152461"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IWQoS57198.2023.10188783"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582037"}],"container-title":["IEEE\/ACM Transactions on Networking"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/90\/10807683\/10638484.pdf?arnumber=10638484","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T20:31:19Z","timestamp":1736973079000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10638484\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":51,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tnet.2024.3441039","relation":{},"ISSN":["1063-6692","1558-2566"],"issn-type":[{"value":"1063-6692","type":"print"},{"value":"1558-2566","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}