{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T05:47:14Z","timestamp":1767678434372,"version":"3.48.0"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172204"],"award-info":[{"award-number":["62172204"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325205"],"award-info":[{"award-number":["62325205"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272213"],"award-info":[{"award-number":["62272213"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Collaborative Innovation Center of Novel Software Technology and Industrialization"},{"name":"Nanjing University\u2014China Mobile Communications Group Company Ltd. Joint Institute"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Netw."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/ton.2025.3617081","type":"journal-article","created":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T17:57:02Z","timestamp":1761587822000},"page":"1160-1174","source":"Crossref","is-referenced-by-count":0,"title":["Fine-Grained Scheduling of In-Network Aggregation Resources for Efficient Machine Learning Service"],"prefix":"10.1109","volume":"34","author":[{"given":"Shichen","family":"Dong","sequence":"first","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6947-9740","authenticated-orcid":false,"given":"Zhixiong","family":"Niu","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingchao","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiying","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chuntao","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengzhi","family":"Zhu","sequence":"additional","affiliation":[{"name":"Nvidia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingchun","family":"Song","sequence":"additional","affiliation":[{"name":"Nvidia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7770-573X","authenticated-orcid":false,"given":"Lei","family":"Qu","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4014-4757","authenticated-orcid":false,"given":"Peng","family":"Cheng","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9484-6876","authenticated-orcid":false,"given":"Cam-Tu","family":"Nguyen","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaoling","family":"Sun","sequence":"additional","affiliation":[{"name":"China Mobile, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohu","family":"Xu","sequence":"additional","affiliation":[{"name":"China Mobile, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4175-0097","authenticated-orcid":false,"given":"Yongqiang","family":"Xiong","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3410-8621","authenticated-orcid":false,"given":"Xiaoliang","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6934-1685","authenticated-orcid":false,"given":"Guihai","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM55648.2025.11044657"},{"article-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref2"},{"key":"ref3","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"volume-title":"An End-to-End Platform for Machine Learning","year":"2025","key":"ref4"},{"key":"ref5","first-page":"1","article-title":"Efficient large-scale language model training on GPU clusters using megatron-LM","volume-title":"Proc. Int. Conf. High Perform. Comput., Netw., Storage Anal.","author":"Narayanan"},{"key":"ref6","article-title":"The llama 3 herd of models","author":"Grattafiori","year":"2024","journal-title":"arXiv:2407.21783"},{"volume-title":"Azure Machine Learning","year":"2025","key":"ref7"},{"volume-title":"AI and Machine Learning Products","year":"2025","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3405671.3405810"},{"key":"ref10","article-title":"Communication-efficient large-scale distributed deep learning: A comprehensive survey","author":"Liang","year":"2024","journal-title":"arXiv:2404.06114"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2020.11.005"},{"article-title":"Cupcake: A compression optimizer for scalable communication-efficient distributed training","volume-title":"Proc. 6th Conf. Mach. Learn. Syst. (MLSys)","author":"Wang","key":"ref12"},{"key":"ref13","first-page":"785","article-title":"Scaling distributed machine learning with in-network aggregation","volume-title":"Proc. 18th USENIX Symp. Networked Syst. Design Implement. (NSDI)","author":"Sapio"},{"key":"ref14","first-page":"741","article-title":"ATP: In-network aggregation for multi-tenant learning","volume-title":"Proc. 18th USENIX Symp. Networked Syst. Design Implement. (NSDI)","author":"Lao"},{"key":"ref15","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv:1409.1556"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/COMHPC.2016.006"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50743-5_3"},{"volume-title":"ND A100 V4-Series","year":"2023","key":"ref18"},{"volume-title":"Top500 List\u2014June 2023","year":"2023","key":"ref19"},{"volume-title":"NVIDIA Quantum HDR 200 Gb\/s InfiniBand Switch","year":"2020","key":"ref20"},{"volume-title":"Nvidia Quantum-2 Infiniband Platform","year":"2022","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575705"},{"key":"ref23","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"Proc. 13th USENIX Symp. Oper. Syst. Design Implement. (OSDI)","author":"Xiao"},{"key":"ref24","first-page":"485","article-title":"Tiresias: A GPU cluster manager for distributed deep learning","volume-title":"Proc. 16th USENIX Symp. Networked Syst. Design Implement. (NSDI 19)","author":"Gu"},{"key":"ref25","first-page":"35","article-title":"NetChain: Scale-free sub-RTT coordination","volume-title":"Proc. 15th {USENIX} Symp. Netw. Syst. Design Implement. (NSDI)","author":"Jin"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8486422"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737460"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/10968987_3"},{"issue":"1","key":"ref30","doi-asserted-by":"crossref","first-page":"512","DOI":"10.11591\/ijece.v9i1.pp512-524","article-title":"Bin packing algorithms for virtual machine placement in cloud computing: A review","volume":"9","author":"Kumaraswamy","year":"2019","journal-title":"Int. J. Electr. Comput. Eng. (IJECE)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575708"},{"key":"ref32","article-title":"OPT: Open pre-trained transformer language models","author":"Zhang","year":"2022","journal-title":"arXiv:2205.01068"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035939"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.ic.2017.06.001"},{"key":"ref35","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Paszke"},{"key":"ref36","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","volume-title":"Proc. 12th USENIX Symp. Oper. Syst. Design Implement.","author":"Abadi"},{"volume-title":"Sharp Aggregation Manager","year":"2023","key":"ref37"},{"volume-title":"NVIDIA Collective Communication Library (NCCL)","year":"2017","key":"ref38"},{"volume-title":"RDMA and SHArP Plugins for NCCL Library","year":"2020","key":"ref39"},{"key":"ref40","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv:2010.11929"},{"key":"ref41","first-page":"481","article-title":"Heterogeneity-aware cluster scheduling policies for deep learning workloads","volume-title":"Proc. 14th USENIX Symp. Oper. Syst. Design Implement. (OSDI)","author":"Narayanan"},{"key":"ref42","first-page":"533","article-title":"AntMan: Dynamic scaling on GPU clusters for deep learning","volume-title":"Proc. 14th USENIX Symp. Operating Syst. Design Implement. (OSDI 20)","author":"Xiao"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624863"}],"container-title":["IEEE Transactions on Networking"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10723154\/11317935\/11217926.pdf?arnumber=11217926","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T05:43:17Z","timestamp":1767678197000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11217926\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/ton.2025.3617081","relation":{},"ISSN":["2998-4157"],"issn-type":[{"type":"electronic","value":"2998-4157"}],"subject":[],"published":{"date-parts":[[2026]]}}}