{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,22]],"date-time":"2026-03-22T22:43:04Z","timestamp":1774219384990,"version":"3.50.1"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2004"],"award-info":[{"award-number":["U23B2004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472433"],"award-info":[{"award-number":["62472433"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472009"],"award-info":[{"award-number":["62472009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62402508"],"award-info":[{"award-number":["62402508"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014717","name":"Hunan Provincial Natural Science Foundation for Outstanding Youth","doi-asserted-by":"publisher","award":["2023JJ20055"],"award-info":[{"award-number":["2023JJ20055"]}],"id":[{"id":"10.13039\/100014717","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007085","name":"National University of Defense Technology","doi-asserted-by":"publisher","award":["ZK24-41"],"award-info":[{"award-number":["ZK24-41"]}],"id":[{"id":"10.13039\/501100007085","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Netw."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1109\/ton.2025.3578180","type":"journal-article","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T13:39:34Z","timestamp":1750167574000},"page":"2977-2992","source":"Crossref","is-referenced-by-count":3,"title":["In-Network Aggregation as a Generic Service for Distributed Applications"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9522-3025","authenticated-orcid":false,"given":"Junxu","family":"Xia","sequence":"first","affiliation":[{"name":"State Key Laboratory of Complex and Critical Software Environment, College of Information and Communication, National University of Defense Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1357-3137","authenticated-orcid":false,"given":"Wenfei","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4886-9974","authenticated-orcid":false,"given":"Lailong","family":"Luo","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Information Systems Engineering, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4894-5540","authenticated-orcid":false,"given":"Deke","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Computer, Sun Yat-sen University, Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5519-1375","authenticated-orcid":false,"given":"Geyao","family":"Cheng","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Complex and Critical Software Environment, College of Information and Communication, National University of Defense Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"19","article-title":"Communication efficient distributed machine learning with the parameter server","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","volume":"27","author":"Li"},{"key":"ref2","first-page":"463","article-title":"A unified architecture for accelerating distributed DNN training in heterogeneous GPU\/CPU clusters","volume-title":"Proc. OSDI","author":"Jiang"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00048"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901328"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3436890"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IWQoS54832.2022.9812906"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230544"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2354398"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2012.64"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2004.1320018"},{"key":"ref11","first-page":"1","article-title":"Scaling distributed machine learning with in-network aggregation","volume-title":"Proc. 18th USENIX Symp. Networked Syst. Design Implement.","author":"Sapio"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"ref13","article-title":"A survey of large language models","author":"Xin Zhao","year":"2023","journal-title":"arXiv:2303.18223"},{"key":"ref14","volume-title":"Bringing HPC Techniques To Deep Learning","author":"Gibiansky","year":"2017"},{"key":"ref15","article-title":"Horovod: Fast and easy distributed deep learning in TensorFlow","author":"Sergeev","year":"2018","journal-title":"arXiv:1802.05799"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2947013"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527382"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3524059.3532380"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS54860.2022.00065"},{"key":"ref20","volume-title":"Intel Tofino Programmable Ethernet Switch ASIC","year":"2023"},{"key":"ref21","volume-title":"ONE Silicon, ONE Experience, MULTIPLE Roles","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2022.3213237"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2656877.2656890"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/OJCOMS.2020.2990182"},{"key":"ref25","volume-title":"DPDK (Data Plane Development Kit)","year":"2023"},{"key":"ref26","volume-title":"Intel Arria 10 Device Overview","year":"2023"},{"key":"ref27","volume-title":"MPI: A Message-Passing Interface Standard Version 4.0","year":"2021"},{"key":"ref28","volume-title":"Gloo","year":"2024"},{"key":"ref29","volume-title":"NVIDIA Collective Communications Library (NCCL)","year":"2024"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1402958.1402967"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45523-x_17"},{"key":"ref32","first-page":"741","article-title":"ATP: in-network aggregation for multi-tenant learning","volume-title":"Proc. 18th USENIX Symp. Networked Syst. Design Implement.","author":"Lao"},{"key":"ref33","first-page":"1","article-title":"NetRPC: Enabling in-network computation in remote procedure calls","volume-title":"Proc. 20th USENIX Symp. Networked Syst. Design Implement.","author":"Zhao"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/26.380148"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/1592568.1592577"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/BF00288961"},{"key":"ref37","volume-title":"Massively Scale Your Deep Learning Training With NCCL 2.4","author":"Jeaugey","year":"2019"},{"key":"ref38","volume-title":"PyTorch New Announcements","year":"2024"},{"key":"ref39","volume-title":"Learning Multiple Layers of Features From Tiny Images","author":"Krizhevsky","year":"2009"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref42","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. 3rd Int. Conf. Learn. Represent."},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref44","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv:2010.11929"},{"key":"ref45","article-title":"Jerasure: A library in C facilitating erasure coding for storage applications","author":"Plank","year":"2014"},{"key":"ref46","article-title":"Highly scalable deep learning training system with mixed-precision: Training imagenet in four minutes","author":"Jia","year":"2018","journal-title":"arXiv:1807.11205"},{"key":"ref47","article-title":"Massively distributed SGD: ImageNet\/ResNet-50 training in a flash","author":"Mikami","year":"2018","journal-title":"arXiv:1811.05233"},{"key":"ref48","article-title":"Image classification at supercomputer scale","author":"Ying","year":"2018","journal-title":"arXiv:1811.06992"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2008.23"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132764"},{"key":"ref51","first-page":"143","article-title":"DistCache: Provable load balancing for large-scale storage systems with distributed caching","volume-title":"Proc. 17th USENIX Conf. File Storage Technol., FAST","author":"Liu"},{"key":"ref52","first-page":"19","article-title":"Offloading real-time DDoS attack detection to programmable data planes","volume-title":"Proc. IFIP\/IEEE Symp. Integr. Netw. Service Manage. (IM)","author":"Lapolli"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICC40277.2020.9149043"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/2774993.2774999"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/2935634.2935638"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2020.2992106"},{"key":"ref57","article-title":"NetReduce: RDMA-compatible in-network reduction for distributed DNN training acceleration","author":"Liu","year":"2020","journal-title":"arXiv:2009.09736"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/COMHPC.2016.006"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50743-5_3"}],"container-title":["IEEE Transactions on Networking"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10723154\/11311125\/11037617.pdf?arnumber=11037617","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T18:30:36Z","timestamp":1766514636000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11037617\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":59,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/ton.2025.3578180","relation":{},"ISSN":["2998-4157"],"issn-type":[{"value":"2998-4157","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12]]}}}