{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:50:21Z","timestamp":1775069421670,"version":"3.50.1"},"reference-count":60,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,10]],"date-time":"2023-10-10T00:00:00Z","timestamp":1696896000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,10]],"date-time":"2023-10-10T00:00:00Z","timestamp":1696896000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,10]]},"DOI":"10.1109\/icnp59255.2023.10355574","type":"proceedings-article","created":{"date-parts":[[2023,12,20]],"date-time":"2023-12-20T14:18:50Z","timestamp":1703081930000},"page":"1-12","source":"Crossref","is-referenced-by-count":17,"title":["Preemptive Switch Memory Usage to Accelerate Training Jobs with Shared In-Network Aggregation"],"prefix":"10.1109","author":[{"given":"Hao","family":"Wang","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology,iSING Lab"}]},{"given":"Yuxuan","family":"Qin","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology,iSING Lab"}]},{"given":"ChonLam","family":"Lao","sequence":"additional","affiliation":[{"name":"Harvard University"}]},{"given":"Yanfang","family":"Le","sequence":"additional","affiliation":[{"name":"Intel"}]},{"given":"Wenfei","family":"Wu","sequence":"additional","affiliation":[{"name":"Peking University"}]},{"given":"Kai","family":"Chen","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology,iSING Lab"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Second-generation p4-programmable ethernet switch asic","year":"2021"},{"key":"ref2","volume-title":"The source code of atp","year":"2021"},{"key":"ref3","volume-title":"Raw ethernet programming","year":"2023"},{"key":"ref4","first-page":"49","article-title":"Empowering azure storage with rdma","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Bai"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3106989.3107003"},{"key":"ref6","first-page":"455","article-title":"Information-agnostic flow scheduling for commodity data centers","volume-title":"12th USENIX Symposium on Networked Systems Design and Implementation (NSDI 15)","author":"Bai"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/2185448.2185474"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3234200.3234251"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2851613.2851626"},{"key":"ref10","article-title":"Blueconnect: Novel hierarchical all-reduce on multi-tired network for deep learning","volume-title":"Proceedings of the 2nd SysML Conference","author":"Cho"},{"key":"ref11","first-page":"3","article-title":"Camdoop: Exploiting in-network aggregation for big data applications","volume":"12","author":"Costa","year":"2012","journal-title":"NSDI"},{"key":"ref12","article-title":"Gossipgrad: Scalable deep learning using gossip communication based asynchronous gradient descent","author":"Daily","year":"2018","journal-title":"arXiv preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2023.3244794"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542344"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472904"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-020-09548-1"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TCC.2018.2879313"},{"key":"ref18","first-page":"829","article-title":"In-network aggre-gation for shared machine learning clusters","volume-title":"Proceedings of Machine Learning and Systems","volume":"3","author":"Gebara"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/comhpc.2016.006"},{"key":"ref20","first-page":"41","article-title":"Scalable hierarchical aggregation and reduction protocol (sharp) tm streaming -aggregation hardware design and evaluation","volume-title":"High Performance Computing: 35th International Conference, ISC High Performance 2020","author":"Richard","year":"2020"},{"key":"ref21","first-page":"485","article-title":"Tiresias: A gpu cluster manager for distributed deep learning","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Gu"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/762483.762486"},{"issue":"418\u2013430","key":"ref23","article-title":"Tictac: Accelerating distributed deep learning with communication scheduling","volume-title":"Proceedings of Machine Learning and Systems","volume":"1","author":"Hashemi"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2022.103561"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"issue":"14","key":"ref26","first-page":"527","article-title":"Network simulations with the ns- 3 simulator","volume":"14","author":"Thomas","year":"2008","journal-title":"SIGCOMM demonstration"},{"key":"ref27","first-page":"721","article-title":"Elastic resource sharing for distributed deep learning","volume-title":"18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Hwang"},{"key":"ref28","first-page":"132","article-title":"Priority-based parameter propagation for distributed dnn training","volume-title":"Proceedings of Machine Learning and Systems","volume":"1","author":"Jayarajan"},{"key":"ref29","first-page":"947","article-title":"Analysis of large-scale multi-tenant gpu clusters for dnn training workloads","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Jeon"},{"key":"ref30","first-page":"463","article-title":"A unified architecture for accelerating distributed dnn training in heterogeneous gpu\/cpu clusters","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Jiang"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-31957-3_17"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00085"},{"key":"ref33","volume-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref34","first-page":"741","article-title":"Swift. Atp: In-network aggregation for multi-tenant learning","volume":"21","author":"Lao","year":"2021","journal-title":"NSDI"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.14778\/3583140.3583158"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322259"},{"key":"ref37","article-title":"Deep gradient compression: Reducing the communication bandwidth for distributed training","author":"Lin","year":"2017","journal-title":"arXiv preprint"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582037"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267840"},{"key":"ref40","first-page":"82","article-title":"Plink: Discovering and exploiting locality for accelerated distributed training on the public cloud","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Luo"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/2674005.2674996"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3098822.3098824"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190517"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359642"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3152434.3152461"},{"key":"ref46","first-page":"785","article-title":"Scaling distributed machine learning with In-Network aggregation","volume-title":"18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Sapio"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-274"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/940923.940937"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737367"},{"key":"ref50","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv preprint"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421296"},{"key":"ref52","first-page":"172","article-title":"Blink: Fast and generic collectives for distributed ml","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Wang"},{"key":"ref53","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Xiao"},{"key":"ref54","first-page":"533","article-title":"Antman: Dynamic scaling on gpu clusters for deep learning","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Xiao"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2022.3161580"},{"key":"ref56","first-page":"181","article-title":"Poseidon: An efficient communication architecture for distributed deep learning on gpu clusters","volume-title":"2017 USENIX Annual Technical Conference (USENIX ATC 17)","author":"Zhang"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3127490"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3359989.3365426"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM53939.2023.10228956"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2014.12.014"}],"event":{"name":"2023 IEEE 31st International Conference on Network Protocols (ICNP)","location":"Reykjavik, Iceland","start":{"date-parts":[[2023,10,10]]},"end":{"date-parts":[[2023,10,13]]}},"container-title":["2023 IEEE 31st International Conference on Network Protocols (ICNP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10355566\/10355569\/10355574.pdf?arnumber=10355574","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T15:22:56Z","timestamp":1705072976000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10355574\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,10]]},"references-count":60,"URL":"https:\/\/doi.org\/10.1109\/icnp59255.2023.10355574","relation":{},"subject":[],"published":{"date-parts":[[2023,10,10]]}}}