{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T05:08:53Z","timestamp":1738818533160,"version":"3.37.0"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["62372426,62102392"],"award-info":[{"award-number":["62372426,62102392"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association of the Chinese Academy of Science","doi-asserted-by":"publisher","award":["2023481"],"award-info":[{"award-number":["2023481"]}],"id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1109\/icnp61940.2024.10858582","type":"proceedings-article","created":{"date-parts":[[2025,2,4]],"date-time":"2025-02-04T18:29:45Z","timestamp":1738693785000},"page":"1-11","source":"Crossref","is-referenced-by-count":0,"title":["Non-Idle Machine-Aware Worker Placement for Efficient Distributed Training in GPU Clusters"],"prefix":"10.1109","author":[{"given":"Jin","family":"Fang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gongming","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongli","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luyao","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhen","family":"Yao","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"An","family":"Xie","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Google\u2019s neural machine translation system: Bridging the gap between human and machine translation","author":"Wu","year":"2016","journal-title":"arXiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2020.113679"},{"key":"ref3","article-title":"Lamda: Language models for dialog applications","author":"Thoppilan","year":"2022","journal-title":"arXiv preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3003307"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3503221.3508399"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM42981.2021.9488678"},{"journal-title":"Megascale: Scaling large language model training to more than 10,000 gpus","year":"2024","author":"Jiang","key":"ref7"},{"key":"ref8","first-page":"945","article-title":"in the wild: Workload analysis and scheduling in heterogeneous clusters","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng"},{"article-title":"Pollux: Co-adaptive cluster scheduling for goodput-optimized deep learning","volume-title":"15th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 21)","author":"Qiao","key":"ref9"},{"key":"ref10","first-page":"485","article-title":"Tiresias: A cluster manager for distributed deep learning","volume-title":"16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19)","author":"Gu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575721"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2023.3305753"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796910"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00056"},{"volume-title":"Maximize system throughput with nvidia nvlink","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1402946.1402967"},{"key":"ref17","first-page":"995","article-title":"Beware of fragmentation: Scheduling workloads with fragmentation gradient descent","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Weng"},{"key":"ref18","first-page":"579","article-title":"Looking beyond for scheduling on \\{Multi-Tenant $\\}$ clusters","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Mohan"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476223"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3587445"},{"key":"ref21","first-page":"515","article-title":"\\{HiveD\\}: Sharing a $\\{$ GPU cluster for deep learning with guarantees","volume-title":"14th USENIX symposium on operating systems design and implementation (OSDI 20)","author":"Zhao","year":"2020"},{"key":"ref22","first-page":"721","article-title":"Elastic resource sharing for distributed deep learning","volume-title":"18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Hwang"},{"volume-title":"Bandwidth reduction using importance weighted pruning on ring allreduce","year":"2019","author":"Cheng","key":"ref23"},{"volume-title":"Nvidia collective communications library (nccl)","key":"ref24"},{"key":"ref25","article-title":"Accurate, large minibatch sgd: Training imagenet in 1 hour","author":"Goyal","year":"2017","journal-title":"arXiv preprint"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3091475"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/S0097539700382820"},{"issue":"2","key":"ref28","first-page":"159","article-title":"The gurobi optimizer","volume":"41","author":"Bixby","year":"2007","journal-title":"Transp. Re-search Part B"},{"key":"ref29","first-page":"719","article-title":"{HeteroSketch}: Coordinating network-wide monitoring in heterogeneous and dynamic networks","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Agarwal"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/0201010"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796938"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139177801.004"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1287\/moor.3.3.177"},{"volume-title":"Mininet: An instant virtual network on your laptop (or other pc)","key":"ref34"},{"key":"ref35","first-page":"709","article-title":"Characterization of large language model development in the datacenter","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Hu"},{"key":"ref36","first-page":"1421","article-title":"Towards network transport for distributed training","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Wang"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"volume-title":"Cifar-100 (canadian institute for advanced research).","author":"Krizhevsky","key":"ref38"},{"journal-title":"Very deep convolutional networks for large-scale image recognition","year":"2015","author":"Simonyan","key":"ref39"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2023.3244794"},{"volume-title":"Index of algorithms provided by gloo and their semantics.)","key":"ref41"}],"event":{"name":"2024 IEEE 32nd International Conference on Network Protocols (ICNP)","start":{"date-parts":[[2024,10,28]]},"location":"Charleroi, Belgium","end":{"date-parts":[[2024,10,31]]}},"container-title":["2024 IEEE 32nd International Conference on Network Protocols (ICNP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10858485\/10858498\/10858582.pdf?arnumber=10858582","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,5]],"date-time":"2025-02-05T05:59:05Z","timestamp":1738735145000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10858582\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/icnp61940.2024.10858582","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]}}}