{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:57:27Z","timestamp":1759334247336,"version":"build-2065373602"},"reference-count":58,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"NSFC-RGC","award":["62461160333"],"award-info":[{"award-number":["62461160333"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1109\/tpds.2025.3605491","type":"journal-article","created":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T17:54:56Z","timestamp":1756922096000},"page":"2366-2382","source":"Crossref","is-referenced-by-count":0,"title":["DynPipe: Toward Dynamic End-to-End Pipeline Parallelism for Interference-Aware DNN Training"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-6532-7176","authenticated-orcid":false,"given":"Zhengyi","family":"Yuan","sequence":"first","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5360-0454","authenticated-orcid":false,"given":"Xiong","family":"Wang","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"given":"Yuntao","family":"Nie","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"given":"Yufei","family":"Tao","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0816-5777","authenticated-orcid":false,"given":"Yuqing","family":"Li","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2139-6465","authenticated-orcid":false,"given":"Zhiyuan","family":"Shao","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6302-813X","authenticated-orcid":false,"given":"Xiaofei","family":"Liao","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2083-9105","authenticated-orcid":false,"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3934-7605","authenticated-orcid":false,"given":"Hai","family":"Jin","sequence":"additional","affiliation":[{"name":"National Engineering Research Center for Big Data Technology and System, Services Computing Technology and System Lab\/Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"article-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref3"},{"article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Simonyan","key":"ref4"},{"key":"ref5","article-title":"Introducing meta Llama 3: The most capable openly available LLM to date","author":"Meta","year":"2024","journal-title":"Meta AI"},{"key":"ref6","first-page":"347","article-title":"nnScaler: Constraint-guided parallelization plan generation for deep learning training","volume-title":"Proc. USENIX Conf. Operating Syst. Des. Implementation","author":"Lin"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.14778\/3570690.3570697"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.14778\/3598581.3598604"},{"key":"ref9","first-page":"103","article-title":"GPipe: Efficient training of giant neural networks using pipeline parallelism","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Huang"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441593"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3094364"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3581-9"},{"key":"ref14","first-page":"995","article-title":"Beware of fragmentation: Scheduling GPU-sharing workloads with fragmentation gradient descent","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Weng"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640411"},{"key":"ref16","first-page":"497","article-title":"Bamboo: Making preemptible instances resilient for affordable training of large DNNs","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Thorpe"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3725322"},{"key":"ref18","first-page":"7937","article-title":"Memory-efficient pipeline-parallel DNN training","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Narayanan"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544224"},{"key":"ref20","first-page":"19","article-title":"A case for task sampling based learning for cluster job scheduling","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Jajoo"},{"key":"ref21","first-page":"203","article-title":"CheckFreq: Frequent, fine-grained DNN checkpointing","volume-title":"Proc. USENIX Conf. File Storage Technol.","author":"Mohan"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613145"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607102"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2023.3242733"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359642"},{"article-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref26"},{"year":"2023","key":"ref27","article-title":"GPT-4 technical report"},{"key":"ref28","first-page":"1232","article-title":"Large scale distributed deep networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Dean"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507735"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598036"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575705"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4227-2"},{"key":"ref34","first-page":"945","article-title":"MLaaS in the wild: Workload analysis and scheduling in large-scale heterogeneous GPU clusters","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Weng"},{"key":"ref35","first-page":"745","article-title":"MegaScale: Scaling large language model training to more than 10, 000 GPUs","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Jiang"},{"key":"ref36","doi-asserted-by":"crossref","DOI":"10.1145\/3458817.3476209","article-title":"Efficient large-scale language model training on GPU clusters using megatron-LM","volume-title":"Proc. Int. Conf. High Perform. Comput. Netw. Storage Anal.","author":"Narayanan"},{"key":"ref37","first-page":"559","article-title":"Alpa: Automating inter- and intra-operator parallelism for distributed deep learning","volume-title":"Proc. USENIX Conf. Operating Syst. Des. Implementation","author":"Zheng"},{"key":"ref38","first-page":"307","article-title":"HetPipe: Enabling large DNN training on (Whimpy) heterogeneous GPU clusters through integration of pipelined model parallelism and data parallelism","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Park"},{"key":"ref39","first-page":"787","article-title":"SHEPHERD: Serving DNNs in the wild","volume-title":"Proc. USENIX Symp. Netw. Syst. Des. Implementation","author":"Zhang"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796935"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref43","first-page":"947","article-title":"Analysis of large-scale multi-tenant GPU clusters for DNN training workloads","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Jeon"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607060"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3079202"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.29172\/7c2a6982-6d72-4cd8-bba6-2fccb06a7011"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"issue":"8","key":"ref51","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.11"},{"article-title":"Learning multiple layers of features from tiny images","year":"2009","author":"Krizhevsky","key":"ref54"},{"key":"ref56","first-page":"463","article-title":"A unified architecture for accelerating distributed DNN training in heterogeneous GPU\/CPU clusters","volume-title":"Proc. USENIX Conf. Operating Syst. Des. Implementation","author":"Jiang"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2023.3247001"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2024.3406420"},{"key":"ref59","first-page":"381","article-title":"Fine-tuning giant neural networks on commodity hardware with automatic pipeline model parallelism","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Eliad"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3322795.3331463"},{"key":"ref61","first-page":"595","article-title":"Gandiva: Introspective cluster scheduling for deep learning","volume-title":"Proc. USENIX Conf. Operating Syst. Des. Implementation","author":"Xiao"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421299"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.14778\/3685800.3685832"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/11182288\/11150566.pdf?arnumber=11150566","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T13:01:27Z","timestamp":1759237287000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11150566\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":58,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2025.3605491","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"type":"print","value":"1045-9219"},{"type":"electronic","value":"1558-2183"},{"type":"electronic","value":"2161-9883"}],"subject":[],"published":{"date-parts":[[2025,11]]}}}