{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T00:26:50Z","timestamp":1768523210130,"version":"3.49.0"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62141216"],"award-info":[{"award-number":["62141216"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172382"],"award-info":[{"award-number":["62172382"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61832011"],"award-info":[{"award-number":["61832011"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"111 Project 2.0","award":["BP0719016"],"award-info":[{"award-number":["BP0719016"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tpds.2024.3466913","type":"journal-article","created":{"date-parts":[[2024,9,24]],"date-time":"2024-09-24T17:28:54Z","timestamp":1727198934000},"page":"2375-2391","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Schedule Construction for Distributed Execution of Large DNN Models"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8050-6196","authenticated-orcid":false,"given":"Zhiqi","family":"Lin","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2395-9965","authenticated-orcid":false,"given":"Youshan","family":"Miao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"given":"Guanbin","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7064-6120","authenticated-orcid":false,"given":"Cheng","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China, Hefei, China"}]},{"given":"Olli","family":"Saarikivi","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"given":"Saeed","family":"Maleki","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"given":"Fan","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/212094.212131"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3519584"},{"key":"ref3","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Brown"},{"key":"ref4","article-title":"Training deep nets with sublinear memory cost","author":"Chen","year":"2016"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/BF02577873"},{"issue":"1","key":"ref6","first-page":"55","article-title":"QoS requirements of network applications on the internet","volume":"4","author":"Chen","year":"2004","journal-title":"Inf. Knowl. Syst. Manage."},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78800-3_24"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441593"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378530"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/arxiv.1811.06965"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00049"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613152"},{"key":"ref13","first-page":"1","article-title":"Beyond data and model parallelism for deep neural networks","volume-title":"Proc. Mach. Learn. Syst.","author":"Jia"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/368996.369025"},{"key":"ref15","first-page":"5583","article-title":"ViLT: Vision-and-language transformer without convolution or region supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2086696.2086711"},{"key":"ref17","first-page":"341","article-title":"Reducing activation recomputation in large transformer models","volume-title":"Proc. Mach. Learn. Syst.","volume":"5","author":"Korthikanti"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476145"},{"key":"ref19","article-title":"SuperScaler: Supporting flexible DNN parallelization via a unified abstraction","author":"Lin","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref22","first-page":"1","article-title":"Efficient large-scale language model training on GPU clusters using megatron-lm","volume-title":"Proc. Int. Conf. High Perform. Comput. Netw. Storage Anal.","author":"Narayanan"},{"key":"ref23","article-title":"GPT-4 introduction","year":"2023"},{"key":"ref24","first-page":"307","article-title":"HetPipe: Enabling large DNN training on (whimpy) heterogeneous GPU clusters through integration of pipelined model parallelism and data parallelism","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Park"},{"key":"ref25","article-title":"PyTorch","year":"2022"},{"key":"ref26","article-title":"Improving language understanding by generative pre-training","author":"Radford","year":"2018"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00024"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/143095.143141"},{"key":"ref30","first-page":"551","article-title":"Zero-offload: Democratizing billion-scale model training","volume-title":"Proc. USENIX Annu. Tech. Conf.","author":"Ren"},{"key":"ref31","article-title":"NVIDIA\/Megatron-LM: Training multi-billion parameter language models using GPU model parallelism","author":"Shoeybi","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01519"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/19551.19541"},{"key":"ref34","first-page":"24829","article-title":"PIPER: Multidimensional planner for DNN parallelization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Tarnawski"},{"key":"ref35","article-title":"Distributed data parallelism","author":"Team","year":"2022"},{"key":"ref36","article-title":"TorchScript","author":"Team"},{"key":"ref37","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Vaswani"},{"key":"ref38","article-title":"HelixFold: An efficient implementation of AlphaFold2 using paddlepaddle","author":"Wang","year":"2022"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303953"},{"key":"ref40","article-title":"SimVLM: Simple visual language model pretraining with weak supervision","author":"Wang","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36478-1_17"},{"key":"ref42","volume-title":"Technical, Commercial and Regulatory Challenges of QoS: An Internet Service Model Perspective","author":"Xiao","year":"2008"},{"key":"ref43","article-title":"GSPMD: General and scalable parallelization for ML computation graphs","author":"Xu","year":"2021"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"ref45","first-page":"521","article-title":"ORCA: A distributed serving system for $\\lbrace${Transformer-Based$\\rbrace$} generative models","volume-title":"Proc. 16th USENIX Symp. Operating Syst. Des. Implementation","author":"Yu"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.257"},{"key":"ref47","first-page":"559","article-title":"ALPA: Automating inter-and $\\lbrace${Intra-Operator$\\rbrace$} parallelism for distributed deep learning","volume-title":"Proc. 16th USENIX Symp. Operating Syst. Des. Implementation","author":"Zheng"},{"key":"ref48","article-title":"On optimizing the communication of model parallelism","volume-title":"Proc. Mach. Learn. Syst.","author":"Zhuang"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/10719599\/10691391.pdf?arnumber=10691391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T18:07:40Z","timestamp":1729102060000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10691391\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":48,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2024.3466913","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}