{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T16:48:57Z","timestamp":1765039737350,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T00:00:00Z","timestamp":1725408000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"King Abdullah University of Science and Technology (KAUST) Office of Research Administration (ORA)","award":["ORA-CRG2020-4382"],"award-info":[{"award-number":["ORA-CRG2020-4382"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,4]]},"DOI":"10.1145\/3678015.3680478","type":"proceedings-article","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T12:19:32Z","timestamp":1724933972000},"page":"88-94","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Towards a Flexible and High-Fidelity Approach to Distributed DNN Training Emulation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9932-6096","authenticated-orcid":false,"given":"Banruo","family":"Liu","sequence":"first","affiliation":[{"name":"Tsinghua University and KAUST"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3861-1782","authenticated-orcid":false,"given":"Mubarak Adetunji","family":"Ojewale","sequence":"additional","affiliation":[{"name":"KAUST"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2829-0319","authenticated-orcid":false,"given":"Yuhan","family":"Ding","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5051-4283","authenticated-orcid":false,"given":"Marco","family":"Canini","sequence":"additional","affiliation":[{"name":"KAUST"}]}],"member":"320","published-online":{"date-parts":[[2024,9,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3635867"},{"key":"e_1_3_2_1_2_1","unstructured":"Jehyeon Bang Yujeong Choi Myeongwoo Kim Yongdeok Kim and Minsoo Rhu. 2023. vTrain: A Simulation Framework for Evaluating Cost-effective and Compute-optimal Large Language Model Training. (2023). arXiv:cs.LG\/2312.12391"},{"key":"e_1_3_2_1_3_1","unstructured":"Cody Coleman Christopher Yeh Stephen Mussmann Baharan Mirzasoleiman Peter Bailis Percy Liang Jure Leskovec and Matei Zaharia. 2020. Selection via Proxy: Efficient Data Selection for Deep Learning. In ICLR."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Wei Deng Junwei Pan Tian Zhou Deguang Kong Aaron Flores and Guang Lin. 2021. DeepLight: Deep Lightweight Feature Interactions for Accelerating CTR Predictions in Ad Serving. In WSDM.","DOI":"10.1145\/3437963.3441727"},{"key":"e_1_3_2_1_5_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT."},{"key":"e_1_3_2_1_6_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR."},{"key":"e_1_3_2_1_7_1","volume-title":"Yang Yang, and Yanqi Zhou.","author":"Hestness Joel","year":"2017","unstructured":"Joel Hestness, Sharan Narang, Newsha Ardalani, Gregory Diamos, Heewoo Jun, Hassan Kianinejad, Md Mostofa Ali Patwary, Yang Yang, and Yanqi Zhou. 2017. Deep Learning Scaling is Predictable, Empirically. (2017). arXiv:cs.LG\/1712.00409"},{"key":"e_1_3_2_1_8_1","unstructured":"Yanping Huang Youlong Cheng Ankur Bapna Orhan Firat Dehao Chen Mia Chen HyoukJoong Lee Jiquan Ngiam Quoc V Le Yonghui Wu and zhifeng Chen. 2019. GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism. In NeurIPS."},{"key":"e_1_3_2_1_9_1","volume-title":"Tutel: Adaptive Mixture-of-Experts at Scale. In MLSys.","author":"Hwang Changho","year":"2023","unstructured":"Changho Hwang, Wei Cui, Yifan Xiong, Ziyue Yang, Ze Liu, Han Hu, Zilong Wang, Rafael Salas, Jithin Jose, Prabhat Ram, Joe Chau, Peng Cheng, Fan Yang, Mao Yang, and Yongqiang Xiong. 2023. Tutel: Adaptive Mixture-of-Experts at Scale. In MLSys."},{"key":"e_1_3_2_1_10_1","unstructured":"Mahnaz Koupaee and William Yang Wang. 2018. WikiHow: A Large Scale Text Summarization Dataset. (2018). arXiv:cs.CL\/1810.09305 https:\/\/arxiv.org\/abs\/1810.09305"},{"key":"e_1_3_2_1_11_1","volume-title":"Chun cheng Jason Chen, and Mosharaf Chowdhury","author":"Lai Fan","year":"2023","unstructured":"Fan Lai, Wei Zhang, Rui Liu, William Tsai, Xiaohan Wei, Yuxi Hu, Sabin Devkota, Jianyu Huang, Jongsoo Park, Xing Liu, Zeliang Chen, Ellie Wen, Paul Rivera, Jie You, Chun cheng Jason Chen, and Mosharaf Chowdhury. 2023. AdaEmbed: Adaptive Embedding for Large-Scale Recommendation Models. In OSDI."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Bob Lantz Brandon Heller and Nick McKeown. 2010. A network in a laptop: rapid prototyping for software-defined networks. In HotNets.","DOI":"10.1145\/1868447.1868466"},{"key":"e_1_3_2_1_13_1","unstructured":"Yujun Lin Song Han Huizi Mao Yu Wang and William J Dally. 2018. Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training. In ICLR."},{"key":"e_1_3_2_1_14_1","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. (2019). arXiv:cs.CL\/1907.11692"},{"key":"e_1_3_2_1_15_1","unstructured":"Microsoft. 2015. Criteo's 1TB Click Prediction Dataset. (2015). https:\/\/docs.microsoft.com\/en-us\/archive\/blogs\/machinelearning\/now-available-on-azure-ml-criteos-1tb-click-prediction-dataset."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Radhika Mittal Alexander Shpiner Aurojit Panda Eitan Zahavi Arvind Krishnamurthy Sylvia Ratnasamy and Scott Shenker. 2018. Revisiting Network Support for RDMA. In SIGCOMM.","DOI":"10.1145\/3230543.3230557"},{"key":"e_1_3_2_1_17_1","unstructured":"NVIDIA. 2024. A PyTorch Extension: Tools for easy mixed precision and distributed training in Pytorch. (2024). https:\/\/github.com\/NVIDIA\/apex."},{"key":"e_1_3_2_1_18_1","unstructured":"NVIDIA. 2024. Collective Communication Library (NCCL). (2024). https:\/\/developer.nvidia.com\/nccl."},{"key":"e_1_3_2_1_19_1","volume-title":"Liu","author":"Raffel Colin","year":"2023","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2023. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. (2023). arXiv:cs.LG\/1910.10683 https:\/\/arxiv.org\/abs\/1910.10683"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Samyam Rajbhandari Jeff Rasley Olatunji Ruwase and Yuxiong He. 2020. ZeRO: Memory Optimizations Toward Training Trillion Parameter Models. In SC.","DOI":"10.1109\/SC41405.2020.00024"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Pranav Rajpurkar Robin Jia and Percy Liang. 2018. Know What You Don't Know: Unanswerable Questions for SQuAD. In ACL.","DOI":"10.18653\/v1\/P18-2124"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Saeed Rashidi Srinivas Sridharan Sudarshan Srinivasan and Tushar Krishna. 2020. ASTRA-SIM: Enabling SW\/HW Co-Design Exploration for Distributed DL Training Platforms. In ISPASS.","DOI":"10.1109\/ISPASS48437.2020.00018"},{"key":"e_1_3_2_1_23_1","volume-title":"Themis: A Network Bandwidth-Aware Collective Scheduling Policy for Distributed Training of DL Models. In ISCA.","author":"Rashidi Saeed","year":"2022","unstructured":"Saeed Rashidi, William Won, Sudarshan Srinivasan, Srinivas Sridharan, and Tushar Krishna. 2022. Themis: A Network Bandwidth-Aware Collective Scheduling Policy for Distributed Training of DL Models. In ISCA."},{"key":"e_1_3_2_1_24_1","volume-title":"Zuluaga","author":"Wilfredo J.","year":"2022","unstructured":"Wilfredo J. Robinson M., Flavio Esposito, and Maria A. Zuluaga. 2022. DTS: A Simulator to Estimate the Training Time of Distributed Deep Neural Networks. In MASCOTS."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_26_1","unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2020. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. (2020). arXiv:cs.CL\/1909.08053"},{"key":"e_1_3_2_1_27_1","unstructured":"Aleksandar Stani\u0107 Dylan Ashley Oleg Serikov Louis Kirsch Francesco Faccio J\u00fcrgen Schmidhuber Thomas Hofmann and Imanol Schlag. 2023. The Languini Kitchen: Enabling Language Modelling Research at Different Scales of Compute. (2023). arXiv:cs.LG\/2309.11197"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"e_1_3_2_1_29_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. (2023). arXiv:cs.CL\/2302.13971"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Amin Vahdat Ken Yocum Kevin Walsh Priya Mahadevan Dejan Kosti\u0107 Jeff Chase and David Becker. 2002. Scalability and Accuracy in a Large-Scale Network Emulator. In OSDI.","DOI":"10.1145\/1060289.1060315"},{"key":"e_1_3_2_1_31_1","volume-title":"Xiaoxia Wu, Connor Holmes, Zhewei Yao, Samyam Rajbhandari, Olatunji Ruwase, Feng Yan, Lei Yang, and Yuxiong He.","author":"Wang Guanhua","year":"2024","unstructured":"Guanhua Wang, Heyang Qin, Sam Ade Jacobs, Xiaoxia Wu, Connor Holmes, Zhewei Yao, Samyam Rajbhandari, Olatunji Ruwase, Feng Yan, Lei Yang, and Yuxiong He. 2024. ZeRO++: Extremely Efficient Collective Communication for Large Model Training. In ICLR."},{"key":"e_1_3_2_1_32_1","unstructured":"William Won Taekyung Heo Saeed Rashidi Srinivas Sridharan Sudarshan Srinivasan and Tushar Krishna. 2023. ASTRA-sim2.0: Modeling Hierarchical Networks and Disaggregated Systems for Large-model Training at Scale. In ISPASS."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2015.2472014"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Qizhen Zhang Kelvin K. W. Ng Charles Kazer Shen Yan Jo\u00e3o Sedoc and Vincent Liu. 2021. MimicNet: Fast Performance Estimates for Data Center Networks with Machine Learning. In SIGCOMM.","DOI":"10.1145\/3452296.3472926"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.14778\/3611540.3611569"},{"key":"e_1_3_2_1_36_1","volume-title":"Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. In OSDI.","author":"Zheng Lianmin","year":"2022","unstructured":"Lianmin Zheng, Zhuohan Li, Hao Zhang, Yonghao Zhuang, Zhifeng Chen, Yanping Huang, Yida Wang, Yuanzhong Xu, Danyang Zhuo, Eric P. Xing, Joseph E. Gonzalez, and Ion Stoica. 2022. Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. In OSDI."}],"event":{"name":"APSys '24: 15th ACM SIGOPS Asia-Pacific Workshop on Systems","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Kyoto Japan","acronym":"APSys '24"},"container-title":["Proceedings of the 15th ACM SIGOPS Asia-Pacific Workshop on Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678015.3680478","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678015.3680478","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:14:28Z","timestamp":1755915268000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678015.3680478"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,4]]},"references-count":36,"alternative-id":["10.1145\/3678015.3680478","10.1145\/3678015"],"URL":"https:\/\/doi.org\/10.1145\/3678015.3680478","relation":{},"subject":[],"published":{"date-parts":[[2024,9,4]]},"assertion":[{"value":"2024-09-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}