{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:44:28Z","timestamp":1775745868721,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3718958.3750503","type":"proceedings-article","created":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T16:54:11Z","timestamp":1756313651000},"page":"861-881","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["From ATOP to ZCube: Automated Topology Optimization Pipeline and A Highly Cost-Effective Network Topology for Large Model Training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-8736-8876","authenticated-orcid":false,"given":"Zihan","family":"Yan","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7581-8865","authenticated-orcid":false,"given":"Dan","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4228-7885","authenticated-orcid":false,"given":"Li","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhongguancun Laboratory, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8905-9046","authenticated-orcid":false,"given":"Dian","family":"Xiong","sequence":"additional","affiliation":[{"name":"Harnets.AI, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9013-5993","authenticated-orcid":false,"given":"Kaihui","family":"Gao","sequence":"additional","affiliation":[{"name":"Zhongguancun Laboratory, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2270-015X","authenticated-orcid":false,"given":"Yiwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8632-2017","authenticated-orcid":false,"given":"Rui","family":"Yan","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8124-8757","authenticated-orcid":false,"given":"Menglei","family":"Zhang","sequence":"additional","affiliation":[{"name":"ByteDance, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5421-3817","authenticated-orcid":false,"given":"Bochun","family":"Zhang","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6144-7899","authenticated-orcid":false,"given":"Zhuo","family":"Jiang","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3395-3624","authenticated-orcid":false,"given":"Jianxi","family":"Ye","sequence":"additional","affiliation":[{"name":"ByteDance, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4879-5335","authenticated-orcid":false,"given":"Haibin","family":"Lin","sequence":"additional","affiliation":[{"name":"ByteDance, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654101"},{"key":"e_1_3_2_1_2_1","volume-title":"A scalable, commodity data center network architecture. ACM SIGCOMM computer communication review 38, 4","author":"Al-Fares Mohammad","year":"2008","unstructured":"Mohammad Al-Fares, Alexander Loukissas, and Amin Vahdat. 2008. A scalable, commodity data center network architecture. ACM SIGCOMM computer communication review 38, 4 (2008), 63\u201374."},{"key":"e_1_3_2_1_3_1","unstructured":"Alibaba. 2024. SimAI. https:\/\/github.com\/aliyun\/SimAI"},{"key":"e_1_3_2_1_4_1","unstructured":"Ebtesam Almazrouei Hamza Alobeidli Abdulaziz Alshamsi Alessandro Cappelli Ruxandra Cojocaru M\u00e9rouane Debbah \u00c9tienne Goffinet Daniel Hesslow Julien Launay Quentin Malartic et al. 2023. The falcon series of open language models. arXiv preprint arXiv:2311.16867 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Max-value entropy search for multi-objective Bayesian optimization. Advances in neural information processing systems 32","author":"Belakaria Syrine","year":"2019","unstructured":"Syrine Belakaria, Aryan Deshwal, and Janardhan Rao Doppa. 2019. Max-value entropy search for multi-objective Bayesian optimization. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.34"},{"key":"e_1_3_2_1_7_1","unstructured":"Broadcom. 2023. htsim. https:\/\/github.com\/Broadcom\/csg-htsim"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2014.06.008"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01442131"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155462"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Bo Chen Xingyi Cheng Pan Li Yangli-ao Geng Jing Gong Shen Li Zhilei Bei Xu Tan Boyan Wang Xin Zeng et al. 2024. xTrimoPGLM: unified 100B-scale pre-trained transformer for deciphering the language of protein. arXiv preprint arXiv:2401.06199 (2024).","DOI":"10.1101\/2023.07.05.547496"},{"key":"e_1_3_2_1_12_1","first-page":"43","article-title":"A survey of binary similarity and distance measures","volume":"8","author":"Choi Seung-Seok","year":"2010","unstructured":"Seung-Seok Choi, Sung-Hyuk Cha, Charles C Tappert, et al. 2010. A survey of binary similarity and distance measures. Journal of systemics, cybernetics and informatics 8, 1 (2010), 43\u201348.","journal-title":"Journal of systemics, cybernetics and informatics"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1953.tb01433.x"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2012.6195470"},{"key":"e_1_3_2_1_15_1","volume-title":"A fast and elitist multiobjective genetic algorithm: NSGA-II","author":"Deb Kalyanmoy","year":"2002","unstructured":"Kalyanmoy Deb, Amrit Pratap, Sameer Agarwal, and TAMT Meyarivan. 2002. A fast and elitist multiobjective genetic algorithm: NSGA-II. IEEE transactions on evolutionary computation 6, 2 (2002), 182\u2013197."},{"key":"e_1_3_2_1_16_1","unstructured":"Colfax Direct. 2024. https:\/\/www.colfaxdirect.com"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2013.6567015"},{"key":"e_1_3_2_1_18_1","unstructured":"Elon Musk. 2024. Colossus Cluster. https:\/\/x.com\/elonmusk\/status\/1830650370336473253"},{"key":"e_1_3_2_1_19_1","unstructured":"FS. 2024. https:\/\/www.fs.com\/"},{"key":"e_1_3_2_1_20_1","unstructured":"FS. 2024. N8550-24CD8D 24-Port Ethernet L3 Data Center Switch. https:\/\/www.fs.com\/products\/207079.html?now_cid=4369"},{"key":"e_1_3_2_1_21_1","unstructured":"FS. 2024. N9510-64D 64-Port Ethernet L3 Data Center Switch. https:\/\/www.fs.com\/products\/149853.html?now_cid=3255"},{"key":"e_1_3_2_1_22_1","unstructured":"FS. 2024. N9600-128QC 128-Port Ethernet L3 Data Center Switch. https:\/\/www.fs.com\/products\/241601.html?now_cid=3255"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672233"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604844"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1592568.1592577"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1402958.1402968"},{"key":"e_1_3_2_1_27_1","volume-title":"SC22: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1\u201318","author":"Hoefler Torsten","year":"2022","unstructured":"Torsten Hoefler, Tommaso Bonato, Daniele De Sensi, Salvatore Di Girolamo, Shigang Li, Marco Heddes, Jon Belk, Deepak Goel, Miguel Castro, and Steve Scott. 2022. HammingMesh: a network topology for large-scale deep learning. In SC22: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1\u201318."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Christian Hopps. 2000. Analysis of an equal-cost multi-path algorithm. Technical Report.","DOI":"10.17487\/rfc2992"},{"key":"e_1_3_2_1_29_1","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Jiang Ziheng","year":"2024","unstructured":"Ziheng Jiang, Haibin Lin, Yinmin Zhong, Qi Huang, Yangrui Chen, Zhi Zhang, Yanghua Peng, Xiang Li, Cong Xie, Shibiao Nong, et al. 2024. {MegaScale}: Scaling large language model training to more than 10,000 {GPUs}. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24). 745\u2013760."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610536"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1394608.1382129"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604869"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10359-2"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"e_1_3_2_1_36_1","volume-title":"2009 International Conference on Ultra Modern Telecommunications & Workshops. IEEE, 1\u20135.","author":"Nguyen Anh Tuan","year":"2009","unstructured":"Anh Tuan Nguyen and Frank Eliassen. 2009. An efficient solution for max-min fair rate allocation in p2p simulation. In 2009 International Conference on Ultra Modern Telecommunications & Workshops. IEEE, 1\u20135."},{"key":"e_1_3_2_1_37_1","unstructured":"NVIDIA. 2022. Mellanox QM9790 InfiniBand Switch. https:\/\/docs.nvidia.com\/networking\/display\/qm9700-and-qm9790-1u-ndr-400gbps-infiniband-switch-systems-user-manual.pdf."},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA. 2022. NVIDIA ConnectX-7 400G Adapters Accelerated Networking for Modern Data Center Infrastructures. https:\/\/resources.nvidia.com\/en-us-accelerated-networking-resource-library\/connectx-7-datasheet"},{"key":"e_1_3_2_1_39_1","unstructured":"NVIDIA. 2023. SuperPOD: Next Generation Scalable Infrastructure for AI Leadership. https:\/\/docs.nvidia.com\/https:\/docs.nvidia.com\/dgx-superpod-reference-architecture-dgx-h100.pdf"},{"key":"e_1_3_2_1_40_1","unstructured":"NVIDIA. 2024. nccl-tests. https:\/\/github.com\/NVIDIA\/nccl-tests\/"},{"key":"e_1_3_2_1_41_1","unstructured":"NVIDIA. 2024. NVIDIA Bluefield-3 DPU Programmable Data Center Infrastructure On-a-chip. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/documents\/datasheet-nvidia-bluefield-3-dpu.pdf"},{"key":"e_1_3_2_1_42_1","unstructured":"NVIDIA. 2024. NVIDIA Collective Communications Library (NCCL). https:\/\/developer.nvidia.com\/nccl"},{"key":"e_1_3_2_1_43_1","volume-title":"Monte-Carlo and Quasi-Monte Carlo Methods 1998: Proceedings of a Conference held at the","author":"Owen Art B","year":"1998","unstructured":"Art B Owen. 2000. Monte Carlo, quasi-Monte carlo, and randomized quasi-Monte Carlo. In Monte-Carlo and Quasi-Monte Carlo Methods 1998: Proceedings of a Conference held at the Claremont Graduate University, Claremont, California, USA, June 22\u201326, 1998. Springer, 86\u201397."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672265"},{"key":"e_1_3_2_1_45_1","volume-title":"International conference on machine learning. PMLR","author":"Rajbhandari Samyam","year":"2022","unstructured":"Samyam Rajbhandari, Conglong Li, Zhewei Yao, Minjia Zhang, Reza Yazdani Aminabadi, Ammar Ahmad Awan, Jeff Rasley, and Yuxiong He. 2022. Deepspeed-moe: Advancing mixture-of-experts inference and training to power next-generation ai scale. In International conference on machine learning. PMLR, 18332\u201318346."},{"key":"e_1_3_2_1_46_1","volume-title":"Modeling and tools for network simulation","author":"Riley George F","unstructured":"George F Riley and Thomas R Henderson. 2010. The ns-3 network simulator. In Modeling and tools for network simulation. Springer, 15\u201334."},{"key":"e_1_3_2_1_47_1","volume-title":"Maciej Szankin, and Sairam Sundaresan.","author":"Sarah Anthony","year":"2024","unstructured":"Anthony Sarah, Sharath Nittur Sridhar, Maciej Szankin, and Sairam Sundaresan. 2024. LLaMA-NAS: Efficient Neural Architecture Search for Large Language Models. arXiv preprint arXiv:2405.18377 (2024)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787476"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593704"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI63208.2024.00013"},{"key":"e_1_3_2_1_52_1","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Wang Weiyang","year":"2023","unstructured":"Weiyang Wang, Moein Khazraee, Zhizhen Zhong, Manya Ghobadi, Zhihao Jia, Dheevatsa Mudigere, Ying Zhang, and Anthony Kewitsch. 2023. {TopoOpt}: Co-optimizing Network Topology and Parallelization Strategy for Distributed Training Jobs. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). 739\u2013767."},{"key":"e_1_3_2_1_53_1","unstructured":"Tianwen Wei Bo Zhu Liang Zhao Cheng Cheng Biye Li Weiwei L\u00fc Peng Cheng Jianhao Zhang Xiaoyu Zhang Liang Zeng et al. 2024. Skywork-MoE: A Deep Dive into Training Techniques for Mixture-of-Experts Language Models. arXiv preprint arXiv:2406.06563 (2024)."},{"key":"e_1_3_2_1_54_1","volume-title":"2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 283\u2013294","author":"Won William","year":"2023","unstructured":"William Won, Taekyung Heo, Saeed Rashidi, Srinivas Sridharan, Sudarshan Srinivasan, and Tushar Krishna. 2023. Astra-sim2. 0: Modeling hierarchical networks and disaggregated systems for large-model training at scale. In 2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 283\u2013294."},{"key":"e_1_3_2_1_55_1","volume-title":"Openmoe: An early effort on open mixture-of-experts language models. arXiv preprint arXiv:2402.01739","author":"Xue Fuzhao","year":"2024","unstructured":"Fuzhao Xue, Zian Zheng, Yao Fu, Jinjie Ni, Zangwei Zheng, Wangchunshu Zhou, and Yang You. 2024. Openmoe: An early effort on open mixture-of-experts language models. arXiv preprint arXiv:2402.01739 (2024)."},{"key":"e_1_3_2_1_56_1","unstructured":"Aohan Zeng Xiao Liu Zhengxiao Du Zihan Wang Hanyu Lai Ming Ding Zhuoyi Yang Yifan Xu Wendi Zheng Xiao Xia et al. 2022. Glm-130b: An open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)."},{"key":"e_1_3_2_1_57_1","unstructured":"Chenggang Zhao Chengqi Deng Chong Ruan Damai Dai Huazuo Gao Jiashi Li Liyue Zhang Panpan Huang Shangyan Zhou Shirong Ma et al. 2025. Insights into deepseek-v3: Scaling challenges and reflections on hardware for ai architectures. arXiv preprint arXiv:2505.09343 (2025)."},{"key":"e_1_3_2_1_58_1","volume-title":"ForestColl: Efficient Collective Communications on Heterogeneous Network Fabrics. arXiv preprint arXiv:2402.06787","author":"Zhao Liangyu","year":"2024","unstructured":"Liangyu Zhao, Saeed Maleki, Ziyue Yang, Hossein Pourreza, Aashaka Shah, Changho Hwang, and Arvind Krishnamurthy. 2024. ForestColl: Efficient Collective Communications on Heterogeneous Network Fabrics. arXiv preprint arXiv:2402.06787 (2024)."},{"key":"e_1_3_2_1_59_1","volume-title":"Efficient Direct-Connect Topologies for Collective Communications. arXiv preprint arXiv:2202.03356","author":"Zhao Liangyu","year":"2022","unstructured":"Liangyu Zhao, Siddharth Pal, Tapan Chugh, Weiyang Wang, Jason Fantl, Prithwish Basu, Joud Khoury, and Arvind Krishnamurthy. 2022. Efficient Direct-Connect Topologies for Collective Communications. arXiv preprint arXiv:2202.03356 (2022)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0056872"}],"event":{"name":"SIGCOMM '25: ACM SIGCOMM 2025 Conference","location":"S\u00e3o Francisco Convent Coimbra Portugal","acronym":"SIGCOMM '25","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"]},"container-title":["Proceedings of the ACM SIGCOMM 2025 Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3718958.3750503","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T16:54:47Z","timestamp":1756313687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3718958.3750503"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,27]]},"references-count":60,"alternative-id":["10.1145\/3718958.3750503","10.1145\/3718958"],"URL":"https:\/\/doi.org\/10.1145\/3718958.3750503","relation":{},"subject":[],"published":{"date-parts":[[2025,8,27]]},"assertion":[{"value":"2025-08-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}