{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T12:08:36Z","timestamp":1763381316159,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","funder":[{"name":"Zuckerman STEM Leadership Program"},{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"publisher","award":["1998\/22","980\/21"],"award-info":[{"award-number":["1998\/22","980\/21"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,17]]},"DOI":"10.1145\/3772356.3772384","type":"proceedings-article","created":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T12:02:48Z","timestamp":1763380968000},"page":"139-148","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["FlowPulse: Catching Network Failures in ML Clusters"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8335-5680","authenticated-orcid":false,"given":"Jakob","family":"Krebs","sequence":"first","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6165-3481","authenticated-orcid":false,"given":"Dimitry","family":"Gavrilenko","sequence":"additional","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6294-9604","authenticated-orcid":false,"given":"Daniel","family":"Amir","sequence":"additional","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3998-8645","authenticated-orcid":false,"given":"Shir","family":"Landau Feibish","sequence":"additional","affiliation":[{"name":"University of Haifa, Haifa, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9659-068X","authenticated-orcid":false,"given":"Mark","family":"Silberstein","sequence":"additional","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]}],"member":"320","published-online":{"date-parts":[[2025,11,17]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"219","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Abhashkumar Anubhavnidhi","year":"2020","unstructured":"Anubhavnidhi Abhashkumar, Aaron Gember-Jacobson, and Aditya Akella. Tiramisu: Fast multilayer network verification. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20), pages 201\u2013219, 2020."},{"key":"e_1_3_2_1_2_1","first-page":"168","volume-title":"Proceedings of the Conference of the ACM Special Interest Group on Data Communication","author":"Beckett Ryan","year":"2017","unstructured":"Ryan Beckett, Aarti Gupta, Ratul Mahajan, and David Walker. A general approach to network configuration verification. In Proceedings of the Conference of the ACM Special Interest Group on Data Communication, pages 155\u2013168, 2017."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3035761"},{"key":"e_1_3_2_1_4_1","first-page":"60","volume-title":"Proceedings of the ninth ACM conference on Emerging networking experiments and technologies","author":"Cao Jiaxin","year":"2013","unstructured":"Jiaxin Cao, Rui Xia, Pengkun Yang, Chuanxiong Guo, Guohan Lu, Lihua Yuan, Yixin Zheng, Haitao Wu, Yongqiang Xiong, and Dave Maltz. Per-packet load-balanced, low-latency routing for clos-based data center networks. In Proceedings of the ninth ACM conference on Emerging networking experiments and technologies, pages 49\u201360, 2013."},{"key":"e_1_3_2_1_5_1","volume-title":"NS-3 Network Simulator. https:\/\/www.nsnam.org\/","author":"Maintainers Contributors","year":"2025","unstructured":"NS3 Contributors and Maintainers. NS-3 Network Simulator. https:\/\/www.nsnam.org\/, 2025."},{"key":"e_1_3_2_1_6_1","volume-title":"https:\/\/docs.nvidia.com\/networking\/display\/rdmacore50\/mlnx_ofed+features+verbs+and+capabilities","author":"NVIDIA Corporation","year":"2023","unstructured":"NVIDIA Corporation. MLNX_OFED Features Verbs and Capabilities. https:\/\/docs.nvidia.com\/networking\/display\/rdmacore50\/mlnx_ofed+features+verbs+and+capabilities, 2023."},{"key":"e_1_3_2_1_7_1","volume-title":"NVIDIA Spectrum SN5000 Series Switches. https:\/\/nvdam.widen.net\/s\/mmvbnpk8qk\/networking-ethernet-switches-sn5000-datasheet-us","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA Spectrum SN5000 Series Switches. https:\/\/nvdam.widen.net\/s\/mmvbnpk8qk\/networking-ethernet-switches-sn5000-datasheet-us, 2024."},{"key":"e_1_3_2_1_8_1","volume-title":"NVIDIA Spectrum-X Network Platform Architecture. https:\/\/resources.nvidia.com\/en-us-accelerated-networking-resource-library\/nvidia-spectrum-x","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA Spectrum-X Network Platform Architecture. https:\/\/resources.nvidia.com\/en-us-accelerated-networking-resource-library\/nvidia-spectrum-x, 2024."},{"key":"e_1_3_2_1_9_1","volume-title":"NVIDIA Supercharges Ethernet Networking for Generative AI. https:\/\/nvidianews.nvidia.com\/news\/nvidia-supercharges-ethernet-networking-for-generative-ai","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA Supercharges Ethernet Networking for Generative AI. https:\/\/nvidianews.nvidia.com\/news\/nvidia-supercharges-ethernet-networking-for-generative-ai, 2024."},{"key":"e_1_3_2_1_10_1","unstructured":"NVIDIA Corporation. NVIDIA DGX SuperPOD: Next Generation Scalable Infrastructure for AI Leadership Reference Architecture Featuring NVDIA DGX H100. https:\/\/docs.nvidia.com\/dgx-superpod\/reference-architecture-scalable-infrastructure-h100\/latest\/network-fabrics.html 2025."},{"key":"e_1_3_2_1_11_1","volume-title":"NVIDIA NCCL Source Code. https:\/\/github.com\/NVIDIA\/nccl","author":"NVIDIA Corporation","year":"2025","unstructured":"NVIDIA Corporation. NVIDIA NCCL Source Code. https:\/\/github.com\/NVIDIA\/nccl, 2025."},{"key":"e_1_3_2_1_12_1","first-page":"2138","volume-title":"2013 Proceedings IEEE INFOCOM","author":"Dixit Advait","unstructured":"Advait Dixit, Pawan Prakash, Y Charlie Hu, and Ramana Rao Kompella. On the impact of packet spraying in data center networks. In 2013 Proceedings IEEE INFOCOM, pages 2130\u20132138. IEEE, 2013."},{"key":"e_1_3_2_1_13_1","volume-title":"Software for Open Networking in the Cloud (SONiC). https:\/\/sonicfoundation.dev\/","author":"Foundation Linux","year":"2023","unstructured":"Linux Foundation. Software for Open Networking in the Cloud (SONiC). https:\/\/sonicfoundation.dev\/, 2023."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672233"},{"key":"e_1_3_2_1_15_1","first-page":"204","volume-title":"One. In Proceedings of the 23rd ACM Workshop on Hot Topics in Networks","author":"Gherghescu Alexandru M","year":"2024","unstructured":"Alexandru M Gherghescu, Vlad-Andrei B\u0103doiu, Alexandru Agache, Mihai-Valentin Dumitru, Iuliu Vasilescu, Radu Mantu, and Costin Raiciu. I've Got 99 Problems But FLOPS Ain't One. In Proceedings of the 23rd ACM Workshop on Hot Topics in Networks, pages 195\u2013204, 2024."},{"key":"e_1_3_2_1_16_1","first-page":"238","volume-title":"Proceedings of the Conference of the ACM Special Interest Group on Data Communication","author":"Ghorbani Soudeh","year":"2017","unstructured":"Soudeh Ghorbani, Zibin Yang, P Brighten Godfrey, Yashar Ganjali, and Amin Firoozshahian. Drill: Micro load balancing for low-latency data center networks. In Proceedings of the Conference of the ACM Special Interest Group on Data Communication, pages 225\u2013238, 2017."},{"key":"e_1_3_2_1_17_1","volume-title":"The Llama 3 Herd of Models","author":"Grattafiori Aaron","year":"2024","unstructured":"Aaron Grattafiori, Abhimanyu Dubey, Abhinav Jauhri, et al. The Llama 3 Herd of Models, 2024."},{"key":"e_1_3_2_1_18_1","first-page":"152","volume-title":"Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication","author":"Guo Chuanxiong","year":"2015","unstructured":"Chuanxiong Guo, Lihua Yuan, Dong Xiang, Yingnong Dang, Ray Huang, Dave Maltz, Zhaoyi Liu, Vin Wang, Bin Pang, Hua Chen, et al. Pingmesh: A large-scale system for data center network latency measurement and analysis. In Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication, pages 139\u2013152, 2015."},{"issue":"4","key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1145\/2829988.2787507","article-title":"Edge-based load balancing for fast data-center networks","volume":"45","author":"He Keqiang","year":"2015","unstructured":"Keqiang He, Eric Rozner, Kanak Agarwal, Wes Felter, John Carter, and Aditya Akella. Presto: Edge-based load balancing for fast data-center networks. ACM SIGCOMM Computer Communication Review, 45(4):465\u2013478, 2015.","journal-title":"ACM SIGCOMM Computer Communication Review"},{"key":"e_1_3_2_1_20_1","first-page":"729","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Hu Qinghao","year":"2024","unstructured":"Qinghao Hu, Zhisheng Ye, Zerui Wang, Guoteng Wang, Meng Zhang, Qiaoling Chen, Peng Sun, Dahua Lin, Xiaolin Wang, Yingwei Luo, et al. Characterization of large language model development in the datacenter. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24), pages 709\u2013729, 2024."},{"key":"e_1_3_2_1_21_1","first-page":"155","volume-title":"Proceedings of the 16th Workshop on Hot Topics in Operating Systems","author":"Huang Peng","year":"2017","unstructured":"Peng Huang, Chuanxiong Guo, Lidong Zhou, Jacob R Lorch, Yingnong Dang, Murali Chintalapati, and Randolph Yao. Gray failure: The achilles' heel of cloud-scale systems. In Proceedings of the 16th Workshop on Hot Topics in Operating Systems, pages 150\u2013155, 2017."},{"key":"e_1_3_2_1_22_1","unstructured":"Cisco Systems Inc. Cisco Nexus 9000 Series NX-OS Unicast Routing Configuration Guide Configure Dynamic Load Balancing. https:\/\/www.cisco.com\/c7en\/us\/td\/docs\/dcn\/nx-os\/nexus9000\/105x\/unicast-routing-configuration\/cisco-nexus-9000-series-nx-os-unicast-routing-configuration-guide\/m-configure-dynamic-load-balancing.html 2025."},{"key":"e_1_3_2_1_23_1","first-page":"9","volume-title":"NOMS 2020-2020 IEEE\/IFIP Network Operations and Management Symposium","author":"Jia Chenhao","unstructured":"Chenhao Jia, Tian Pan, Zizheng Bian, Xingchen Lin, Enge Song, Cheng Xu, Tao Huang, and Yunjie Liu. Rapid detection and localization of gray failures in data centers via in-band network telemetry. In NOMS 2020-2020 IEEE\/IFIP Network Operations and Management Symposium, pages 1\u20139. IEEE, 2020."},{"key":"e_1_3_2_1_24_1","volume-title":"Cognitive routing in the Tomahawk 5 data center switch. https:\/\/www.broadcom.com\/blog\/cognitive-routing-in-the-tomahawk-5-data-center-switch","author":"Kalkunte Mohan","year":"2023","unstructured":"Mohan Kalkunte, Niranjan Vaidya, and Pete Del Vecchio. Cognitive routing in the Tomahawk 5 data center switch. https:\/\/www.broadcom.com\/blog\/cognitive-routing-in-the-tomahawk-5-data-center-switch, 2023."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1188455.1188552"},{"key":"e_1_3_2_1_26_1","first-page":"1274","volume-title":"Carole-Jean Wu. Revisiting Reliability in Large-Scale Machine Learning Research Clusters. In 2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","author":"Kokolis Apostolos","unstructured":"Apostolos Kokolis, Michael Kuchnik, John Hoffman, Adithya Kumar, Parth Malani, Faye Ma, Zachary DeVito, Shubho Sengupta, Kalyan Saladi, and Carole-Jean Wu. Revisiting Reliability in Large-Scale Machine Learning Research Clusters. In 2025 IEEE International Symposium on High Performance Computer Architecture (HPCA), pages 1259\u20131274. IEEE, 2025."},{"key":"e_1_3_2_1_27_1","first-page":"8","volume-title":"Proceedings of the 8th Asia-Pacific Workshop on Networking","author":"Li Wenxue","year":"2024","unstructured":"Wenxue Li, Xiangzhou Liu, Yuxuan Li, Yilun Jin, Han Tian, Zhizhen Zhong, Guyue Liu, Ying Zhang, and Kai Chen. Understanding communication characteristics of distributed training. In Proceedings of the 8th Asia-Pacific Workshop on Networking, pages 1\u20138, 2024."},{"key":"e_1_3_2_1_28_1","first-page":"495","volume-title":"Proceedings of the 12th International on Conference on emerging Networking EXperiments and Technologies","author":"Li Yuliang","year":"2016","unstructured":"Yuliang Li, Rui Miao, Changhoon Kim, and Minlan Yu. LossRadar: Fast detection of lost packets in data center networks. In Proceedings of the 12th International on Conference on emerging Networking EXperiments and Technologies, pages 481\u2013495, 2016."},{"key":"e_1_3_2_1_29_1","volume-title":"The power of two choices in randomized load balancing","author":"Mitzenmacher Michael","year":"2002","unstructured":"Michael Mitzenmacher. The power of two choices in randomized load balancing. IEEE transactions on parallel and distributed systems, 12(10):1094\u20131104, 2002."},{"key":"e_1_3_2_1_30_1","volume-title":"Cornelis Omni-Path Express Edge Switches. https:\/\/www.cornelisnetworks.com\/product\/cornelis-omni-path-express-edge-switches","author":"Networks Cornelis","year":"2025","unstructured":"Cornelis Networks. Cornelis Omni-Path Express Edge Switches. https:\/\/www.cornelisnetworks.com\/product\/cornelis-omni-path-express-edge-switches, 2025."},{"key":"e_1_3_2_1_31_1","volume-title":"Troubleshoot Switch Port and Interface Problems. https:\/\/www.cisco.com\/c\/en\/us\/support\/docs\/switches\/catalyst-6500-series-switches\/12027-53.html","author":"Prince Ming","year":"2023","unstructured":"Ming Prince. Troubleshoot Switch Port and Interface Problems. https:\/\/www.cisco.com\/c\/en\/us\/support\/docs\/switches\/catalyst-6500-series-switches\/12027-53.html, 2023."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672265"},{"key":"e_1_3_2_1_33_1","first-page":"194","volume-title":"Nate Foster. Hydra: Effective Runtime Network Verification. In Proceedings of the ACM SIGCOMM 2023 Conference","author":"Renganathan Sundararajan","year":"2023","unstructured":"Sundararajan Renganathan, Benny Rubin, Hyojoon Kim, Pier Luigi Ventre, Carmelo Cascone, Daniele Moro, Charles Chan, Nick McKeown, and Nate Foster. Hydra: Effective Runtime Network Verification. In Proceedings of the ACM SIGCOMM 2023 Conference, pages 182\u2013194, 2023."},{"key":"e_1_3_2_1_34_1","volume-title":"Turbocharging Generative AI Workloads with NVIDIA Spectrum-X Networking Platform. https:\/\/developer.nvidia.com\/blog\/turbocharging-ai-workloads-with-nvidia-spectrum-x-networking-platform\/","author":"Rizk Peter","year":"2023","unstructured":"Peter Rizk. Turbocharging Generative AI Workloads with NVIDIA Spectrum-X Networking Platform. https:\/\/developer.nvidia.com\/blog\/turbocharging-ai-workloads-with-nvidia-spectrum-x-networking-platform\/, 2023."},{"key":"e_1_3_2_1_35_1","first-page":"612","volume-title":"Snoeren. Passive Realtime Datacenter Fault Detection and Localization. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Roy Arjun","year":"2017","unstructured":"Arjun Roy, Hongyi Zeng, Jasmeet Bagga, and Alex C. Snoeren. Passive Realtime Datacenter Fault Detection and Localization. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17), pages 595\u2013612, Boston, MA, March 2017. USENIX Association."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2009.09.001"},{"key":"e_1_3_2_1_37_1","volume-title":"Built by xAI. https:\/\/nvidianews.nvidia.com\/news\/spectrum-x-ethernet-networking-xai-colossus","author":"Shapiro Alex","year":"2024","unstructured":"Alex Shapiro. NVIDIA Ethernet Networking Accelerates World's Largest AI Supercomputer, Built by xAI. https:\/\/nvidianews.nvidia.com\/news\/spectrum-x-ethernet-networking-xai-colossus, 2024."},{"key":"e_1_3_2_1_38_1","volume-title":"The Challenges and Practices of Network Stability in Alibabas Large Scale Computing Clusters. https:\/\/www.youtube.com\/watch?v=-3qZL_DOWAc","author":"Shi Xuemei","year":"2024","unstructured":"Xuemei Shi and Surendra Anubolu. The Challenges and Practices of Network Stability in Alibabas Large Scale Computing Clusters. https:\/\/www.youtube.com\/watch?v=-3qZL_DOWAc, 2024. OCP Global Summit 2024."},{"key":"e_1_3_2_1_39_1","first-page":"482","volume-title":"15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18)","author":"Tilmans Olivier","year":"2018","unstructured":"Olivier Tilmans, Tobias B\u00fchler, Ingmar Poese, Stefano Vissicchio, and Laurent Vanbever. Stroboscope: Declarative network monitoring on a budget. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18), pages 467\u2013482, 2018."},{"key":"e_1_3_2_1_40_1","first-page":"420","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Vanini Erico","year":"2017","unstructured":"Erico Vanini, Rong Pan, Mohammad Alizadeh, Parvin Taheri, and Tom Edsall. Let it flow: Resilient asymmetric load balancing with flowlet switching. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17), pages 407\u2013420, 2017."},{"key":"e_1_3_2_1_41_1","volume-title":"Vigraham and Benjamin Leonhardi. Maintaining large-scale AI capacity at Meta. https:\/\/engineering.fb.com\/2024\/06\/12\/production-engineering\/maintaining-large-scale-ai-capacity-meta\/","author":"Saranyan","year":"2024","unstructured":"Saranyan A. Vigraham and Benjamin Leonhardi. Maintaining large-scale AI capacity at Meta. https:\/\/engineering.fb.com\/2024\/06\/12\/production-engineering\/maintaining-large-scale-ai-capacity-meta\/, 2024."},{"key":"e_1_3_2_1_42_1","volume-title":"HBF and SHIELD. https:\/\/enterprise-support.nvidia.com\/s\/article\/Recommended-Topologies-for-Implementing-an-HPC-Cluster-with-NVIDIA-Quantum-InfiniBand-Solutions-Part-2","author":"Vladimir Koushnir","year":"2024","unstructured":"Koushnir Vladimir. Recommended Topologies for Implementing an HPC Cluster with NVIDIA Quantum InfiniBand Solutions - Part 2 - Adaptive routing, HBF and SHIELD. https:\/\/enterprise-support.nvidia.com\/s\/article\/Recommended-Topologies-for-Implementing-an-HPC-Cluster-with-NVIDIA-Quantum-InfiniBand-Solutions-Part-2, 2024."},{"key":"e_1_3_2_1_43_1","volume-title":"How to build low-cost networks for large language models (without sacrificing performance)? arXiv e-prints","author":"Wang Weiyang","year":"2023","unstructured":"Weiyang Wang, Manya Ghobadi, Kayvon Shakeri, Ying Zhang, and Naader Hasani. How to build low-cost networks for large language models (without sacrificing performance)? arXiv e-prints, pages arXiv-2307, 2023."},{"key":"e_1_3_2_1_44_1","first-page":"850","volume-title":"2024 USENIX Annual Technical Conference (USENIX ATC 24)","author":"Xiong Yifan","year":"2024","unstructured":"Yifan Xiong, Yuting Jiang, Ziyue Yang, Lei Qu, Guoshuai Zhao, Shuguang Liu, Dong Zhong, Boris Pinzur, Jie Zhang, Yang Wang, et al. {SuperBench}: Improving Cloud {AI} Infrastructure Reliability with Proactive Validation. In 2024 USENIX Annual Technical Conference (USENIX ATC 24), pages 835\u2013850, 2024."},{"key":"e_1_3_2_1_45_1","first-page":"540","volume-title":"22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI 25)","author":"Yao Zhiyi","year":"2025","unstructured":"Zhiyi Yao, Pengbo Hu, Congcong Miao, Xuya Jia, Zuning Liang, Yuedong Xu, Chunzhi He, Hao Lu, Mingzhuo Chen, Xiang Li, et al. Holmes: Localizing Irregularities in {LLM} Training with Mega-scale {GPU} Clusters. In 22nd USENIX Symposium on Networked Systems Design and Implementation (NSDI 25), pages 523\u2013540, 2025."},{"key":"e_1_3_2_1_46_1","first-page":"491","volume-title":"Proc of the 2015 ACM Conference on Special Interest Group on Data Communication. New York: ACM Press","author":"Yibo Zhu","year":"2015","unstructured":"Zhu Yibo, Kang Nanxi, Cao Jiaxin, et al. Packet-level telemetry in large datacenter networks. In Proc of the 2015 ACM Conference on Special Interest Group on Data Communication. New York: ACM Press, pages 479\u2013491, 2015."},{"key":"e_1_3_2_1_47_1","first-page":"408","volume-title":"2023 24st Asia-Pacific Network Operations and Management Symposium (APNOMS)","author":"Zhang Kuichao","unstructured":"Kuichao Zhang, Wei Su, Huiling Shi, Kai Zhang, and Wei Zhang. GrayINT-Detection and Localization of Gray Failures via Hybrid In-band Network Telemetry. In 2023 24st Asia-Pacific Network Operations and Management Symposium (APNOMS), pages 405\u2013408. IEEE, 2023."},{"key":"e_1_3_2_1_48_1","volume-title":"Insights into DeepSeek-V3: Scaling Challenges and Reflections on Hardware for AI Architectures. arXiv preprint arXiv:2505.09343","author":"Zhao Chenggang","year":"2025","unstructured":"Chenggang Zhao, Chengqi Deng, Chong Ruan, Damai Dai, Huazuo Gao, Jiashi Li, Liyue Zhang, Panpan Huang, Shangyan Zhou, Shirong Ma, et al. Insights into DeepSeek-V3: Scaling Challenges and Reflections on Hardware for AI Architectures. arXiv preprint arXiv:2505.09343, 2025."}],"event":{"name":"HotNets '25: 24th ACM Workshop on Hot Topics in Networks","location":"UMD Campus College Park MD USA","acronym":"HotNets '25","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"]},"container-title":["Proceedings of the 24th ACM Workshop on Hot Topics in Networks"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772356.3772384","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T12:05:38Z","timestamp":1763381138000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772356.3772384"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,17]]},"references-count":48,"alternative-id":["10.1145\/3772356.3772384","10.1145\/3772356"],"URL":"https:\/\/doi.org\/10.1145\/3772356.3772384","relation":{},"subject":[],"published":{"date-parts":[[2025,11,17]]},"assertion":[{"value":"2025-11-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}