{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:44:04Z","timestamp":1773193444728,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Cisco Systems","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2444537"],"award-info":[{"award-number":["2444537"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,18]]},"DOI":"10.1145\/3696348.3696856","type":"proceedings-article","created":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T00:20:52Z","timestamp":1731284452000},"page":"290-299","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["A case for server-scale photonic connectivity"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6599-3996","authenticated-orcid":false,"given":"Abhishek Vijaya","family":"Kumar","sequence":"first","affiliation":[{"name":"Cornell University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2866-9824","authenticated-orcid":false,"given":"Arjun","family":"Devraj","sequence":"additional","affiliation":[{"name":"Cornell University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8218-5656","authenticated-orcid":false,"given":"Darius","family":"Bunandar","sequence":"additional","affiliation":[{"name":"Lightmatter"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8118-3026","authenticated-orcid":false,"given":"Rachee","family":"Singh","sequence":"additional","affiliation":[{"name":"Cornell University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IEEESTD.2010.5412866"},{"key":"e_1_3_2_1_2_1","volume-title":"Scaling Distributed Machine Learning with In-Network Aggregation. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","unstructured":"2021. Scaling Distributed Machine Learning with In-Network Aggregation. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). USENIX Association, 785--808. https:\/\/www.usenix.org\/conference\/nsdi21\/presentation\/sapio"},{"key":"e_1_3_2_1_3_1","unstructured":"2023. TPU v4 Documentation. https:\/\/cloud.google.com\/tpu\/docs\/v4. Accessed on 2024-05-29."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604878"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1402958.1402967"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787481"},{"key":"e_1_3_2_1_7_1","unstructured":"Cerebras 2021. The future of AI is Wafer-Scale. https:\/\/www.cerebras.net\/product-chip\/."},{"key":"e_1_3_2_1_8_1","volume-title":"Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860","author":"Dai Zihang","year":"2019","unstructured":"Zihang Dai, Zhilin Yang, Yiming Yang, Jaime Carbonell, Quoc V Le, and Ruslan Salakhutdinov. 2019. Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860 (2019)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441727"},{"key":"e_1_3_2_1_10_1","first-page":"32","article-title":"MPI: A message-passing interface standard version 3.0","volume":"2","author":"Jack Dongarra","year":"2013","unstructured":"Jack Dongarra et al. 2013. MPI: A message-passing interface standard version 3.0. High Performance Computing Center Stuttgart (HLRS) 2, 5 (2013), 32.","journal-title":"High Performance Computing Center Stuttgart (HLRS)"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1851182.1851223"},{"key":"e_1_3_2_1_12_1","volume-title":"Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity. CoRR abs\/2101.03961","author":"Fedus William","year":"2021","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2021. Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity. CoRR abs\/2101.03961 (2021). arXiv:2101.03961 https:\/\/arxiv.org\/abs\/2101.03961"},{"key":"e_1_3_2_1_13_1","unstructured":"Fierce Electronics. (Accessed on 2023-05-26). ChatGPT runs 10K Nvidia training GPUs with potential for thousands more. https:\/\/www.fierceelectronics.com\/sensors\/chatgpt-runs-10k-nvidia-training-gpus-potential-thousands-more."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of Machine Learning and Systems, A. Smola, A. Dimakis, and I. Stoica (Eds.)","volume":"3","author":"Gebara Nadeen","year":"2021","unstructured":"Nadeen Gebara, Manya Ghobadi, and Paolo Costa. 2021. In-network Aggregation for Shared Machine Learning Clusters. In Proceedings of Machine Learning and Systems, A. Smola, A. Dimakis, and I. Stoica (Eds.), Vol. 3. 829--844. https:\/\/proceedings.mlsys.org\/paper\/2021\/file\/eae27d77ca20db309e056e3d2dcd7d69-Paper.pdf"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934872.2934911"},{"key":"e_1_3_2_1_16_1","unstructured":"Global Foundries. (Accessed on 2023-06-16). Global Foundries. https:\/\/gf.com\/."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1592568.1592576"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2619239.2626328"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"HotChips 34. (Accessed on 2023-05-26). Passage---A Wafer-Scale Programmable Photonic Communication Substrate. https:\/\/hc34.hotchips.org\/assets\/program\/conference\/day1.","DOI":"10.1109\/HCS55958.2022.9895610"},{"key":"e_1_3_2_1_20_1","unstructured":"Intel Gaudi AI accelerator 2021. Intel Gaudi AI accelerator. https:\/\/habana.ai\/products\/gaudi\/."},{"key":"e_1_3_2_1_21_1","volume-title":"Integrated Circuit Fabrication Science and Technology","author":"Griffin Peter B.","unstructured":"Peter B. Griffin James D. Plummer. 2023. Integrated Circuit Fabrication Science and Technology. Cambridge University Press, Cambridge, United Kingdom."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934872.2934904"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Norman P. Jouppi George Kurian Sheng Li Peter Ma Rahul Nagarajan Lifeng Nai Nishant Patil Suvinay Subramanian Andy Swing Brian Towles Cliff Young Xiang Zhou Zongwei Zhou and David Patterson. 2023. TPU v4: An Optically Reconfigurable Supercomputer for Machine Learning with Hardware Support for Embeddings. arXiv:2304.01433 [cs.AR]","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_2_1_24_1","volume-title":"SiP-ML: High-Bandwidth Optical Network Interconnects for Machine Learning Training. In Proceedings of the 2021 ACM SIGCOMM 2021 Conference.","author":"Khani Mehrdad","unstructured":"Mehrdad Khani, Manya Ghobadi, Mohammad Alizadeh, Ziyi Zhu, Madeleine Glick, Keren Bergman, Amin Vahdat, Benjamin Klenk, and Eiman Ebrahimi. [n. d.]. SiP-ML: High-Bandwidth Optical Network Interconnects for Machine Learning Training. In Proceedings of the 2021 ACM SIGCOMM 2021 Conference."},{"key":"e_1_3_2_1_25_1","volume-title":"ATP: In-network Aggregation for Multi-tenant Learning. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21)","author":"Lao ChonLam","year":"2021","unstructured":"ChonLam Lao, Yanfang Le, Kshiteej Mahajan, Yixi Chen, Wenfei Wu, Aditya Akella, and Michael Swift. 2021. ATP: In-network Aggregation for Multi-tenant Learning. In 18th USENIX Symposium on Networked Systems Design and Implementation (NSDI 21). USENIX Association, 741--761. https:\/\/www.usenix.org\/conference\/nsdi21\/presentation\/lao"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01215349"},{"key":"e_1_3_2_1_27_1","volume-title":"GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. CoRR abs\/2006.16668","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. CoRR abs\/2006.16668 (2020). arXiv:2006.16668 https:\/\/arxiv.org\/abs\/2006.16668"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604836"},{"key":"e_1_3_2_1_29_1","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Mellette William M.","year":"2020","unstructured":"William M. Mellette, Rajdeep Das, Yibo Guo, Rob McGuinness, Alex C. Snoeren, and George Porter. 2020. Expanding across time to deliver bandwidth efficiency and low latency. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20). USENIX Association, Santa Clara, CA, 1--18. https:\/\/www.usenix.org\/conference\/nsdi20\/presentation\/mellette"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3098822.3098838"},{"key":"e_1_3_2_1_31_1","unstructured":"MT-NLG 2021. Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B the World's Largest and Most Powerful Generative Language Model. https:\/\/www.microsoft.com\/en-us\/research\/blog\/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model\/. Accessed October 2021."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230560"},{"key":"e_1_3_2_1_33_1","unstructured":"Nvidia DGX Systems 2021. Nvidia DGX Systems. https:\/\/www.nvidia.com\/en-us\/data-center\/dgx-systems\/."},{"key":"e_1_3_2_1_34_1","unstructured":"Nvidia NVLink 2021. Nvidia NVLink and NVSwitch. https:\/\/www.nvidia.com\/en-us\/data-center\/nvlink\/."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544265"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3563766.3564115"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1364\/NETWORKS.2022.NeTu1D.1"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Thomas Rothvoss. 2012. A simpler proof for O(congestion + dilation) packet routing. arXiv:1206.3718 [cs.DS]","DOI":"10.1007\/978-3-642-36694-9_29"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2686882"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2686882"},{"key":"e_1_3_2_1_41_1","volume-title":"Swing: Short-cutting Rings for Higher Bandwidth Allreduce. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Sensi Daniele De","year":"2024","unstructured":"Daniele De Sensi, Tommaso Bonato, David Saam, and Torsten Hoefler. 2024. Swing: Short-cutting Rings for Higher Bandwidth Allreduce. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24). USENIX Association, Santa Clara, CA, 1445--1462. https:\/\/www.usenix.org\/conference\/nsdi24\/presentation\/de-sensi"},{"key":"e_1_3_2_1_42_1","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Shah Aashaka","year":"2023","unstructured":"Aashaka Shah, Vijay Chidambaram, Meghan Cowan, Saeed Maleki, Madan Musuvathi, Todd Mytkowicz, Jacob Nelson, Olli Saarikivi, and Rachee Singh. 2023. TACCL: Guiding Collective Algorithm Synthesis using Communication Sketches. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 593--612. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/shah"},{"key":"e_1_3_2_1_43_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=B1ckMDqlg","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=B1ckMDqlg"},{"key":"e_1_3_2_1_44_1","volume-title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. CoRR abs\/1909.08053","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. 2019. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. CoRR abs\/1909.08053 (2019). arXiv:1909.08053 http:\/\/arxiv.org\/abs\/1909.08053"},{"key":"e_1_3_2_1_45_1","volume-title":"Optical Network Design and Planning","author":"Simmons Jane M.","unstructured":"Jane M. Simmons. 2008. Optical Network Design and Planning (2nd ed.). Springer, New York.","edition":"2"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2829988.2787508"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472895"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230570"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1364\/JOCN.451760"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2022.3210534"},{"key":"e_1_3_2_1_51_1","volume-title":"How NVIDIA Blackwell Systems Attack 1-Trillion-Parameter AI Models. The Next Platform","author":"Platform The Next","year":"2024","unstructured":"The Next Platform. 2024. How NVIDIA Blackwell Systems Attack 1-Trillion-Parameter AI Models. The Next Platform (2024). https:\/\/www.nextplatform.com\/2024\/03\/19\/how-nvidia-blackwell-systems-attack-1-trillion-parameter-ai-models\/ Accessed: 2024-06-22."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/1851182.1851222"},{"key":"e_1_3_2_1_53_1","volume-title":"TopoOpt: Co-optimizing Network Topology and Parallelization Strategy for Distributed Training Jobs. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Wang Weiyang","year":"2023","unstructured":"Weiyang Wang, Moein Khazraee, Zhizhen Zhong, Manya Ghobadi, Zhihao Jia, Dheevatsa Mudigere, Ying Zhang, and Anthony Kewitsch. 2023. TopoOpt: Co-optimizing Network Topology and Parallelization Strategy for Distributed Training Jobs. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 739--767. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/wang-weiyang"},{"key":"e_1_3_2_1_54_1","volume-title":"Optical Fiber Telecommunications. Number v. 11","author":"Willner A.","unstructured":"A. Willner. 2019. Optical Fiber Telecommunications. Number v. 11. Elsevier Science. https:\/\/books.google.com\/books?id=A5W2DwAAQBAJ"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1364\/OFC.2023.W1G.1"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1364\/OFC.2013.OW3A.5"},{"key":"e_1_3_2_1_57_1","unstructured":"K. P. Zetie S. F. Adams and R. M. Tocknell. 2000. How Does a Mach-Zehnder Interferometer Work? https:\/\/www.cs.princeton.edu\/courses\/archive\/fall06\/cos576\/papers\/zetie_et_al_mach_zehnder00.pdf. Accessed on 2024-06-01."},{"key":"e_1_3_2_1_58_1","unstructured":"Liangyu Zhao Saeed Maleki Ziyue Yang Hossein Pourreza Aashaka Shah Changho Hwang and Arvind Krishnamurthy. 2024. Forest-Coll: Efficient Collective Communications on Heterogeneous Network Fabrics. arXiv:2402.06787 [cs.NI]"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472921"},{"key":"e_1_3_2_1_60_1","volume-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"Zu Yazhou","year":"2024","unstructured":"Yazhou Zu, Alireza Ghaffarkhah, Hoang-Vu Dang, Brian Towles, Steven Hand, Safeen Huda, Adekunle Bello, Alexander Kolbasov, Arash Rezaei, Dayou Du, Steve Lacy, Hang Wang, Aaron Wisner, Chris Lewis, and Henri Bahini. 2024. Resiliency at Scale: Managing Google's TPUv4 Machine Learning Supercomputer. In 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24). USENIX Association, Santa Clara, CA, 761--774. https:\/\/www.usenix.org\/conference\/nsdi24\/presentation\/zu"}],"event":{"name":"HotNets '24: The 23rd ACM Workshop on Hot Topics in Networks","location":"Irvine CA USA","acronym":"HotNets '24","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"]},"container-title":["Proceedings of the 23rd ACM Workshop on Hot Topics in Networks"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696348.3696856","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696348.3696856","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T16:09:19Z","timestamp":1755878959000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696348.3696856"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,18]]},"references-count":60,"alternative-id":["10.1145\/3696348.3696856","10.1145\/3696348"],"URL":"https:\/\/doi.org\/10.1145\/3696348.3696856","relation":{},"subject":[],"published":{"date-parts":[[2024,11,18]]},"assertion":[{"value":"2024-11-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}