{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:44:25Z","timestamp":1760057065797,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3748273.3749199","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T16:19:34Z","timestamp":1756829974000},"page":"19-25","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["NSX: Large-Scale Network Simulation on an AI Server"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1394-0423","authenticated-orcid":false,"given":"Sajy","family":"Khashab","sequence":"first","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1357-1494","authenticated-orcid":false,"given":"Hariharan","family":"Sezhiyan","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5562-0067","authenticated-orcid":false,"given":"Rani","family":"Abboud","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2824-5997","authenticated-orcid":false,"given":"Alex","family":"Normatov","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0608-481X","authenticated-orcid":false,"given":"Stefan","family":"Kaestle","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3896-6165","authenticated-orcid":false,"given":"Eliav","family":"Bar-Ilan","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5273-8521","authenticated-orcid":false,"given":"Mohammad","family":"Nassar","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4990-1517","authenticated-orcid":false,"given":"Omer","family":"Shabtai","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8898-8070","authenticated-orcid":false,"given":"Wei","family":"Bai","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2585-9581","authenticated-orcid":false,"given":"Matty","family":"Kadosh","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6163-0569","authenticated-orcid":false,"given":"Jiarong","family":"Xing","sequence":"additional","affiliation":[{"name":"Rice University"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9659-068X","authenticated-orcid":false,"given":"Mark","family":"Silberstein","sequence":"additional","affiliation":[{"name":"NVIDIA and Technion"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2954-0767","authenticated-orcid":false,"given":"T.S. Eugene","family":"Ng","sequence":"additional","affiliation":[{"name":"Rice University"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8326-8124","authenticated-orcid":false,"given":"Ang","family":"Chen","sequence":"additional","affiliation":[{"name":"NVIDIA and University of Michigan"}]}],"member":"320","published-online":{"date-parts":[[2025,9,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1013329.1013355"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. 2019. AMD EPYC 7002 Series Processors. https:\/\/www.amd.com\/en\/products\/processors\/server\/epyc\/7002-series.html."},{"volume-title":"Discrete-Event System Simulation","author":"Banks Carson J. S.","key":"e_1_3_2_1_3_1","unstructured":"Carson J. S. Nelson B. L. Nicol D. M. Banks, J. 2010. Discrete-Event System Simulation. Prentice Hall."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3286062.3286083"},{"key":"e_1_3_2_1_5_1","unstructured":"XAI cluster. 2024. https:\/\/www.capacitymedia.com\/article\/2e4448ylfh4c7zxhcavwg\/news\/article-musks-xais-colossus-cluster-set-for-one-million-gpu-supercomputer-expansion."},{"key":"e_1_3_2_1_6_1","unstructured":"Nvidia Corporation. 2021. NVSHMEM. https:\/\/developer.nvidia.com\/nvshmem."},{"key":"e_1_3_2_1_7_1","volume-title":"https:\/\/developer.nvidia.com\/blog\/cuda-graphs","author":"CUDA Graph. Nvidia Corporation","year":"2019","unstructured":"CUDA Graph. Nvidia Corporation. https:\/\/developer.nvidia.com\/blog\/cuda-graphs. 2019."},{"key":"e_1_3_2_1_8_1","volume-title":"scheduling and floating point. https:\/\/cseweb.ucsd.edu\/\/classes\/fa12\/cse260-b\/Lectures\/Lec09.pdf","author":"Divergence","year":"2012","unstructured":"Divergence, scheduling and floating point. https:\/\/cseweb.ucsd.edu\/\/classes\/fa12\/cse260-b\/Lectures\/Lec09.pdf. 2012."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","unstructured":"Donghua Xu and M. Ammar. 2004. BencHMAP: benchmark-based hardware and model-aware partitioning for parallel and distributed network simulation. The IEEE Computer Society's 12th Annual International Symposium on Modeling Analysis and Simulation of Computer and Telecommunications Systems 2004. (MASCOTS 2004). Proceedings. Volendam Netherlands 2004 pp. 455--463. (2004). https:\/\/doi.org\/10.1109\/MASCOT.2004.1348301","DOI":"10.1109\/MASCOT.2004.1348301"},{"key":"e_1_3_2_1_10_1","volume-title":"https:\/\/developer.nvidia.com\/blog\/dynamic-control-flow-in-cuda-graphs-with-conditional-nodes\/","author":"Conditional Nodes Dynamic Control","year":"2024","unstructured":"Dynamic Control Flow in CUDA Graphs with Conditional Nodes. https:\/\/developer.nvidia.com\/blog\/dynamic-control-flow-in-cuda-graphs-with-conditional-nodes\/ 2024."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3635035.3635036"},{"key":"e_1_3_2_1_12_1","volume-title":"Parallel discrete event simulation. Modeling and Tools for Network Simulation","author":"Kunz Georg","year":"2010","unstructured":"Georg Kunz. 2010. Parallel discrete event simulation. Modeling and Tools for Network Simulation. Springer, 121--131. (2010)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1177\/0037549713508839"},{"key":"e_1_3_2_1_14_1","volume-title":"Accelerating Design Space Exploration for LLM Training Systems with Multi-experiment Parallel Simulation. In 23th USENIX Symposium on Networked Systems Design and Implementation (NSDI 26)","author":"Gui Fei","year":"2026","unstructured":"Fei Gui, Kaihui Gao, Li Chen, Dan Li, Vincent Liu, Ran Zhang, Hongbing Yang, and Dian Xiong. 2026. Accelerating Design Space Exploration for LLM Training Systems with Multi-experiment Parallel Simulation. In 23th USENIX Symposium on Networked Systems Design and Implementation (NSDI 26)."},{"key":"e_1_3_2_1_15_1","first-page":"110","article-title":"Multi-core parallelism for ns-3 simulator. INRIA Sophia-Antipolis","volume":"106","author":"Seguin Guillaume","year":"2009","unstructured":"Guillaume Seguin. 2009. Multi-core parallelism for ns-3 simulator. INRIA Sophia-Antipolis, Tech. Rep 106 (2009), 110. (2009).","journal-title":"Tech. Rep"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/WSC.2001.977456"},{"key":"e_1_3_2_1_17_1","volume-title":"ARK: GPU-driven Code Execution for Distributed Deep Learning. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Hwang Changho","year":"2023","unstructured":"Changho Hwang, KyoungSoo Park, Ran Shu, Xinyuan Qu, Peng Cheng, and Yongqiang Xiong. 2023. ARK: GPU-driven Code Execution for Distributed Deep Learning. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 87--101. https:\/\/www.usenix.org\/conference\/nsdi23\/presentation\/hwang"},{"key":"e_1_3_2_1_18_1","volume-title":"Distributed simulation: A case study in design and verification of distributed programs","author":"Mani Chandy K.","year":"1979","unstructured":"K. Mani Chandy and Jayadev Misra. 1979. Distributed simulation: A case study in design and verification of distributed programs. IEEE Transactions on software engineering (1979)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604844"},{"key":"e_1_3_2_1_20_1","unstructured":"Kalyan S. Perumalla. [n. d.]. Discrete-event Execution Alternatives on General Purpose Graphical Processing Units (GPGPUs). Oak Ridge National Laboratory. ([n.d.])."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/354871.354874"},{"key":"e_1_3_2_1_22_1","volume-title":"d.]. RouteNet: Leveraging graph neural networks for network modeling and optimization in SDN","author":"Rusek Krzysztof","year":"2020","unstructured":"Krzysztof Rusek, Jos\u00e9 Su\u00e1rez-Varela, Paul Almasan, Pere Barlet-Ros, and Albert Cabellos-Aparicio. [n. d.]. RouteNet: Leveraging graph neural networks for network modeling and optimization in SDN. IEEE Journal on Selected Areas in Communications 38, 10 (2020), 2260--2270 ([n.d.])."},{"key":"e_1_3_2_1_23_1","unstructured":"Lecture 3: control flow and synchronisation. https:\/\/people.maths.ox.ac.uk\/gilesm\/cuda\/lecs\/lec3-2x2.pdf [n. d.]."},{"key":"e_1_3_2_1_24_1","volume-title":"https:\/\/docs.nvidia.com\/gameworks\/content\/ de-velopertools\/desktop\/analysis\/report\/cudaexperiments\/sourcelevel\/memorytransactions.htm","author":"Memory Transactions. Nvidia Corporations","year":"2015","unstructured":"Memory Transactions. Nvidia Corporations. https:\/\/docs.nvidia.com\/gameworks\/content\/ de-velopertools\/desktop\/analysis\/report\/cudaexperiments\/sourcelevel\/memorytransactions.htm. 2015."},{"volume-title":"ns3. https:\/\/www.nsnam.org","year":"2017","key":"e_1_3_2_1_25_1","unstructured":"nsnam. ns3. https:\/\/www.nsnam.org. 2017."},{"key":"e_1_3_2_1_26_1","volume-title":"https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/a100\/pdf\/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf","author":"NVIDIA A100 TENSOR CORE GPU. Nvidia Corporation","year":"2021","unstructured":"NVIDIA A100 TENSOR CORE GPU. Nvidia Corporation. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/a100\/pdf\/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf 2021."},{"key":"e_1_3_2_1_27_1","volume-title":"https:\/\/resources.nvidia.com\/en-us-tensor-core\/nvidia-tensor-core-gpu-datasheet?ncid=no-ncid","author":"NVIDIA H100 Tensor Core GPU. Nvidia Corporation","year":"2024","unstructured":"NVIDIA H100 Tensor Core GPU. Nvidia Corporation. https:\/\/resources.nvidia.com\/en-us-tensor-core\/nvidia-tensor-core-gpu-datasheet?ncid=no-ncid 2024."},{"key":"e_1_3_2_1_28_1","volume-title":"https:\/\/www.nextplatform.com\/2025\/01\/22\/openai-declares-its-hardware-independence-sort-of-with-stargate-project\/","author":"Stargate AI","year":"2025","unstructured":"OpenAI Stargate. https:\/\/www.nextplatform.com\/2025\/01\/22\/openai-declares-its-hardware-independence-sort-of-with-stargate-project\/ 2025."},{"key":"e_1_3_2_1_29_1","volume-title":"https:\/\/omnetpp.org","author":"Ltd OpenSim","year":"2018","unstructured":"OpenSim Ltd. OMNeT++. https:\/\/omnetpp.org. 2018."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544248"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3452296.3472926"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/2888619.2888624"},{"key":"e_1_3_2_1_33_1","volume-title":"Cornell University. https:\/\/cvw.cac.cornell.edu\/gpu-architecture\/gpu-characteristics\/simt_warp","author":"Warps SIMT","year":"2024","unstructured":"SIMT and Warps. Cornell University. https:\/\/cvw.cac.cornell.edu\/gpu-architecture\/gpu-characteristics\/simt_warp. 2024."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3629574"},{"key":"e_1_3_2_1_35_1","volume-title":"https:\/\/developer.nvidia.com\/blog\/understanding-the-visualization-of-overhead-and-latency-in-nsight-systems\/","author":"Understanding the Visualization of Overhead and Latency in NVIDIA Nsight Systems. Nvidia Corporation","year":"2020","unstructured":"Understanding the Visualization of Overhead and Latency in NVIDIA Nsight Systems. Nvidia Corporation. https:\/\/developer.nvidia.com\/blog\/understanding-the-visualization-of-overhead-and-latency-in-nsight-systems\/. 2020."},{"key":"e_1_3_2_1_36_1","volume-title":"National Institute of Standards and Technology","author":"US Department of Commerce.","year":"2021","unstructured":"US Department of Commerce. National Institute of Standards and Technology. 2021. Machine Learning in Network Modeling and Simulation. https:\/\/www.nist.gov\/programs-projects\/machine-learning-network-modeling-and-simulation. (2021)."},{"key":"e_1_3_2_1_37_1","volume-title":"https:\/\/developer.nvidia.com\/blog\/using-shared-memory-cuda-cc\/","author":"Using Shared Memory in CUDA C\/C++. NVidia Corporation","year":"2013","unstructured":"Using Shared Memory in CUDA C\/C++. NVidia Corporation. https:\/\/developer.nvidia.com\/blog\/using-shared-memory-cuda-cc\/. 2013."},{"key":"e_1_3_2_1_38_1","unstructured":"Vasily Volkov. 2016.. Understanding Latency Hiding on GPUs. Technical Report No. UCB\/EECS-2016-143 (2016.)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","unstructured":"Wenjie Tang Yiping Yao. 2013. A GPU-based discrete event simulation kernel. Simulation: Transactions of the Society for Modeling and Simulation International. (2013) 1--20. https:\/\/doi.org\/10.1177\/0037549713508839","DOI":"10.1177\/0037549713508839"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","unstructured":"Wesley Garey Richard A. Rouil Evan Black Tanguy Ropitault Weichao Gao. 2023. O-RAN with Machine Learning in ns-3. WNS3 '23: Proceedings of the 2023 Workshop on ns-3. (2023). https:\/\/doi.org\/10.1145\/3592149.3592157","DOI":"10.1145\/3592149.3592157"}],"event":{"name":"SIGCOMM '25: ACM SIGCOMM 2025 Conference","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"],"location":"Coimbra Portugal","acronym":"SIGCOMM '25"},"container-title":["Proceedings of the 2nd Workshop on Networks for AI Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3748273.3749199","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T14:10:40Z","timestamp":1760019040000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748273.3749199"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":40,"alternative-id":["10.1145\/3748273.3749199","10.1145\/3748273"],"URL":"https:\/\/doi.org\/10.1145\/3748273.3749199","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-09-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}