{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:39Z","timestamp":1750220199150,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T00:00:00Z","timestamp":1661731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,29]]},"DOI":"10.1145\/3545008.3545069","type":"proceedings-article","created":{"date-parts":[[2023,1,15]],"date-time":"2023-01-15T01:04:08Z","timestamp":1673744648000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["ParaGraph: An application-simulator interface and toolkit for hardware-software co-design"],"prefix":"10.1145","author":[{"given":"Mikhail","family":"Isaev","sequence":"first","affiliation":[{"name":"School of Computational Science &amp; Engineering, Georgia Institute of Technology, United States of America"}]},{"given":"Nic","family":"McDonald","sequence":"additional","affiliation":[{"name":"Nvidia, United States of America"}]},{"given":"Jeffrey","family":"Young","sequence":"additional","affiliation":[{"name":"School of Computer Science, Georgia Institute of Technology, United States of America"}]},{"given":"Richard","family":"Vuduc","sequence":"additional","affiliation":[{"name":"School of Computational Science &amp; Engineering, Georgia Institute of Technology, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg\u00a0S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org.","key":"e_1_3_2_1_1_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_2_1","DOI":"10.1109\/ISPASS.2009.4919636"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1145\/2024716.2024718"},{"unstructured":"James Bradbury Roy Frostig Peter Hawkins Matthew\u00a0James Johnson Chris Leary Dougal Maclaurin George Necula Adam Paszke Jake VanderPlas Skye Wanderman-Milne and Qiao Zhang. 2018. JAX: composable transformations of Python+NumPy programs. http:\/\/github.com\/google\/jax http:\/\/github.com\/google\/jax.","key":"e_1_3_2_1_4_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.1145\/3293320.3293321"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1002\/cpe.1206"},{"key":"e_1_3_2_1_7_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 578\u2013594. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.2172\/1311761"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.1145\/173284.155333"},{"key":"e_1_3_2_1_10_1","volume-title":"ProGraML: A Graph-based Program Representation for Data Flow Analysis and Compiler Optimizations. In Thirty-eighth International Conference on Machine Learning (Virtual). PMLR.","author":"Cummins Chris","year":"2021","unstructured":"Chris Cummins, Zacharias\u00a0V. Fisches, Tal Ben-Nun, Torsten Hoefler, Michael O\u2019Boyle, and Hugh Leather. 2021. ProGraML: A Graph-based Program Representation for Data Flow Analysis and Compiler Optimizations. In Thirty-eighth International Conference on Machine Learning (Virtual). PMLR."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_11_1","DOI":"10.5555\/2821589"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.18653\/v1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1145\/24039.24041"},{"doi-asserted-by":"publisher","unstructured":"Keno Fischer and Elliot Saba. 2018. Automatic Full Compilation of Julia Programs and ML Models to Cloud TPUs. https:\/\/doi.org\/10.48550\/ARXIV.1810.09868 https:\/\/arxiv.org\/abs\/1810.09868.","key":"e_1_3_2_1_14_1","DOI":"10.48550\/ARXIV.1810.09868"},{"key":"e_1_3_2_1_15_1","volume-title":"XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla","author":"LLC.","year":"2022","unstructured":"Google, LLC. 2022. XLA: Optimizing Compiler for TensorFlow. https:\/\/www.tensorflow.org\/xla"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1029\/2021MS002717"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_18_1","DOI":"10.1109\/SC.2016.13"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1109\/ISPASS.2013.6557149"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.1145\/3360307"},{"key":"e_1_3_2_1_21_1","volume-title":"17th Workshop on Compilers for Parallel Computing (CPC","author":"Khaldi Dounia","year":"2013","unstructured":"Dounia Khaldi, Pierre Jouvelot, Fran\u00e7ois Irigoin, and Corinne Ancourt. 2013. SPIRE: A Methodology for Sequential to Parallel Intermediate Representation Extension. In 17th Workshop on Compilers for Parallel Computing (CPC 2013). Lyon, France. https:\/\/hal-mines-paristech.archives-ouvertes.fr\/hal-00823324"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1145\/2833157.2833158"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1109\/ISCA45697.2020.00085"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_24_1","DOI":"10.1073\/pnas.2101784118"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1145\/3200691.3178493"},{"unstructured":"Sameer Kumar and Norm Jouppi. 2020. Highly Available Data Parallel ML training on Mesh Networks. arxiv:2011.03605\u00a0[cs.LG]","key":"e_1_3_2_1_26_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_27_1","DOI":"10.1109\/CGO.2004.1281665"},{"key":"e_1_3_2_1_28_1","volume-title":"MLIR: A Compiler Infrastructure for the End of Moore\u2019s Law. CoRR abs\/2002.11054(2020). arXiv:2002.11054https:\/\/arxiv.org\/abs\/2002.11054","author":"Lattner Chris","year":"2020","unstructured":"Chris Lattner, Jacques\u00a0A. Pienaar, Mehdi Amini, Uday Bondhugula, River Riddle, Albert Cohen, Tatiana Shpeisman, Andy Davis, Nicolas Vasilache, and Oleksandr Zinenko. 2020. MLIR: A Compiler Infrastructure for the End of Moore\u2019s Law. CoRR abs\/2002.11054(2020). arXiv:2002.11054https:\/\/arxiv.org\/abs\/2002.11054"},{"unstructured":"Zhijing Li Yuwei Ye Stephen Neuendorffer and Adrian Sampson. 2022. Compiler-Driven Simulation of Reconfigurable Hardware Accelerators. CoRR abs\/2202.00739(2022). arXiv:2202.00739https:\/\/arxiv.org\/abs\/2202.00739","key":"e_1_3_2_1_29_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.1109\/ISPASS48437.2020.00029"},{"unstructured":"Peter Mattson Christine Cheng Cody Coleman Greg Diamos Paulius Micikevicius David Patterson Hanlin Tang Gu-Yeon Wei Peter Bailis Victor Bittorf David Brooks Dehao Chen Debojyoti Dutta Udit Gupta Kim Hazelwood Andrew Hock Xinyuan Huang Atsushi Ike Bill Jia Daniel Kang David Kanter Naveen Kumar Jeffery Liao Guokai Ma Deepak Narayanan Tayo Oguntebi Gennady Pekhimenko Lillian Pentecost Vijay\u00a0Janapa Reddi Taylor Robie Tom\u00a0St. John Tsuguchika Tabaru Carole-Jean Wu Lingjie Xu Masafumi Yamazaki Cliff Young and Matei Zaharia. 2020. MLPerf Training Benchmark. arxiv:1910.01500\u00a0[cs.LG]","key":"e_1_3_2_1_31_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_32_1","DOI":"10.1109\/ISPASS.2018.00017"},{"unstructured":"Meta. 2020. PyTorch-XLA. https:\/\/github.com\/pytorch\/xla\/ https:\/\/github.com\/pytorch\/xla\/.","key":"e_1_3_2_1_33_1"},{"unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun\u00a0Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson\u00a0G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. CoRR abs\/1906.00091(2019). https:\/\/arxiv.org\/abs\/1906.00091","key":"e_1_3_2_1_34_1"},{"unstructured":"nsnam. 2022. ns-3. https:\/\/www.nsnam.org\/ https:\/\/www.nsnam.org\/.","key":"e_1_3_2_1_35_1"},{"key":"e_1_3_2_1_36_1","volume-title":"NCCL: NVIDIA Collective Communications Library. https:\/\/developer.nvidia.com\/nccl https:\/\/developer.nvidia.com\/nccl.","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2022. NCCL: NVIDIA Collective Communications Library. https:\/\/developer.nvidia.com\/nccl https:\/\/developer.nvidia.com\/nccl."},{"unstructured":"NVIDIA. 2022. NVLink and NVSwitch. https:\/\/www.nvidia.com\/en-us\/data-center\/nvlink\/ https:\/\/www.nvidia.com\/en-us\/data-center\/nvlink\/.","key":"e_1_3_2_1_37_1"},{"unstructured":"EPFL Parallel Systems Architecture Lab\u00a0(PARSA). 2020. QFlex. https:\/\/qflex.epfl.ch https:\/\/qflex.epfl.ch.","key":"e_1_3_2_1_38_1"},{"volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Curran Associates, Inc., 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf","key":"e_1_3_2_1_39_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1109\/InPar.2012.6339601"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_2_1_42_1","volume-title":"ASTRA-SIM: Enabling SW\/HW Co-Design Exploration for Distributed DL Training Platforms. In IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2020","author":"Rashidi Saeed","year":"2020","unstructured":"Saeed Rashidi, Srinivas Sridharan, Sudarshan Srinivasan, and Tushar Krishna. 2020. ASTRA-SIM: Enabling SW\/HW Co-Design Exploration for Distributed DL Training Platforms. In IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2020, Boston, MA, USA, August 22-26, 2020. IEEE."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1145\/1964218.1964225"},{"key":"e_1_3_2_1_44_1","volume-title":"Glow: Graph Lowering Compiler Techniques for Neural Networks. arxiv:1805.00907\u00a0[cs.PL]","author":"Rotem Nadav","year":"2019","unstructured":"Nadav Rotem, Jordan Fix, Saleem Abdulrasool, Garret Catron, Summer Deng, Roman Dzhabarov, Nick Gibson, James Hegeman, Meghan Lele, Roman Levenstein, Jack Montgomery, Bert Maher, Satish Nadathur, Jakob Olesen, Jongsoo Park, Artem Rakhov, Misha Smelyanskiy, and Man Wang. 2019. Glow: Graph Lowering Compiler Techniques for Neural Networks. arxiv:1805.00907\u00a0[cs.PL]"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_45_1","DOI":"10.1109\/HOTCHIPS.2013.7478287"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_46_1","DOI":"10.1088\/1742-5468"},{"unstructured":"Alexander Sergeev and Mike\u00a0Del Balso. 2018. Horovod: fast and easy distributed deep learning in TensorFlow. arxiv:1802.05799\u00a0[cs.LG]","key":"e_1_3_2_1_47_1"},{"unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2020. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. arxiv:1909.08053\u00a0[cs.CL]","key":"e_1_3_2_1_48_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_49_1","DOI":"10.1145\/2063384.2063487"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_50_1","DOI":"10.1145\/3400302.3415751"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_51_1","DOI":"10.1145\/2480741.2480743"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_52_1","DOI":"10.1145\/2694344.2694362"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_53_1","DOI":"10.1145\/169627.169855"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_54_1","DOI":"10.4108\/ICST.SIMUTOOLS2008.3027"},{"key":"e_1_3_2_1_55_1","volume-title":"Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0V. Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0V. Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_56_1","DOI":"10.1109\/MM.2011.78"},{"volume-title":"Compiler-Assisted Source-to-Source Skeletonization of Application Models for System Simulation","author":"Wilke J.","unstructured":"Jeremiah\u00a0J. Wilke, Joseph\u00a0P. Kenny, Samuel Knight, and Sebastien Rumley. 2018. Compiler-Assisted Source-to-Source Skeletonization of Application Models for System Simulation. In High Performance Computing, Rio Yokota, Mich\u00e8le Weiland, David Keyes, and Carsten Trinitis (Eds.). Springer International Publishing, Cham, 123\u2013143.","key":"e_1_3_2_1_57_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_58_1","DOI":"10.1145\/1498765.1498785"}],"event":{"acronym":"ICPP '22","name":"ICPP '22: 51st International Conference on Parallel Processing","location":"Bordeaux France"},"container-title":["Proceedings of the 51st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545069","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3545008.3545069","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:44Z","timestamp":1750186964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545069"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,29]]},"references-count":58,"alternative-id":["10.1145\/3545008.3545069","10.1145\/3545008"],"URL":"https:\/\/doi.org\/10.1145\/3545008.3545069","relation":{},"subject":[],"published":{"date-parts":[[2022,8,29]]},"assertion":[{"value":"2023-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}