{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T03:34:12Z","timestamp":1767929652629,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3669940.3707284","type":"proceedings-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T12:28:01Z","timestamp":1738844881000},"page":"794-810","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["PartIR: Composing SPMD Partitioning Strategies for Machine Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8716-526X","authenticated-orcid":false,"given":"Sami","family":"Alabed","sequence":"first","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8374-1874","authenticated-orcid":false,"given":"Daniel","family":"Belov","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1400-3624","authenticated-orcid":false,"given":"Bart","family":"Chrzaszcz","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1031-2598","authenticated-orcid":false,"given":"Juliana","family":"Franco","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United 
Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6483-3841","authenticated-orcid":false,"given":"Dominik","family":"Grewe","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0122-1377","authenticated-orcid":false,"given":"Dougal","family":"Maclaurin","sequence":"additional","affiliation":[{"name":"Google DeepMind, Cambridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9024-7618","authenticated-orcid":false,"given":"James","family":"Molloy","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1789-2824","authenticated-orcid":false,"given":"Tom","family":"Natan","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0703-6435","authenticated-orcid":false,"given":"Tamara","family":"Norman","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0846-3208","authenticated-orcid":false,"given":"Xiaoyue","family":"Pan","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7665-4559","authenticated-orcid":false,"given":"Adam","family":"Paszke","sequence":"additional","affiliation":[{"name":"Google DeepMind, Warsaw, Poland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8591-5215","authenticated-orcid":false,"given":"Norman A.","family":"Rink","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1179-1934","authenticated-orcid":false,"given":"Michael","family":"Schaarschmidt","sequence":"additional","affiliation":[{"name":"Isomorphic Labs, London, United 
Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4010-8912","authenticated-orcid":false,"given":"Timur","family":"Sitdikov","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7276-9038","authenticated-orcid":false,"given":"Agnieszka","family":"Swietlik","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2079-1996","authenticated-orcid":false,"given":"Dimitrios","family":"Vytiniotis","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3518-3526","authenticated-orcid":false,"given":"Joel","family":"Wee","sequence":"additional","affiliation":[{"name":"Google DeepMind, London, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"TensorFlow: A System for Large-Scale Machine Learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A System for Large-Scale Machine Learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). USENIX Association, Savannah, GA, 265--283. https:\/\/www.usenix.org\/conference\/osdi16\/technical-sessions\/presentation\/abadi"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","unstructured":"Sami Alabed Dominik Grewe Juliana Franco Bart Chrzaszcz Tom Natan Tamara Norman Norman A. Rink Dimitrios Vytiniotis and Michael Schaarschmidt. 2022. 
Automatic Discovery of Composite SPMD Partitioning Strategies in PartIR. https:\/\/doi.org\/10.48550\/ARXIV.2210.06352","DOI":"10.48550\/ARXIV.2210.06352"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356173"},{"key":"e_1_3_2_1_4_1","volume-title":"Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang.","author":"Bradbury James","year":"2018","unstructured":"James Bradbury, Roy Frostig, Peter Hawkins, Matthew James Johnson, Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang. 2018. JAX: composable transformations of PythonNumPy programs. http:\/\/github.com\/google\/jax"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3495883"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.184"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.87"},{"key":"e_1_3_2_1_8_1","volume-title":"Edouard Mathieu and Max Roser","author":"Charlie Giattino Veronika Samborska","year":"2023","unstructured":"Veronika Samborska Charlie Giattino, Edouard Mathieu and Max Roser. 2023. Data Page: Computation used to train notable artificial intelligence systems. https:\/\/ourworldindata.org\/grapher\/artificial-intelligence-training-computation. Retrieved from [online resource]."},{"key":"e_1_3_2_1_9_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. 
In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, 578--594. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen"},{"key":"e_1_3_2_1_10_1","unstructured":"Youlong Cheng and HyoukJoong Lee. 2019. Train ML models on large images and 3D volumes with spatial partitioning on Cloud TPUs | Google Cloud blog. https:\/\/cloud.google.com\/blog\/products\/ai-machine-learning\/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus [Accessed 06-12-2023]."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2204.02311"},{"key":"e_1_3_2_1_12_1","unstructured":"Google Developers. 2023. Cloud TPU System Architecture. https:\/\/cloud.google.com\/tpu\/docs\/system-architecture-tpu-vm. [Last updated 2023-11-06 UTC.]."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00923"},{"key":"e_1_3_2_1_14_1","volume-title":"Gemini: A Family of Highly Capable Multimodal Models. preprint 2312.11805. arXiv. arXiv:2312.11805 [cs.CL].","author":"Team Gemini","year":"2023","unstructured":"Gemini Team, Rohan Anil, Sebastian Borgeaud, Yonghui Wu, Jean-Baptiste Alayrac, Jiahui Yu, Radu Soricut, Johan Schalkwyk, Andrew M Dai, Anja Hauth, et al. 2023. Gemini: A Family of Highly Capable Multimodal Models. preprint 2312.11805. arXiv. 
arXiv:2312.11805 [cs.CL]."},{"key":"e_1_3_2_1_15_1","volume-title":"International Conference on Learning Representations.","author":"Godwin Jonathan","year":"2022","unstructured":"Jonathan Godwin, Michael Schaarschmidt, Alexander L Gaunt, Alvaro Sanchez-Gonzalez, Yulia Rubanova, Petar Veli\u010dkovi\u0107, James Kirkpatrick, and Peter Battaglia. 2022. Simple GNN Regularisation for 3D Molecular Property Prediction and Beyond. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_16_1","volume-title":"XLA: Optimizing Compiler for Machine Learning. https:\/\/www.tensorflow.org\/xla. https:\/\/www.tensorflow.org\/xla","author":"Google XLA","year":"2017","unstructured":"Google XLA team. 2017. XLA: Optimizing Compiler for Machine Learning. https:\/\/www.tensorflow.org\/xla. https:\/\/www.tensorflow.org\/xla"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414632"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3408974"},{"key":"e_1_3_2_1_19_1","volume-title":"Edge Partition Modulated Graph Convolutional Networks. https:\/\/openreview.net\/forum?id=ET1UAOYeU42","author":"He Yilin","unstructured":"Yilin He, Chaojie Wang, Hao Zhang, Bo Chen, and Mingyuan Zhou. 2022. Edge Partition Modulated Graph Convolutional Networks. https:\/\/openreview.net\/forum?id=ET1UAOYeU42"},{"key":"e_1_3_2_1_20_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, Vol. 
33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","unstructured":"Jordan Hoffmann Sebastian Borgeaud Arthur Mensch Elena Buchatskaya Trevor Cai Eliza Rutherford Diego de Las Casas Lisa Anne Hendricks Johannes Welbl Aidan Clark Tom Hennigan Eric Noland Katie Millican George van den Driessche Bogdan Damoc Aurelia Guy Simon Osindero Karen Simonyan Erich Elsen Jack W. Rae Oriol Vinyals and Laurent Sifre. 2022. Training Compute-Optimal Large Language Models. https:\/\/doi.org\/10.48550\/ARXIV.2203.15556","DOI":"10.48550\/ARXIV.2203.15556"},{"key":"e_1_3_2_1_22_1","volume-title":"High resolution medical image analysis with spatial partitioning. preprint","author":"Hou Le","year":"2019","unstructured":"Le Hou, Youlong Cheng, Noam Shazeer, Niki Parmar, Yeqing Li, Panagiotis Korfiatis, Travis M Drucker, Daniel J Blezek, and Xiaodan Song. 2019. High resolution medical image analysis with spatial partitioning. preprint 1909.03108. arXiv. arXiv preprint arXiv:1909.03108."},{"key":"e_1_3_2_1_23_1","volume-title":"Dehao Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V. Le, Yonghui Wu, and Zhifeng Chen.","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Mia Xu Chen, Dehao Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V. Le, Yonghui Wu, and Zhifeng Chen. 2019. GPipe: efficient training of giant neural networks using pipeline parallelism. Curran Associates Inc., Red Hook, NY, USA."},{"key":"e_1_3_2_1_24_1","volume-title":"NeurIPS 2023 Foundation Models for Decision Making Workshop.","author":"Jarrett Daniel","year":"2023","unstructured":"Daniel Jarrett, Miruna Pislar, Michiel A Bakker, Michael Henry Tessler, Raphael Koster, Jan Balaguer, Romuald Elie, Christopher Summerfield, and Andrea Tacchetti. 2023. Language agents as digital representatives in collective decision-making. 
In NeurIPS 2023 Foundation Models for Decision Making Workshop."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of Machine Learning and Systems, A. Talwalkar, V. Smith, and M. Zaharia (Eds.)","volume":"1","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, Matei Zaharia, and Alex Aiken. 2019. Beyond Data and Model Parallelism for Deep Neural Networks.. In Proceedings of Machine Learning and Systems, A. Talwalkar, V. Smith, and M. Zaharia (Eds.), Vol. 1. 1--13. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2019\/file\/b422680f3db0986ddd7f8f126baaf0fa-Paper.pdf"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"e_1_3_2_1_28_1","unstructured":"Norman P. Jouppi Cliff Young Nishant Patil David A. Patterson Gaurav Agrawal Raminder Bajwa Sarah Bates Suresh Bhatia Nan Boden Al Borchers Rick Boyle Pierre-luc Cantin Clifford Chao Chris Clark Jeremy Coriell Mike Daley Matt Dau Jeffrey Dean Ben Gelb Tara Vazir Ghaemmaghami Rajendra Gottipati William Gulland Robert Hagmann Richard C. Ho Doug Hogberg John Hu Robert Hundt Dan Hurt Julian Ibarz Aaron Jaffey Alek Jaworski Alexander Kaplan Harshit Khaitan Andy Koch Naveen Kumar Steve Lacy James Laudon James Law Diemthu Le Chris Leary Zhuyuan Liu Kyle Lucke Alan Lundin Gordon MacKean Adriana Maggiore Maire Mahony Kieran Miller Rahul Nagarajan Ravi Narayanaswami Ray Ni Kathy Nix Thomas Norrie Mark Omernick Narayana Penukonda Andy Phelps Jonathan Ross Amir Salek Emad Samadiani Chris Severn Gregory Sizikov Matthew Snelham Jed Souter Dan Steinberg Andy Swing Mercedes Tan Gregory Thorson Bo Tian Horia Toma Erick Tuttle Vijay Vasudevan Richard Walter Walter Wang Eric Wilcox and Doe Hyun Yoon. 2017. In-Datacenter Performance Analysis of a Tensor Processing Unit. CoRR Vol. abs\/1704.04760 (2017). 
showeprint[arXiv]1704.04760 http:\/\/arxiv.org\/abs\/1704.04760"},{"key":"e_1_3_2_1_29_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of Machine Learning and Systems","volume":"5","author":"Korthikanti Vijay Anand","year":"2023","unstructured":"Vijay Anand Korthikanti, Jared Casper, Sangkug Lym, Lawrence McAfee, Michael Andersch, Mohammad Shoeybi, and Bryan Catanzaro. 2023. Reducing activation recomputation in large transformer models. Proceedings of Machine Learning and Systems , Vol. 5 (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_33_1","volume-title":"GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. CoRR","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. CoRR, Vol. abs\/2006.16668 (2020). arxiv: 2006.16668 https:\/\/arxiv.org\/abs\/2006.16668"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_2_1_35_1","volume-title":"International Conference on Machine Learning. PMLR, 7937--7947","author":"Narayanan Deepak","year":"2021","unstructured":"Deepak Narayanan, Amar Phanishayee, Kaiyu Shi, Xie Chen, and Matei Zaharia. 2021a. Memory-efficient pipeline-parallel dnn training. In International Conference on Machine Learning. 
PMLR, 7937--7947."},{"key":"e_1_3_2_1_36_1","volume-title":"Efficient Large-Scale Language Model Training on GPU Clusters. CoRR","author":"Narayanan Deepak","year":"2021","unstructured":"Deepak Narayanan, Mohammad Shoeybi, Jared Casper, Patrick LeGresley, Mostofa Patwary, Vijay Korthikanti, Dmitri Vainbrand, Prethvi Kashinkunti, Julie Bernauer, Bryan Catanzaro, Amar Phanishayee, and Matei Zaharia. 2021b. Efficient Large-Scale Language Model Training on GPU Clusters. CoRR, Vol. abs\/2104.04473 (2021). showeprint[arXiv]2104.04473 https:\/\/arxiv.org\/abs\/2104.04473"},{"key":"e_1_3_2_1_37_1","unstructured":"NVIDIA. 2021. NVIDIA NVLink and NVSwitch. https:\/\/www.nvidia.com\/en-gb\/data-center\/nvlink\/. Accessed: 2021-10-07."},{"key":"e_1_3_2_1_38_1","unstructured":"Nvidia. 2023. NVIDIA A100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-gb\/data-center\/a100\/. [Last updated 2023-11-06 UTC.]."},{"key":"e_1_3_2_1_39_1","unstructured":"OpenXLA. 2023a. OpenXLA: A machine learning compiler for GPUs CPUs and ML accelerators. https:\/\/github.com\/openxla\/xla. [Last updated 2023-11-06 UTC.]."},{"key":"e_1_3_2_1_40_1","volume-title":"Shardy: A library for performing sharding computations. https:\/\/github.com\/openxla\/shardy.","author":"XLA.","year":"2023","unstructured":"OpenXLA. 2023b. Shardy: A library for performing sharding computations. https:\/\/github.com\/openxla\/shardy."},{"key":"e_1_3_2_1_41_1","unstructured":"OpenXLA. 2023c. StableHLO: Backward compatible ML compute opset inspired by HLO\/MHLO. https:\/\/github.com\/openxla\/stablehlo. 
[Last updated 2023-11-06 UTC.]."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359652"},{"key":"e_1_3_2_1_43_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems,, H. Wallach, H. Larochelle, A. Beygelzimer, F. d`Alch\u00e9 Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3473593"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of Machine Learning and Systems","volume":"5","author":"Pope Reiner","year":"2023","unstructured":"Reiner Pope, Sholto Douglas, Aakanksha Chowdhery, Jacob Devlin, James Bradbury, Jonathan Heek, Kefan Xiao, Shivani Agrawal, and Jeff Dean. 2023. Efficiently scaling transformer inference. Proceedings of Machine Learning and Systems , Vol. 5 (2023)."},{"key":"e_1_3_2_1_46_1","unstructured":"LLVM Project. 2024. 'mesh' Dialect. https:\/\/mlir.llvm.org\/docs\/Dialects\/Mesh\/."},{"key":"e_1_3_2_1_47_1","unstructured":"Jack W. Rae Sebastian Borgeaud Trevor Cai Katie Millican Jordan Hoffmann H. 
Francis Song John Aslanides Sarah Henderson Roman Ring Susannah Young Eliza Rutherford Tom Hennigan Jacob Menick Albin Cassirer Richard Powell George van den Driessche Lisa Anne Hendricks Maribeth Rauh Po-Sen Huang Amelia Glaese Johannes Welbl Sumanth Dathathri Saffron Huang Jonathan Uesato John Mellor Irina Higgins Antonia Creswell Nat McAleese Amy Wu Erich Elsen Siddhant M. Jayakumar Elena Buchatskaya David Budden Esme Sutherland Karen Simonyan Michela Paganini Laurent Sifre Lena Martens Xiang Lorraine Li Adhiguna Kuncoro Aida Nematzadeh Elena Gribovskaya Domenic Donato Angeliki Lazaridou Arthur Mensch Jean-Baptiste Lespiau Maria Tsimpoukelli Nikolai Grigorev Doug Fritz Thibault Sottiaux Mantas Pajarskas Toby Pohlen Zhitao Gong Daniel Toyama Cyprien de Masson d'Autume Yujia Li Tayfun Terzi Vladimir Mikulik Igor Babuschkin Aidan Clark Diego de Las Casas Aurelia Guy Chris Jones James Bradbury Matthew Johnson Blake A. Hechtman Laura Weidinger Iason Gabriel William S. Isaac Edward Lockhart Simon Osindero Laura Rimell Chris Dyer Oriol Vinyals Kareem Ayoub Jeff Stanway Lorrayne Bennett Demis Hassabis Koray Kavukcuoglu and Geoffrey Irving. 2021. Scaling Language Models: Methods Analysis & Insights from Training Gopher. CoRR Vol. abs\/2112.11446 (2021). showeprint[arXiv]2112.11446 https:\/\/arxiv.org\/abs\/2112.11446"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_49_1","volume-title":"ZeRO: Memory Optimization Towards Training A Trillion Parameter Models. CoRR","author":"Rajbhandari Samyam","year":"2019","unstructured":"Samyam Rajbhandari, Jeff Rasley, Olatunji Ruwase, and Yuxiong He. 2019. ZeRO: Memory Optimization Towards Training A Trillion Parameter Models. CoRR, Vol. abs\/1910.02054 (2019). 
showeprint[arXiv]1910.02054 http:\/\/arxiv.org\/abs\/1910.02054"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_1_51_1","volume-title":"Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He.","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. ZeRO-Offload: Democratizing Billion-Scale Model Training. CoRR, Vol. abs\/2101.06840 (2021). showeprint[arXiv]2101.06840 https:\/\/arxiv.org\/abs\/2101.06840"},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Machine Learning. PMLR, 8459--8468","author":"Sanchez-Gonzalez Alvaro","year":"2020","unstructured":"Alvaro Sanchez-Gonzalez, Jonathan Godwin, Tobias Pfaff, Rex Ying, Jure Leskovec, and Peter Battaglia. 2020. Learning to simulate complex physics with graph networks. In International Conference on Machine Learning. PMLR, 8459--8468."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437984.3458829"},{"key":"e_1_3_2_1_54_1","volume-title":"Tamara Norman, James Molloy, Jonathan Godwin","author":"Schaarschmidt Michael","year":"2021","unstructured":"Michael Schaarschmidt, Dominik Grewe, Dimitrios Vytiniotis, Adam Paszke, Georg Stefan Schmid, Tamara Norman, James Molloy, Jonathan Godwin, Norman Alexander Rink, Vinod Nair, et al. 2021. AutoMap: Towards Ergonomic Automated Parallelism for ML Models. preprint 2112.02958. arXiv. arXiv:2112.02958 [cs.LG]."},{"key":"e_1_3_2_1_55_1","unstructured":"Noam Shazeer Youlong Cheng Niki Parmar Dustin Tran Ashish Vaswani Penporn Koanantakool Peter Hawkins HyoukJoong Lee Mingsheng Hong Cliff Young Ryan Sepassi and Blake Hechtman. 2018. Mesh-TensorFlow: Deep Learning for Supercomputers. In Neural Information Processing Systems."},{"key":"e_1_3_2_1_56_1","volume-title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. 
CoRR","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. 2019. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. CoRR, Vol. abs\/1909.08053 (2019). arxiv: 1909.08053 http:\/\/arxiv.org\/abs\/1909.08053"},{"key":"e_1_3_2_1_57_1","volume-title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. arxiv","author":"Shoeybi Mohammad","year":"2020","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. 2020. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. arxiv: 1909.08053 [cs.CL]"},{"key":"e_1_3_2_1_58_1","volume-title":"MPI: The Complete Reference","author":"Snir Marc","year":"1995","unstructured":"Marc Snir, Steve W. Otto, David W. Walker, Jack Dongarra, and Steven Huss-Lederman. 1995. MPI: The Complete Reference. MIT Press, Cambridge, MA, USA."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/2784731.2784754"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2017.7863730"},{"key":"e_1_3_2_1_61_1","volume-title":"Galactica: A Large Language Model for Science. preprint 2211.09085. arXiv. arXiv:2211.09085 [cs.CL].","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. 2022. Galactica: A Large Language Model for Science. preprint 2211.09085. arXiv. arXiv:2211.09085 [cs.CL]."},{"key":"e_1_3_2_1_62_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et al. 2023. LLaMA: Open and Efficient Foundation Language Models. preprint 2302.13971. arXiv. 
arXiv:2302.13971 [cs.CL]."},{"key":"e_1_3_2_1_63_1","volume-title":"Unity: Accelerating DNN Training Through Joint Optimization of Algebraic Transformations and Parallelization. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Unger Colin","year":"2022","unstructured":"Colin Unger, Zhihao Jia, Wei Wu, Sina Lin, Mandeep Baines, Carlos Efrain Quintero Narvaez, Vinay Ramakrishnaiah, Nirmal Prajapati, Pat McCormick, Jamaludin Mohd-Yusof, Xi Luo, Dheevatsa Mudigere, Jongsoo Park, Misha Smelyanskiy, and Alex Aiken. 2022. Unity: Accelerating DNN Training Through Joint Optimization of Algebraic Transformations and Parallelization. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 267--284. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/unger"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/289423.289425"},{"key":"e_1_3_2_1_65_1","volume-title":"Auto-MAP: A DQN Framework for Exploring Distributed Execution Plans for DNN Workloads. CoRR","author":"Wang Siyu","year":"2020","unstructured":"Siyu Wang, Yi Rong, Shiqing Fan, Zhen Zheng, Lansong Diao, Guoping Long, Jun Yang, Xiaoyong Liu, and Wei Lin. 2020. Auto-MAP: A DQN Framework for Exploring Distributed Execution Plans for DNN Workloads. CoRR, Vol. abs\/2007.04069 (2020). showeprint[arXiv]2007.04069 https:\/\/arxiv.org\/abs\/2007.04069"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567959"},{"key":"e_1_3_2_1_67_1","unstructured":"xla. 2024. #13875: Reshard LHS and RHS to match output sharding by default to handle dot operations in SPMD partitioner. https:\/\/github.com\/openxla\/xla\/pull\/13875."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","unstructured":"Yuanzhong Xu HyoukJoong Lee Dehao Chen Hongjun Choi Blake Hechtman and Shibo Wang. 2020. Automatic Cross-Replica Sharding of Weight Update in Data-Parallel Training. 
https:\/\/doi.org\/10.48550\/ARXIV.2004.13336","DOI":"10.48550\/ARXIV.2004.13336"},{"key":"e_1_3_2_1_69_1","volume-title":"GSPMD: General and Scalable Parallelization for ML Computation Graphs. CoRR","author":"Xu Yuanzhong","year":"2021","unstructured":"Yuanzhong Xu, HyoukJoong Lee, Dehao Chen, Blake A. Hechtman, Yanping Huang, Rahul Joshi, Maxim Krikun, Dmitry Lepikhin, Andy Ly, Marcello Maggioni, Ruoming Pang, Noam Shazeer, Shibo Wang, Tao Wang, Yonghui Wu, and Zhifeng Chen. 2021. GSPMD: General and Scalable Parallelization for ML Computation Graphs. CoRR, Vol. abs\/2105.04663 (2021). showeprint[arXiv]2105.04663 https:\/\/arxiv.org\/abs\/2105.04663"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3519939.3523437"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.14778\/3611540.3611569"},{"key":"e_1_3_2_1_72_1","volume-title":"Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zheng Lianmin","year":"2022","unstructured":"Lianmin Zheng, Zhuohan Li, Hao Zhang, Yonghao Zhuang, Zhifeng Chen, Yanping Huang, Yida Wang, Yuanzhong Xu, Danyang Zhuo, Eric P. Xing, Joseph E. Gonzalez, and Ion Stoica. 2022. Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 559--578. 
https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/zheng-lianmin"}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707284","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3669940.3707284","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T14:49:54Z","timestamp":1755787794000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707284"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":72,"alternative-id":["10.1145\/3669940.3707284","10.1145\/3669940"],"URL":"https:\/\/doi.org\/10.1145\/3669940.3707284","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}