{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T15:37:03Z","timestamp":1777736223319,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620665.3640366","type":"proceedings-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T14:18:06Z","timestamp":1713795486000},"page":"929-947","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":506,"title":["PyTorch 2: Faster Machine Learning Through Dynamic Python Bytecode Transformation and Graph Compilation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5207-2179","authenticated-orcid":false,"given":"Jason","family":"Ansel","sequence":"first","affiliation":[{"name":"Meta, Menlo Park, CA, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0621-7872","authenticated-orcid":false,"given":"Edward","family":"Yang","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1133-816X","authenticated-orcid":false,"given":"Horace","family":"He","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9867-5075","authenticated-orcid":false,"given":"Natalia","family":"Gimelshein","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6777-9168","authenticated-orcid":false,"given":"Animesh","family":"Jain","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0539-0667","authenticated-orcid":false,"given":"Michael","family":"Voznesensky","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8090-7660","authenticated-orcid":false,"given":"Bin","family":"Bao","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6824-4343","authenticated-orcid":false,"given":"Peter","family":"Bell","sequence":"additional","affiliation":[{"name":"Quansight, Austin, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4954-1849","authenticated-orcid":false,"given":"David","family":"Berard","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8149-0483","authenticated-orcid":false,"given":"Evgeni","family":"Burovski","sequence":"additional","affiliation":[{"name":"Quansight, Austin, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0830-7330","authenticated-orcid":false,"given":"Geeta","family":"Chauhan","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4276-2227","authenticated-orcid":false,"given":"Anjali","family":"Chourdia","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7846-744X","authenticated-orcid":false,"given":"Will","family":"Constable","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4359-1974","authenticated-orcid":false,"given":"Alban","family":"Desmaison","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8863-1503","authenticated-orcid":false,"given":"Zachary","family":"DeVito","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8337-3498","authenticated-orcid":false,"given":"Elias","family":"Ellison","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6406-4699","authenticated-orcid":false,"given":"Will","family":"Feng","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0845-5628","authenticated-orcid":false,"given":"Jiong","family":"Gong","sequence":"additional","affiliation":[{"name":"Intel, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4963-4915","authenticated-orcid":false,"given":"Michael","family":"Gschwind","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1239-3320","authenticated-orcid":false,"given":"Brian","family":"Hirsh","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7558-5570","authenticated-orcid":false,"given":"Sherlock","family":"Huang","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8198-4526","authenticated-orcid":false,"given":"Kshiteej","family":"Kalambarkar","sequence":"additional","affiliation":[{"name":"Quansight, Austin, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4121-2308","authenticated-orcid":false,"given":"Laurent","family":"Kirsch","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8706-9447","authenticated-orcid":false,"given":"Michael","family":"Lazos","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8893-2276","authenticated-orcid":false,"given":"Mario","family":"Lezcano","sequence":"additional","affiliation":[{"name":"Quansight, Austin, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2111-0014","authenticated-orcid":false,"given":"Yanbo","family":"Liang","sequence":"additional","affiliation":[{"name":"Meta, Melo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5462-1466","authenticated-orcid":false,"given":"Jason","family":"Liang","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1993-8648","authenticated-orcid":false,"given":"Yinghai","family":"Lu","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9938-8327","authenticated-orcid":false,"given":"C. K.","family":"Luk","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6873-645X","authenticated-orcid":false,"given":"Bert","family":"Maher","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9351-431X","authenticated-orcid":false,"given":"Yunjie","family":"Pan","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3925-967X","authenticated-orcid":false,"given":"Christian","family":"Puhrsch","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1582-5860","authenticated-orcid":false,"given":"Matthias","family":"Reso","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2612-6588","authenticated-orcid":false,"given":"Mark","family":"Saroufim","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5377-8607","authenticated-orcid":false,"given":"Marcos Yukio","family":"Siraichi","sequence":"additional","affiliation":[{"name":"Quansight, Austin, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6048-3189","authenticated-orcid":false,"given":"Helen","family":"Suk","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8370-5554","authenticated-orcid":false,"given":"Shunting","family":"Zhang","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5454-3113","authenticated-orcid":false,"given":"Michael","family":"Suo","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0636-8710","authenticated-orcid":false,"given":"Phil","family":"Tillet","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2906-8677","authenticated-orcid":false,"given":"Xu","family":"Zhao","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2648-5193","authenticated-orcid":false,"given":"Eikan","family":"Wang","sequence":"additional","affiliation":[{"name":"Intel, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7977-3182","authenticated-orcid":false,"given":"Keren","family":"Zhou","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9597-1405","authenticated-orcid":false,"given":"Richard","family":"Zou","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5436-9952","authenticated-orcid":false,"given":"Xiaodong","family":"Wang","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4199-0434","authenticated-orcid":false,"given":"Ajit","family":"Mathews","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1502-9520","authenticated-orcid":false,"given":"William","family":"Wen","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0635-4725","authenticated-orcid":false,"given":"Gregory","family":"Chanan","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2913-3280","authenticated-orcid":false,"given":"Peng","family":"Wu","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2147-9850","authenticated-orcid":false,"given":"Soumith","family":"Chintala","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.4724125"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation (OSDI'16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. Tensorflow: a system for large-scale machine learning. In Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation (OSDI'16). USENIX Association, Savannah, GA, USA, 265--283. ISBN: 9781931971331."},{"key":"e_1_3_2_1_3_1","volume-title":"Improving subclassing Tensor by propagating subclass instances. https:\/\/github.com\/pytorch\/rfcs\/blob\/master\/RFC-0001-torch-function-for-methods.md. (Aug","author":"Abbasi Hameer","year":"2020","unstructured":"Hameer Abbasi, Edward Z Yang, and Ralf Gommers. 2020. Improving subclassing Tensor by propagating subclass instances. https:\/\/github.com\/pytorch\/rfcs\/blob\/master\/RFC-0001-torch-function-for-methods.md. (Aug. 2020)."},{"key":"e_1_3_2_1_4_1","volume-title":"Alexandre Passos, Allen Lavoie, Ashish Agarwal, Asim Shankar, Igor Ganichev, Josh Levenberg, Mingsheng Hong, Rajat Monga, and Shanqing Cai.","author":"Agrawal Akshay","year":"2019","unstructured":"Akshay Agrawal, Akshay Naresh Modi, Alexandre Passos, Allen Lavoie, Ashish Agarwal, Asim Shankar, Igor Ganichev, Josh Levenberg, Mingsheng Hong, Rajat Monga, and Shanqing Cai. 2019. TensorFlow Eager: A Multi-Stage, Python-Embedded DSL for Machine Learning. CoRR, abs\/1903.01855. http:\/\/arxiv.org\/abs\/1903.01855 arXiv: 1903.01855."},{"key":"e_1_3_2_1_5_1","volume-title":"Theano: A Python framework for fast computation of mathematical expressions. CoRR, abs\/1605.02688","author":"Al-Rfou Rami","year":"2016","unstructured":"Rami Al-Rfou et al. 2016. Theano: A Python framework for fast computation of mathematical expressions. CoRR, abs\/1605.02688. http:\/\/arxiv.org\/abs\/1605.02688 arXiv: 1605.02688."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628092"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"e_1_3_2_1_8_1","volume-title":"Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang, JAX: composable transformations of Python+NumPy programs version 0.3.13","author":"Bradbury James","year":"2018","unstructured":"[SW] James Bradbury, Roy Frostig, Peter Hawkins, Matthew James Johnson, Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang, JAX: composable transformations of Python+NumPy programs version 0.3.13, 2018. url: http:\/\/github.com\/google\/jax."},{"key":"e_1_3_2_1_9_1","unstructured":"Dino Viehland Brett Cannon. 2016. PEP 523: adding a frame evaluation API to CPython. https:\/\/peps.python.org\/pep-0523\/. (2016)."},{"key":"e_1_3_2_1_10_1","unstructured":"Jack Cao. 2022. PyTorch\/XLA 2022 Q4 dev update. https:\/\/dev-discuss.pytorch.org\/t\/pytorch-xla-2022-q4-dev-update\/961. (2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: an automated End-to-End optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). USENIX Association, Carlsbad, CA, (Oct. 2018), 578--594. ISBN: 978-1-939133-08-3. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327144.3327258"},{"key":"e_1_3_2_1_13_1","unstructured":"Sharan Chetlur Cliff Woolley Philippe Vandermersch Jonathan Cohen John Tran Bryan Catanzaro and Evan Shelhamer. 2014. cuDNN: efficient primitives for deep learning. (2014). arXiv: 1410.07 59 [cs.NE]."},{"key":"e_1_3_2_1_14_1","volume-title":"TorchBench: a collection of open source benchmarks for PyTorch performance and usability evaluation. https:\/\/github.com\/pytorch\/benchmark. (Sept","author":"Constable Will","year":"2020","unstructured":"Will Constable, Xu Zhao, Victor Bittorf, Eric Christoffersen, Taylor Robie, Eric Han, Peng Wu, Nick Korovaiko, Jason Ansel, Orion Reblitz-Richardson, and Soumith Chintala. 2020. TorchBench: a collection of open source benchmarks for PyTorch performance and usability evaluation. https:\/\/github.com\/pytorch\/benchmark. (Sept. 2020)."},{"key":"e_1_3_2_1_15_1","first-page":"46","article-title":"OpenMP: an industry standard API for shared-memory programming. Computational Science & Engineering","volume":"5","author":"Dagum Leonardo","year":"1998","unstructured":"Leonardo Dagum and Ramesh Menon. 1998. OpenMP: an industry standard API for shared-memory programming. Computational Science & Engineering, IEEE, 5, 1, 46--55.","journal-title":"IEEE"},{"key":"e_1_3_2_1_16_1","unstructured":"ONNX Runtime developers. 2021. ONNX runtime. https:\/\/www.onnxruntime.ai. (2021)."},{"key":"e_1_3_2_1_17_1","unstructured":"Zachary DeVito et al. 2018. TorchScript. https:\/\/pytorch.org\/docs\/1.9.0\/jit.html. (Sept. 2018)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575702"},{"key":"e_1_3_2_1_19_1","volume-title":"Yong Yu, and Tianqi Chen.","author":"Feng Siyuan","year":"2022","unstructured":"Siyuan Feng, Bohan Hou, Hongyi Jin, Wuwei Lin, Junru Shao, Ruihang Lai, Zihao Ye, Lianmin Zheng, Cody Hao Yu, Yong Yu, and Tianqi Chen. 2022. TensorIR: an abstraction for automatic tensorized program optimization. (2022). arXiv: 2207.04296 [cs.LG]."},{"key":"e_1_3_2_1_20_1","unstructured":"Alan Gray. 2019. Getting started with CUDA graphs. https:\/\/developer.nvidia.com\/blog\/cuda-graphs\/. (2019)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2649-2"},{"key":"e_1_3_2_1_22_1","volume-title":"The state of machine learning frameworks","author":"Horace He.","year":"2019","unstructured":"Horace He. 2019. The state of machine learning frameworks in 2019. https:\/\/thegradient.pub\/state-of-ml-frameworks-2019-pytorch-dominates-research-tensorflow-dominates-industry\/. (2019)."},{"key":"e_1_3_2_1_23_1","unstructured":"Mike Innes et al. 2017. On machine learning and programming languages. https:\/\/julialang.org\/blog\/2017\/12\/ml-pl\/. (Dec. 2017)."},{"key":"e_1_3_2_1_24_1","volume-title":"Programming languages --- C++. (Sept","author":"ISO.","year":"1998","unstructured":"ISO. 1998. ISO\/IEC 14882:1998: Programming languages --- C++. (Sept. 1998), 732. http:\/\/webstore.ansi.org\/ansidocstore\/product.asp?sku=ISO%2FIEC+14882%2D1998."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.103"},{"key":"e_1_3_2_1_29_1","unstructured":"Adrian M\u00f6nnich Armin Ronacher David Lord Grey Li Joshua Bronson Markus Unterwaditzer and Philip Jones. 2023. Jinja project. https:\/\/github.com\/pallets\/jinja. (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Fitzek","author":"P\u00e9ter Vingelmann NVIDIA","year":"2023","unstructured":"NVIDIA, P\u00e9ter Vingelmann, and Frank H.P. Fitzek. 2023. CUDA. https:\/\/developer.nvidia.com\/cuda-toolkit. (2023)."},{"key":"e_1_3_2_1_31_1","unstructured":"2023. ONNX. https:\/\/onnx.ai\/. (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems. Curran Associates Inc.","unstructured":"2019. Pytorch: an imperative style, high-performance deep learning library. Proceedings of the 33rd International Conference on Neural Information Processing Systems. Curran Associates Inc., Red Hook, NY, USA, 12 pages."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of Machine Learning and Systems. D. Marculescu, Y. Chi, and C. Wu, (Eds.)","volume":"4","author":"Reed James","year":"2022","unstructured":"James Reed, Zachary DeVito, Horace He, Ansley Ussery, and Jason Ansel. 2022. Torch.fx: practical program capture and transformation for deep learning in python. In Proceedings of Machine Learning and Systems. D. Marculescu, Y. Chi, and C. Wu, (Eds.) Vol. 4, 638--651. https:\/\/proceedings.mlsys.org\/paper\/2022\/file\/ca46c1b9512a7a8315fa3c5a946e8265-Paper.pdf."},{"key":"e_1_3_2_1_35_1","unstructured":"Elvis Saravia. 2021. Papers with Code 2021: a year in review. https:\/\/medium.com\/paperswithcode\/papers-with-code-2021-a-year-in-review-de75d5a77b8b. (2021)."},{"key":"e_1_3_2_1_36_1","unstructured":"Christian Sarofeen Piotr Bialecki Jie Jiang Kevin Stephano Masaki Kozuki Neal Vaidya and Stas Bekman. 2022. Introducing nvFuser a deep learning compiler for PyTorch. https:\/\/pytorch.org\/blog\/introducing-nvfuser-a-deep-learning-compiler-for-pytorch\/. (2022)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2945397"},{"key":"e_1_3_2_1_38_1","volume-title":"Cody Hao Yu, and Tianqi Chen","author":"Shao Junru","year":"2022","unstructured":"Junru Shao, Xiyou Zhou, Siyuan Feng, Bohan Hou, Ruihang Lai, Hongyi Jin, Wuwei Lin, Masahiro Masuda, Cody Hao Yu, and Tianqi Chen. 2022. Tensor program optimization with probabilistic programs. (2022). arXiv: 2205.13603 [cs.LG]."},{"key":"e_1_3_2_1_39_1","volume-title":"Jie Young Sohn, and Denys Shabalin","author":"Suhan Alex","year":"2021","unstructured":"Alex Suhan, Davide Libenzi, Ailing Zhang, Parker Schuh, Brennan Saeta, Jie Young Sohn, and Denys Shabalin. 2021. LazyTensor: combining eager execution with domain-specific compilers. arXiv preprint arXiv:2102.13267."},{"key":"e_1_3_2_1_40_1","unstructured":"PyTorch Team. 2023. TorchDynamo Benchmarking Code. https:\/\/github.com\/pytorch\/pytorch\/tree\/main\/benchmarks\/dynamo. (2023)."},{"key":"e_1_3_2_1_41_1","unstructured":"PyTorch Team. 2023. TorchInductor Performance Dashboard. https:\/\/hud.pytorch.org\/benchmark\/compilers. (2023)."},{"key":"e_1_3_2_1_42_1","unstructured":"PyTorch XLA Team. 2023. PyTorch\/XLA. https:\/\/github.com\/pytorch\/xla. (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"https:\/\/github.com\/NVIDIA\/cutlass. Version 3.0.0. (Jan","author":"Thakkar Vijay","year":"2023","unstructured":"Vijay Thakkar, Pradeep Ramani, Cris Cecka, Aniket Shivam, Honghao Lu, Ethan Yan, Jack Kosaian, Mark Hoemmen, Haicheng Wu, Andrew Kerr, Matt Nicely, Duane Merrill, Dustyn Blasig, Fengqi Qiao, Piotr Majcher, Paul Springer, Markus Hohnerbach, Jin Wang, and Manish Gupta. 2023. CUTLASS. https:\/\/github.com\/NVIDIA\/cutlass. Version 3.0.0. (Jan. 2023)."},{"key":"e_1_3_2_1_44_1","unstructured":"[SW] The IREE Authors IREE Sept. 2019. url: https:\/\/github.com\/openxla\/iree."},{"key":"e_1_3_2_1_45_1","volume-title":"compiled. https:\/\/developers.googleblog.com\/2017\/03\/xla-tensorflow-compiled.html. (Mar","author":"Team The XLA","year":"2017","unstructured":"The XLA Team. 2017. XLA - Tensorflow, compiled. https:\/\/developers.googleblog.com\/2017\/03\/xla-tensorflow-compiled.html. (Mar. 2017)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330756"},{"key":"e_1_3_2_1_48_1","volume-title":"Tensor comprehensions: framework-agnostic high-performance machine learning abstractions. (2018). arXiv","author":"Vasilache Nicolas","year":"1802","unstructured":"Nicolas Vasilache, Oleksandr Zinenko, Theodoros Theodoridis, Priya Goyal, Zachary DeVito, William S. Moses, Sven Verdoolaege, Andrew Adams, and Albert Cohen. 2018. Tensor comprehensions: framework-agnostic high-performance machine learning abstractions. (2018). arXiv: 1802.04730 [cs.PL]."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1080\/00401706.1962.10490022"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370330"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","unstructured":"Ross Wightman. 2019. PyTorch image models. https:\/\/github.com\/rwightman\/pytorch-image-models. (2019). 10.5281\/zenodo.4414861","DOI":"10.5281\/zenodo.4414861"},{"key":"e_1_3_2_1_53_1","volume-title":"Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush.","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Perric Cistac, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. 2020. Transformers: State-of-the-Art Natural Language Processing. In Association for Computational Linguistics, (Oct. 2020), 38--45. https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6."},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of Machine Learning and Systems. D. Marculescu, Y. Chi, and C. Wu, (Eds.)","volume":"4","author":"Xing Jiarong","year":"2022","unstructured":"Jiarong Xing, Leyuan Wang, Shang Zhang, Jack Chen, Ang Chen, and Yibo Zhu. 2022. Bolt: bridging the gap between auto-tuners and hardware-native performance. In Proceedings of Machine Learning and Systems. D. Marculescu, Y. Chi, and C. Wu, (Eds.) Vol. 4, 204--216. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2022\/file\/38b3eff8baf56627478ec76a704e9b52-Paper.pdf."},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of Machine Learning and Systems.","author":"Yu Shangdi","year":"2023","unstructured":"Shangdi Yu and Horace He. 2023. Transcending runtime-memory tradeoffs in checkpointing by being fusion aware. In Proceedings of Machine Learning and Systems."},{"key":"e_1_3_2_1_56_1","volume-title":"Proceedings of Machine Learning and Systems. D. Marculescu, Y. Chi, and C. Wu, (Eds.)","volume":"4","author":"Zheng Bojian","year":"2022","unstructured":"Bojian Zheng, Ziheng Jiang, Cody Hao Yu, Haichen Shen, Joshua Fromm, Yizhi Liu, Yida Wang, Luis Ceze, Tianqi Chen, and Gennady Pekhimenko. 2022. Dietcode: automatic optimization for dynamic tensor programs. In Proceedings of Machine Learning and Systems. D. Marculescu, Y. Chi, and C. Wu, (Eds.) Vol. 4, 848--863. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2022\/file\/fa7cdfad1a5aaf8370ebeda47a1ff1c3-Paper.pdf."},{"key":"e_1_3_2_1_57_1","volume-title":"Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation (OSDI'20)","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, Joseph E. Gonzalez, and Ion Stoica. 2020. Ansor: generating high-performance tensor programs for deep learning. In Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation (OSDI'20) Article 49. USENIX Association, USA, 17 pages. ISBN: 978-1-939133-19-9."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527440"},{"key":"e_1_3_2_1_59_1","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zhu Hongyu","year":"2022","unstructured":"Hongyu Zhu, Ruofan Wu, Yijia Diao, Shanbin Ke, Haoyu Li, Chen Zhang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Wei Cui, Fan Yang, Mao Yang, Lidong Zhou, Asaf Cidon, and Gennady Pekhimenko. 2022. ROLLER: fast and efficient tensor compilation for deep learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, (July 2022), 233--248. ISBN: 978-1-939133-28-1. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/zhu."},{"key":"e_1_3_2_1_60_1","unstructured":"Mikhail Zolotukhin. 2021. NNC walkthrough: how PyTorch ops get fused. https:\/\/dev-discuss.pytorch.org\/t\/nnc-walkthrough-how-pytorch-ops-get-fused\/125. (2021)."}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640366","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640366","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:41Z","timestamp":1750291421000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640366"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":60,"alternative-id":["10.1145\/3620665.3640366","10.1145\/3620665"],"URL":"https:\/\/doi.org\/10.1145\/3620665.3640366","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}