{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:05:45Z","timestamp":1773277545316,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3676641.3716249","type":"proceedings-article","created":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T16:47:32Z","timestamp":1743094052000},"page":"998-1013","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Relax: Composable Abstractions for End-to-End Dynamic Machine Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6400-5079","authenticated-orcid":false,"given":"Ruihang","family":"Lai","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7370-1495","authenticated-orcid":false,"given":"Junru","family":"Shao","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4682-983X","authenticated-orcid":false,"given":"Siyuan","family":"Feng","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6747-7014","authenticated-orcid":false,"given":"Steven","family":"Lyubomirsky","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5718-3387","authenticated-orcid":false,"given":"Bohan","family":"Hou","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8589-5453","authenticated-orcid":false,"given":"Wuwei","family":"Lin","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6450-8108","authenticated-orcid":false,"given":"Zihao","family":"Ye","sequence":"additional","affiliation":[{"name":"University of Washington, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6894-6554","authenticated-orcid":false,"given":"Hongyi","family":"Jin","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8193-3010","authenticated-orcid":false,"given":"Yuchen","family":"Jin","sequence":"additional","affiliation":[{"name":"Hyperbolic, San Francisco, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7122-8625","authenticated-orcid":false,"given":"Jiawei","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign, Champaign, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9957-9141","authenticated-orcid":false,"given":"Lesheng","family":"Jin","sequence":"additional","affiliation":[{"name":"Hyperbolic, San Francisco, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9924-3543","authenticated-orcid":false,"given":"Yaxing","family":"Cai","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7732-4391","authenticated-orcid":false,"given":"Ziheng","family":"Jiang","sequence":"additional","affiliation":[{"name":"ByteDance, Seattle, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9571-8660","authenticated-orcid":false,"given":"Yong","family":"Wu","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4793-9069","authenticated-orcid":false,"given":"Sunghyun","family":"Park","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3834-7996","authenticated-orcid":false,"given":"Prakalp","family":"Srivastava","sequence":"additional","affiliation":[{"name":"Netflix, Los Gatos, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3525-2503","authenticated-orcid":false,"given":"Jared","family":"Roesch","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4076-5684","authenticated-orcid":false,"given":"Todd C.","family":"Mowry","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5744-3940","authenticated-orcid":false,"given":"Tianqi","family":"Chen","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA and NVIDIA, Santa Clara, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"265","volume-title":"12th USENIX symposium on operating systems design and implementation (OSDI 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et al. Tensorflow: a system for large-scale machine learning. In 12th USENIX symposium on operating systems design and implementation (OSDI 16), pages 265--283, 2016."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640366"},{"key":"e_1_3_2_1_3_1","volume-title":"et al. Onnx: Open neural network exchange. https:\/\/github.com\/onnx\/onnx","author":"Bai Junjie","year":"2019","unstructured":"Junjie Bai, Fang Lu, Ke Zhang, et al. Onnx: Open neural network exchange. https:\/\/github.com\/onnx\/onnx, 2019."},{"key":"e_1_3_2_1_4_1","volume-title":"Whisperx: Time-accurate speech transcription of long-form audio","author":"Bain Max","year":"2023","unstructured":"Max Bain, Jaesung Huh, Tengda Han, and Andrew Zisserman. Whisperx: Time-accurate speech transcription of long-form audio, 2023."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the ACM on Programming Languages, 7(PLDI):394--419","author":"Bansal Manya","year":"2023","unstructured":"Manya Bansal, Olivia Hsu, Kunle Olukotun, and Fredrik Kjolstad. Mosaic: An interoperable compiler for tensor algebra. Proceedings of the ACM on Programming Languages, 7(PLDI):394--419, 2023."},{"key":"e_1_3_2_1_6_1","volume-title":"Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, and Samuel Weinbach. Gpt-neox-20b: An open-source autoregressive language model","author":"Black Sid","year":"2022","unstructured":"Sid Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, Michael Pieler, USVSN Sai Prashanth, Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, and Samuel Weinbach. Gpt-neox-20b: An open-source autoregressive language model, 2022."},{"key":"e_1_3_2_1_7_1","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al. Tvm: An automated end-to-end optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pages 578--594, 2018."},{"key":"e_1_3_2_1_8_1","volume-title":"Learning to optimize tensor programs","author":"Chen Tianqi","year":"2019","unstructured":"Tianqi Chen, Lianmin Zheng, Eddie Yan, Ziheng Jiang, Thierry Moreau, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. Learning to optimize tensor programs, 2019."},{"key":"e_1_3_2_1_9_1","volume-title":"cudnn: Efficient primitives for deep learning","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. cudnn: Efficient primitives for deep learning, 2014."},{"key":"e_1_3_2_1_10_1","volume-title":"March","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality, March 2023."},{"key":"e_1_3_2_1_11_1","volume-title":"Axon: A language for dynamic shapes in deep learning graphs","author":"Collins Alexander","year":"2022","unstructured":"Alexander Collins and Vinod Grover. Axon: A language for dynamic shapes in deep learning graphs, 2022."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2016.63"},{"key":"e_1_3_2_1_13_1","first-page":"16344","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Dao Tri","year":"2022","unstructured":"Tri Dao, Dan Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. Flashattention: Fast and memory-efficient exact attention with ioawareness. In S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh, editors, Advances in Neural Information Processing Systems, volume 35, pages 16344--16359. Curran Associates, Inc., 2022."},{"key":"e_1_3_2_1_14_1","first-page":"38","article-title":"A compiler for recursive deep learning models","volume":"3","author":"Fegade Pratik","year":"2021","unstructured":"Pratik Fegade, Tianqi Chen, Phillip Gibbons, and Todd Mowry. Cortex: A compiler for recursive deep learning models. Proceedings of Machine Learning and Systems, 3:38--54, 2021.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_15_1","first-page":"721","volume-title":"The cora tensor compiler: Compilation for ragged tensors with minimal padding","author":"Fegade Pratik","year":"2022","unstructured":"Pratik Fegade, Tianqi Chen, Phillip Gibbons, and Todd Mowry. The cora tensor compiler: Compilation for ragged tensors with minimal padding. In D. Marculescu, Y. Chi, and C. Wu, editors, Proceedings of Machine Learning and Systems, volume 4, pages 721--747, 2022."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3576933"},{"key":"e_1_3_2_1_17_1","volume-title":"Compiling machine learning programs via high-level tracing","author":"Frostig Roy","year":"2018","unstructured":"Roy Frostig, Matthew Johnson, and Chris Leary. Compiling machine learning programs via high-level tracing. 2018."},{"key":"e_1_3_2_1_18_1","volume-title":"ggml. https:\/\/github.com\/ggerganov\/ggml","author":"Gerganov Georgi","year":"2022","unstructured":"Georgi Gerganov. ggml. https:\/\/github.com\/ggerganov\/ggml, 2022."},{"key":"e_1_3_2_1_19_1","volume-title":"whisper.cpp. https:\/\/github.com\/ggerganov\/whisper.cpp","author":"Gerganov Georgi","year":"2022","unstructured":"Georgi Gerganov. whisper.cpp. https:\/\/github.com\/ggerganov\/whisper.cpp, 2022."},{"key":"e_1_3_2_1_20_1","volume-title":"llama.cpp. https:\/\/github.com\/ggerganov\/llama.cpp","author":"Gerganov Georgi","year":"2023","unstructured":"Georgi Gerganov. llama.cpp. https:\/\/github.com\/ggerganov\/llama.cpp, 2023."},{"key":"e_1_3_2_1_21_1","volume-title":"Sep","author":"Gray Alan","year":"2019","unstructured":"Alan Gray. Getting started with cuda graphs, Sep 2019."},{"key":"e_1_3_2_1_22_1","first-page":"302","volume-title":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","volume":"3","author":"Hagedorn Bastian","year":"2023","unstructured":"Bastian Hagedorn, Bin Fan, Hanfeng Chen, Cris Cecka, Michael Garland, and Vinod Grover. Graphene: An ir for optimized tensor computations on gpus. In Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3, pages 302--313, 2023."},{"key":"e_1_3_2_1_23_1","volume-title":"Intel\u00ae math kernel library for deep learning networks","year":"2017","unstructured":"Intel. Intel\u00ae math kernel library for deep learning networks, 2017."},{"key":"e_1_3_2_1_24_1","volume-title":"sep","author":"Project IREE","year":"2019","unstructured":"IREE Project. IREE, sep 2019."},{"key":"e_1_3_2_1_25_1","volume-title":"Miopen: An open source library for deep learning primitives","author":"Khan Jehandad","year":"2019","unstructured":"Jehandad Khan, Paul Fultz, Artem Tamazov, Daniel Lowell, Chao Liu, Michael Melesse, Murali Nandhimandalam, Kamil Nasyrov, Ilya Perminov, Tejash Shah, Vasilii Filippov, Jing Zhang, Jing Zhou, Bragadeesh Natarajan, and Mayank Daga. Miopen: An open source library for deep learning primitives, 2019."},{"key":"e_1_3_2_1_26_1","volume-title":"Joseph E Gonzalez, Hao Zhang, and Ion Stoica. Efficient memory management for large language model serving with pagedattention. arXiv preprint arXiv:2309.06180","author":"Kwon Woosuk","year":"2023","unstructured":"Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu, Joseph E Gonzalez, Hao Zhang, and Ion Stoica. Efficient memory management for large language model serving with pagedattention. arXiv preprint arXiv:2309.06180, 2023."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_28_1","volume-title":"NeurIPS","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. Visual instruction tuning. In NeurIPS, 2023."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577479"},{"key":"e_1_3_2_1_30_1","volume-title":"et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 32, 2019."},{"key":"e_1_3_2_1_31_1","unstructured":"Alec Radford Jong Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh SandhiniAgarwal Girish Sastry AmandaAskell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. Learning transferable visual models from natural language supervision 2021."},{"key":"e_1_3_2_1_32_1","volume-title":"Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. Robust speech recognition via largescale weak supervision","author":"Radford Alec","year":"2022","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. Robust speech recognition via largescale weak supervision, 2022."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_2_1_34_1","first-page":"638","volume-title":"torch.fx: Practical program capture and transformation for deep learning in python","author":"Reed James","year":"2022","unstructured":"James Reed, Zachary DeVito, Horace He, Ansley Ussery, and Jason Ansel. torch.fx: Practical program capture and transformation for deep learning in python. In D. Marculescu, Y. Chi, and C. Wu, editors, Proceedings of Machine Learning and Systems, volume 4, page 638--651, 2022."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3211346.3211348"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/AAI28262914"},{"key":"e_1_3_2_1_37_1","unstructured":"Baptiste Rozi\u00e8re Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing Ellen Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez J\u00e9r\u00e9my Rapin Artyom Kozhevnikov Ivan Evtimov Joanna Bitton Manish Bhatt Cristian Canton Ferrer Aaron Grattafiori Wenhan Xiong Alexandre D\u00e9fossez Jade Copet Faisal Azhar Hugo Touvron Louis Martin Nicolas Usunier Thomas Scialom and Gabriel Synnaeve. Code llama: Open foundation models for code 2024."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3122948.3122949"},{"key":"e_1_3_2_1_39_1","first-page":"35783","article-title":"Tensor program optimization with probabilistic programs","volume":"35","author":"Shao Junru","year":"2022","unstructured":"Junru Shao, Xiyou Zhou, Siyuan Feng, Bohan Hou, Ruihang Lai, Hongyi Jin, Wuwei Lin, Masahiro Masuda, Cody Hao Yu, and Tianqi Chen. Tensor program optimization with probabilistic programs. Advances in Neural Information Processing Systems, 35:35783--35796, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_40_1","first-page":"208","volume-title":"Proceedings of Machine Learning and Systems","volume":"3","author":"Shen Haichen","year":"2021","unstructured":"Haichen Shen, Jared Roesch, Zhi Chen, Wei Chen, Yong Wu, Mu Li, Vin Sharma, Zachary Tatlock, and Yida Wang. Nimble: Efficiently compiling dynamic neural networks for model inference. In A. Smola, A. Dimakis, and I. Stoica, editors, Proceedings of Machine Learning and Systems, volume 3, pages 208--222, 2021."},{"key":"e_1_3_2_1_41_1","volume-title":"jan","author":"Thakkar Vijay","year":"2023","unstructured":"Vijay Thakkar, Pradeep Ramani, Cris Cecka, Aniket Shivam, Honghao Lu, Ethan Yan, Jack Kosaian, Mark Hoemmen, Haicheng Wu, Andrew Kerr, Matt Nicely, Duane Merrill, Dustyn Blasig, Fengqi Qiao, Piotr Majcher, Paul Springer, Markus Hohnerbach, Jin Wang, and Manish Gupta. CUTLASS, jan 2023."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_43_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. Llama 2: Open foundation and fine-tuned chat models 2023."},{"key":"e_1_3_2_1_44_1","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. Attention is all you need. In I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett, editors, Advances in Neural Information Processing Systems, volume 30. Curran Associates, Inc., 2017."},{"key":"e_1_3_2_1_45_1","first-page":"38","volume-title":"Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. Transformers: State-of-the-Art Natural Language Processing.","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Perric Cistac, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander M. Rush. Transformers: State-of-the-Art Natural Language Processing. pages 38--45. Association for Computational Linguistics, October 2020."},{"key":"e_1_3_2_1_46_1","volume-title":"Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244","author":"Xu Can","year":"2023","unstructured":"Can Xu, Qingfeng Sun, Kai Zheng, Xiubo Geng, Pu Zhao, Jiazhan Feng, Chongyang Tao, and Daxin Jiang. Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244, 2023."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"e_1_3_2_1_48_1","first-page":"848","volume-title":"Haichen Shen, Joshua Fromm, Yizhi Liu, Yida Wang, Luis Ceze, Tianqi Chen, and Gennady Pekhimenko. Dietcode: Automatic optimization for dynamic tensor programs. In D. Marculescu","author":"Zheng Bojian","year":"2022","unstructured":"Bojian Zheng, Ziheng Jiang, Cody Hao Yu, Haichen Shen, Joshua Fromm, Yizhi Liu, Yida Wang, Luis Ceze, Tianqi Chen, and Gennady Pekhimenko. Dietcode: Automatic optimization for dynamic tensor programs. In D. Marculescu, Y. Chi, and C. Wu, editors, Proceedings of Machine Learning and Systems, volume 4, pages 848--863, 2022."},{"key":"e_1_3_2_1_49_1","first-page":"863","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, Joseph E. Gonzalez, and Ion Stoica. Ansor: Generating High-Performance tensor programs for deep learning. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI , pages 863--879. USENIX Association, November 2020."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507723"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437984.3458838"}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716249","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676641.3716249","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:06:01Z","timestamp":1755774361000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716249"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":51,"alternative-id":["10.1145\/3676641.3716249","10.1145\/3676641"],"URL":"https:\/\/doi.org\/10.1145\/3676641.3716249","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}