{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:45:20Z","timestamp":1773193520868,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2147909"],"award-info":[{"award-number":["CNS-2147909"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2211882"],"award-info":[{"award-number":["CNS-2211882"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2239351"],"award-info":[{"award-number":["CNS-2239351"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651383","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"755-769","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Optimal Kernel Orchestration for Tensor Programs with Korch"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4096-0511","authenticated-orcid":false,"given":"Muyan","family":"Hu","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana Champaign, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4661-0060","authenticated-orcid":false,"given":"Ashwin","family":"Venkatram","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, San Jose, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6656-1030","authenticated-orcid":false,"given":"Shreyashri","family":"Biswas","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6292-5066","authenticated-orcid":false,"given":"Balamurugan","family":"Marimuthu","sequence":"additional","affiliation":[{"name":"Sambanova Systems, Palo Alto, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5718-3387","authenticated-orcid":false,"given":"Bohan","family":"Hou","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5406-0736","authenticated-orcid":false,"given":"Gabriele","family":"Oliaro","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4605-148X","authenticated-orcid":false,"given":"Haojie","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7327-748X","authenticated-orcid":false,"given":"Liyan","family":"Zheng","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9371-8358","authenticated-orcid":false,"given":"Xupeng","family":"Miao","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7656-6428","authenticated-orcid":false,"given":"Jidong","family":"Zhai","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1270-5185","authenticated-orcid":false,"given":"Zhihao","family":"Jia","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/aws.amazon.com\/ec2\/instance-types\/p3\/","author":"Amazon","year":"2022","unstructured":"Amazon ec2 p3 instances. https:\/\/aws.amazon.com\/ec2\/instance-types\/p3\/, 2022."},{"key":"e_1_3_2_1_2_1","first-page":"265","volume-title":"12th {USENIX} symposium on operating systems design and implementation ({OSDI} 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et al. Tensorflow: A system for large-scale machine learning. In 12th {USENIX} symposium on operating systems design and implementation ({OSDI} 16), pages 265--283, 2016."},{"key":"e_1_3_2_1_3_1","volume-title":"Learning to optimize halide with tree search and random programs. ACM Transactions on Graphics (TOG), 38(4):1--12","author":"Adams Andrew","year":"2019","unstructured":"Andrew Adams, Karima Ma, Luke Anderson, Riyadh Baghdadi, Tzu-Mao Li, Micha\u00ebl Gharbi, Benoit Steiner, Steven Johnson, Kayvon Fatahalian, Fr\u00e9do Durand, et al. Learning to optimize halide with tree search and random programs. ACM Transactions on Graphics (TOG), 38(4):1--12, 2019."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485486"},{"key":"e_1_3_2_1_5_1","volume-title":"Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934","author":"Bochkovskiy Alexey","year":"2020","unstructured":"Alexey Bochkovskiy, Chien-Yao Wang, and Hong-Yuan Mark Liao. Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934, 2020."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01587"},{"key":"e_1_3_2_1_7_1","volume-title":"TVM: end-to-end optimization stack for deep learning. CoRR, abs\/1802.04799","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Haichen Shen, Eddie Q. Yan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. TVM: end-to-end optimization stack for deep learning. CoRR, abs\/1802.04799, 2018."},{"key":"e_1_3_2_1_8_1","first-page":"31","article-title":"Learning to optimize tensor programs","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Lianmin Zheng, Eddie Yan, Ziheng Jiang, Thierry Moreau, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. Learning to optimize tensor programs. Advances in Neural Information Processing Systems, 31, 2018.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","volume-title":"cudnn: Efficient primitives for deep learning. CoRR, abs\/1410.0759","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. cudnn: Efficient primitives for deep learning. CoRR, abs\/1410.0759, 2014."},{"key":"e_1_3_2_1_10_1","volume-title":"https:\/\/developer.nvidia.com\/cublas","author":"Us Dense Linear","year":"2016","unstructured":"Dense Linear Algebra on GPUs. https:\/\/developer.nvidia.com\/cublas, 2016."},{"key":"e_1_3_2_1_11_1","first-page":"16344","article-title":"Flashattention: Fast and memory-efficient exact attention with io-awareness","volume":"35","author":"Dao Tri","year":"2022","unstructured":"Tri Dao, Dan Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in Neural Information Processing Systems, 35:16344--16359, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/0167-8191(88)90094-4"},{"key":"e_1_3_2_1_13_1","volume-title":"BERT: pre-training of deep bidirectional transformers for language understanding. CoRR, abs\/1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. BERT: pre-training of deep bidirectional transformers for language understanding. CoRR, abs\/1810.04805, 2018."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575702"},{"key":"e_1_3_2_1_15_1","volume-title":"Yolox: Exceeding yolo series","author":"Ge Zheng","year":"2021","unstructured":"Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, and Jian Sun. Yolox: Exceeding yolo series in 2021. arXiv preprint arXiv:2107.08430, 2021."},{"key":"e_1_3_2_1_16_1","volume-title":"Autodriving: A photoelicitation technique. Journal of consumer Research, 18(3):257--272","author":"Heisley Deborah D","year":"1991","unstructured":"Deborah D Heisley and Sidney J Levy. Autodriving: A photoelicitation technique. Journal of consumer Research, 18(3):257--272, 1991."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3200691.3178507"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 2nd Conference on Systems and Machine Learning, SysML'19","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, James Thomas, Todd Warzawski, Mingyu Gao, Matei Zaharia, and Alex Aiken. Optimizing dnn computation with relaxed graph substitutions. In Proceedings of the 2nd Conference on Systems and Machine Learning, SysML'19, 2019."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"e_1_3_2_1_21_1","volume-title":"Pulp: a linear programming toolkit for python","author":"Mitchell Stuart","year":"2011","unstructured":"Stuart Mitchell, Michael OSullivan, and Iain Dunning. Pulp: a linear programming toolkit for python. The University of Auckland, Auckland, New Zealand, 65, 2011."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925952"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454083"},{"key":"e_1_3_2_1_24_1","volume-title":"Open neural network exchange. https:\/\/onnx.ai\/","author":"ONNX","year":"2022","unstructured":"ONNX: Open neural network exchange. https:\/\/onnx.ai\/, 2022."},{"key":"e_1_3_2_1_25_1","volume-title":"https:\/\/github.com\/onnx\/onnx\/blob\/main\/docs\/Operators.md","author":"Operators ONNX","year":"2022","unstructured":"ONNX Operators. https:\/\/github.com\/onnx\/onnx\/blob\/main\/docs\/Operators.md, 2022."},{"key":"e_1_3_2_1_26_1","volume-title":"International journal of computer vision, 38(1):15--33","author":"Papageorgiou Constantine","year":"2000","unstructured":"Constantine Papageorgiou and Tomaso Poggio. A trainable system for object detection. International journal of computer vision, 38(1):15--33, 2000."},{"key":"e_1_3_2_1_27_1","volume-title":"https:\/\/pytorch.org","author":"Tensors","year":"2023","unstructured":"Tensors and Dynamic neural networks in Python with strong GPU acceleration. https:\/\/pytorch.org, 2023."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_29_1","first-page":"35783","article-title":"Tensor program optimization with probabilistic programs","volume":"35","author":"Shao Junru","year":"2022","unstructured":"Junru Shao, Xiyou Zhou, Siyuan Feng, Bohan Hou, Ruihang Lai, Hongyi Jin, Wuwei Lin, Masahiro Masuda, Cody Hao Yu, and Tianqi Chen. Tensor program optimization with probabilistic programs. Advances in Neural Information Processing Systems, 35:35783--35796, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_30_1","volume-title":"Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456","author":"Song Yang","year":"2020","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456, 2020."},{"key":"e_1_3_2_1_31_1","first-page":"24829","article-title":"Multidimensional planner for dnn parallelization","volume":"34","author":"Tarnawski Jakub M","year":"2021","unstructured":"Jakub M Tarnawski, Deepak Narayanan, and Amar Phanishayee. Piper: Multidimensional planner for dnn parallelization. Advances in Neural Information Processing Systems, 34:24829--24840, 2021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","volume-title":"Programmable inference accelerator. https:\/\/developer.nvidia.com\/tensorrt","author":"NVIDIA","year":"2017","unstructured":"NVIDIA TensorRT: Programmable inference accelerator. https:\/\/developer.nvidia.com\/tensorrt, 2017."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_34_1","first-page":"5998","volume-title":"Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. Attention is all you need. In Advances in neural information processing systems, pages 5998--6008, 2017."},{"key":"e_1_3_2_1_35_1","first-page":"37","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Wang Haojie","year":"2021","unstructured":"Haojie Wang, Jidong Zhai, Mingyu Gao, Zixuan Ma, Shizhi Tang, Liyan Zheng, Yuanzhi Li, Kaiyuan Rong, Yuanyong Chen, and Zhihao Jia. Pet: Optimizing tensor programs with partially equivalent transformations and automated corrections. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21), pages 37--54, 2021."},{"key":"e_1_3_2_1_36_1","first-page":"12077","article-title":"Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. Segformer: Simple and efficient design for semantic segmentation with transformers. Advances in Neural Information Processing Systems, 34:12077--12090, 2021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","unstructured":"Lianmin Zheng Chengfan Jia Minmin Sun Zhao Wu Cody Hao Yu Ameer Haj-Ali Yida Wang Jun Yang Danyang Zhuo Koushik Sen et al. Ansor: generating high-performance tensor programs for deep learning. In 14th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 20) pages 863--879 2020."},{"key":"e_1_3_2_1_38_1","first-page":"739","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Zheng Liyan","year":"2023","unstructured":"Liyan Zheng, Haojie Wang, Jidong Zhai, Muyan Hu, Zixuan Ma, Tuowei Wang, Shuhong Huang, Xupeng Miao, Shizhi Tang, Kezhao Huang, et al. {EINNET}: Optimizing tensor programs with {Derivation-Based} transformations. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23), pages 739--755, 2023."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651383","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3620666.3651383","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:43Z","timestamp":1750291423000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651383"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":39,"alternative-id":["10.1145\/3620666.3651383","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651383","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}