{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:29:48Z","timestamp":1775579388440,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1846354"],"award-info":[{"award-number":["CCF-1846354"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2217144"],"award-info":[{"award-number":["CCF-2217144"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2313028"],"award-info":[{"award-number":["CCF-2313028"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651348","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"367-381","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Felix: Optimizing Tensor Programs with Gradient Descent"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7080-891X","authenticated-orcid":false,"given":"Yifan","family":"Zhao","sequence":"first","affiliation":[{"name":"University of Illinois Urbana Champaign, Champaign, Illinois, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9496-9028","authenticated-orcid":false,"given":"Hashim","family":"Sharif","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign, Champaign, Illinois, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0760-9690","authenticated-orcid":false,"given":"Vikram","family":"Adve","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana Champaign, Champaign, Illinois, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7319-8845","authenticated-orcid":false,"given":"Sasa","family":"Misailovic","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana Champaign, Champaign, Illinois, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Chris Olah","author":"Abadi Mart\u00edn","unstructured":"Mart\u00edn Abadi, Ashish Agarwal, Paul Barham, Eugene Brevdo, Zhifeng Chen, Craig Citro, Gregory S. Corrado, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Ian J. Goodfellow, Andrew Harp, Geoffrey Irving, Michael Isard, Yangqing Jia, Rafal J\u00f3zefowicz, Lukasz Kaiser, Manjunath Kudlur, Josh Levenberg, Dan Man\u00e9, Rajat Monga, Sherry Moore, Derek Gordon Murray, Chris Olah, Mike Schuster, Jonathon Shlens, Benoit Steiner, Ilya Sutskever, Kunal Talwar, Paul A. Tucker, Vincent Vanhoucke, Vijay Vasudevan, Fernanda B. Vi\u00e9gas, Oriol Vinyals, Pete Warden, Martin Wattenberg, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. Tensorflow: Large-scale machine learning on heterogeneous distributed systems. CoRR, abs\/1603.04467, 2016."},{"key":"e_1_3_2_1_2_1","volume-title":"Learning to optimize halide with tree search and random programs. ACM Transactions on Graphics (TOG), 38","author":"Adams Andrew","year":"2019","unstructured":"Andrew Adams, Karima Ma, Luke Anderson, Riyadh Baghdadi, Tzu-Mao Li, Micha\u00ebl Gharbi, Benoit Steiner, Steven Johnson, Kayvon Fatahalian, Fr\u00e9do Durand, and Jonathan Ragan-Kelley. Learning to optimize halide with tree search and random programs. ACM Transactions on Graphics (TOG), 38, 2019."},{"key":"e_1_3_2_1_3_1","volume-title":"A deep learning based cost model for automatic code optimization. CoRR, abs\/2104.04955","author":"Baghdadi Riyadh","year":"2021","unstructured":"Riyadh Baghdadi, Massinissa Merouani, Mohamed-Hicham Leghettas, Kamel Abdous, Taha Arbaoui, Karima Benatchba, and Saman P. Amarasinghe. A deep learning based cost model for automatic code optimization. CoRR, abs\/2104.04955, 2021."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"e_1_3_2_1_5_1","volume-title":"Smooth interpretation. ACM Sigplan Notices, 45(6)","author":"Chaudhuri Swarat","year":"2010","unstructured":"Swarat Chaudhuri and Armando Solar-Lezama. Smooth interpretation. ACM Sigplan Notices, 45(6), 2010."},{"key":"e_1_3_2_1_6_1","volume-title":"Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. CoRR, abs\/1512.01274","author":"Chen Tianqi","year":"2015","unstructured":"Tianqi Chen, Mu Li, Yutian Li, Min Lin, Naiyan Wang, Minjie Wang, Tianjun Xiao, Bing Xu, Chiyuan Zhang, and Zheng Zhang. Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. CoRR, abs\/1512.01274, 2015."},{"key":"e_1_3_2_1_7_1","volume-title":"TVM: end-to-end optimization stack for deep learning. CoRR, abs\/1802.04799","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Haichen Shen, Eddie Q. Yan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. TVM: end-to-end optimization stack for deep learning. CoRR, abs\/1802.04799, 2018."},{"key":"e_1_3_2_1_8_1","volume-title":"NVIDIA A10 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/products\/a10-gpu\/","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA A10 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/products\/a10-gpu\/, 2024."},{"key":"e_1_3_2_1_9_1","volume-title":"NVIDIA Jetson Xavier. https:\/\/www.nvidia.com\/en-us\/autonomous-machines\/embedded-systems\/jetson-xavier-series\/","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA Jetson Xavier. https:\/\/www.nvidia.com\/en-us\/autonomous-machines\/embedded-systems\/jetson-xavier-series\/, 2024."},{"key":"e_1_3_2_1_10_1","volume-title":"NVIDIA RTX A5000 Graphics Card. https:\/\/www.nvidia.com\/en-us\/design-visualization\/rtx-a5000\/","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA RTX A5000 Graphics Card. https:\/\/www.nvidia.com\/en-us\/design-visualization\/rtx-a5000\/, 2024."},{"key":"e_1_3_2_1_11_1","volume-title":"L2 regularization for learning kernels. CoRR, abs\/1205.2653","author":"Cortes Corinna","year":"2012","unstructured":"Corinna Cortes, Mehryar Mohri, and Afshin Rostamizadeh. L2 regularization for learning kernels. CoRR, abs\/1205.2653, 2012."},{"key":"e_1_3_2_1_12_1","volume-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale. CoRR, abs\/2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. An image is worth 16\u00d716 words: Transformers for image recognition at scale. CoRR, abs\/2010.11929, 2020."},{"key":"e_1_3_2_1_13_1","volume-title":"Transformer Supported Frameworks","author":"Face Hugging","year":"2023","unstructured":"Hugging Face. Transformer Supported Frameworks (2023). https:\/\/huggingface.co\/docs\/transformers\/index#supported-frameworks."},{"key":"e_1_3_2_1_14_1","volume-title":"Learning spatio-temporal features with 3d residual networks for action recognition. CoRR, abs\/1708.07632","author":"Hara Kensho","year":"2017","unstructured":"Kensho Hara, Hirokatsu Kataoka, and Yutaka Satoh. Learning spatio-temporal features with 3d residual networks for action recognition. CoRR, abs\/1708.07632, 2017."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446762"},{"key":"e_1_3_2_1_17_1","volume-title":"Kingma and Jimmy Ba. Adam: A method for stochastic optimization. CoRR, abs\/1412.6980","author":"Diederik","year":"2014","unstructured":"Diederik P. Kingma and Jimmy Ba. Adam: A method for stochastic optimization. CoRR, abs\/1412.6980, 2014."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"issue":"1","key":"e_1_3_2_1_19_1","first-page":"2021","volume":"18","author":"Labini Paolo Sylos","unstructured":"Paolo Sylos Labini, Marco Cianfriglia, Damiano Perri, Osvaldo Gervasi, Grigori Fursin, Anton Lokhmotov, Cedric Nugteren, Bruno Carpentieri, Fabiana Zollo, and Flavio Vella. On the anatomy of predictive models for accelerating gpu convolution kernels and beyond. ACM Trans. Archit. Code Optim., 18(1), jan 2021.","journal-title":"Trans. Archit. Code Optim."},{"key":"e_1_3_2_1_20_1","volume-title":"MLIR: A compiler infrastructure for the end of moore's law. CoRR, abs\/2002.11054","author":"Lattner Chris","year":"2020","unstructured":"Chris Lattner, Jacques A. Pienaar, Mehdi Amini, Uday Bondhugula, River Riddle, Albert Cohen, Tatiana Shpeisman, Andy Davis, Nicolas Vasilache, and Oleksandr Zinenko. MLIR: A compiler infrastructure for the end of moore's law. CoRR, abs\/2002.11054, 2020."},{"issue":"4","key":"e_1_3_2_1_21_1","volume":"37","author":"Li Tzu-Mao","year":"2018","unstructured":"Tzu-Mao Li, Micha\u00ebl Gharbi, Andrew Adams, Fr\u00e9do Durand, and Jonathan Ragan-Kelley. Differentiable programming for image processing and deep learning in halide. ACM Trans. Graph., 37(4), 2018.","journal-title":"ACM Trans. Graph."},{"key":"e_1_3_2_1_22_1","volume-title":"DARTS: differentiable architecture search. CoRR, abs\/1806.09055","author":"Liu Hanxiao","year":"2018","unstructured":"Hanxiao Liu, Karen Simonyan, and Yiming Yang. DARTS: differentiable architecture search. CoRR, abs\/1806.09055, 2018."},{"key":"e_1_3_2_1_23_1","first-page":"32","author":"Mendis Charith","year":"2019","unstructured":"Charith Mendis, Cambridge Yang, Yewen Pu, Saman Amarasinghe, and Michael Carbin. Compiler auto-vectorization with imitation learning. Advances in Neural Information Processing Systems, 32, 2019.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Moses William","year":"2020","unstructured":"William Moses and Valentin Churavy. Instead of rewriting foreign code for machine learning, automatically synthesize fast gradients. In Advances in Neural Information Processing Systems, volume 33, 2020."},{"issue":"4","key":"e_1_3_2_1_25_1","first-page":"2016","volume":"35","author":"Mullapudi Ravi Teja","unstructured":"Ravi Teja Mullapudi, Andrew Adams, Dillon Sharlet, Jonathan Ragan-Kelley, and Kayvon Fatahalian. Automatically scheduling halide image processing pipelines. ACM Trans. Graph., 35(4), jul 2016.","journal-title":"Trans. Graph."},{"key":"e_1_3_2_1_26_1","volume-title":"et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 32, 2019."},{"key":"e_1_3_2_1_27_1","volume-title":"Unsupervised representation learning with deep convolutional generative adversarial networks. CoRR, abs\/1511.06434","author":"Radford Alec","year":"2015","unstructured":"Alec Radford, Luke Metz, and Soumith Chintala. Unsupervised representation learning with deep convolutional generative adversarial networks. CoRR, abs\/1511.06434, 2015."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/4235.728207"},{"key":"e_1_3_2_1_30_1","volume-title":"Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation. CoRR, abs\/1801.04381","author":"Sandler Mark","year":"2018","unstructured":"Mark Sandler, Andrew G. Howard, Menglong Zhu, Andrey Zhmoginov, and Liang-Chieh Chen. Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation. CoRR, abs\/1801.04381, 2018."},{"key":"e_1_3_2_1_31_1","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Shao Junru","year":"2022","unstructured":"Junru Shao, Xiyou Zhou, Siyuan Feng, Bohan Hou, Ruihang Lai, Hongyi Jin, Wuwei Lin, Masahiro Masuda, Cody Hao Yu, and Tianqi Chen. Tensor program optimization with probabilistic programs. In Advances in Neural Information Processing Systems, volume 35, 2022."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3446108"},{"key":"e_1_3_2_1_33_1","volume-title":"Llama: Open and efficient foundation language models. CoRR, abs\/2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur\u00e9lien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. Llama: Open and efficient foundation language models. CoRR, abs\/2302.13971, 2023."},{"key":"e_1_3_2_1_34_1","volume-title":"Tuna: A static analysis approach to optimizing deep neural networks. CoRR, abs\/2104.14641","author":"Wang Yao","year":"2021","unstructured":"Yao Wang, Xingyu Zhou, Yanming Wang, Rui Li, Yong Wu, and Vin Sharma. Tuna: A static analysis approach to optimizing deep neural networks. CoRR, abs\/2104.14641, 2021."},{"key":"e_1_3_2_1_35_1","volume-title":"Learning structured sparsity in deep neural networks. CoRR, abs\/1608.03665","author":"Wen Wei","year":"2016","unstructured":"Wei Wen, Chunpeng Wu, Yandan Wang, Yiran Chen, and Hai Li. Learning structured sparsity in deep neural networks. CoRR, abs\/1608.03665, 2016."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3434304"},{"key":"e_1_3_2_1_37_1","first-page":"5","author":"Zhao Yifan","year":"2023","unstructured":"Yifan Zhao, Hashim Sharif, Peter Pao-Huang, Vatsin Ninad Shah, Arun Narenthiran Sivakumar, Mateus Valverde Gasparino, Abdulrahman Mahmoud, Nathan Zhao, Sarita Adve, Girish Chowdhary, Sasa Misailovic, and Vikram Adve. Approxcaliper: A programmable framework for application-aware neural network optimization. In Proceedings of Machine Learning and Systems 5, 2023.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_38_1","volume-title":"USENIX Conference on Operating Systems Design and Implementation, OSDI'20","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, Joseph E. Gonzalez, and Ion Stoica. Ansor: Generating high-performance tensor programs for deep learning. In USENIX Conference on Operating Systems Design and Implementation, OSDI'20, USA, 2020."},{"key":"e_1_3_2_1_39_1","volume-title":"NeurIPS","author":"Zheng Lianmin","year":"2021","unstructured":"Lianmin Zheng, Ruochen Liu, Junru Shao, Tianqi Chen, Joseph E Gonzalez, Ion Stoica, and Ameer Haj Ali. Tenset: A large-scale program performance dataset for learned tensor compilers. In NeurIPS; Datasets and Benchmarks Track, 2021."},{"key":"e_1_3_2_1_40_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Zhuang Tao","year":"2020","unstructured":"Tao Zhuang, Zhixuan Zhang, Yuheng Huang, Xiaoyi Zeng, Kai Shuang, and Xiang Li. Neuron-level structured pruning using polarization regularizer. In Advances in Neural Information Processing Systems, volume 33, 2020."}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651348","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3620666.3651348","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:43Z","timestamp":1750291423000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651348"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":40,"alternative-id":["10.1145\/3620666.3651348","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651348","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}