{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T17:16:18Z","timestamp":1771953378277,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2411134"],"award-info":[{"award-number":["2411134"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,22]]},"DOI":"10.1145\/3760250.3762221","type":"proceedings-article","created":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T15:06:36Z","timestamp":1765465596000},"page":"132-146","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Linear Layouts: Robust Code Generation of Efficient Tensor Computation Using F_2"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7977-3182","authenticated-orcid":false,"given":"Keren","family":"Zhou","sequence":"first","affiliation":[{"name":"George Mason University, Fairfax, VA, USA and OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8893-2276","authenticated-orcid":false,"given":"Mario","family":"Lezcano-Casado","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2532-6950","authenticated-orcid":false,"given":"Adam P.","family":"Goucher","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7309-8321","authenticated-orcid":false,"given":"Akhmed","family":"Rakhmati","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8035-3726","authenticated-orcid":false,"given":"Jeff","family":"Niu","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2376-7307","authenticated-orcid":false,"given":"Justin","family":"Lebar","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0203-6610","authenticated-orcid":false,"given":"Pawel","family":"Szczerbuk","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6824-4343","authenticated-orcid":false,"given":"Peter","family":"Bell","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0636-8710","authenticated-orcid":false,"given":"Phil","family":"Tillet","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8259-7578","authenticated-orcid":false,"given":"Thomas","family":"Raoux","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6010-5682","authenticated-orcid":false,"given":"Zahi","family":"Moudallal","sequence":"additional","affiliation":[{"name":"OpenAI, San Francisco, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"The Free Encyclopedia. https:\/\/en.wikipedia.org\/wiki\/Hamming_weight Accessed: 2025-07-07.","unstructured":"[n.d.]. Hamming weight. Wikipedia, The Free Encyclopedia. https:\/\/en.wikipedia.org\/wiki\/Hamming_weight Accessed: 2025-07-07."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, PeteWarden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: a system for large-scale machine learning. In Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation (Savannah, GA, USA) (OSDI'16). USENIX Association, USA, 265--283."},{"key":"e_1_3_2_1_3_1","unstructured":"Alibaba DAMO Academy. 2023. BladeDISC: A Lightweight High-Performance Compiler for Dynamic Shape Neural Networks. In SIGMOD. https:\/\/arxiv.org\/abs\/2305.10741"},{"key":"e_1_3_2_1_4_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"XLA: Optimizing Compiler for Machine Learning. Google OpenXLA Project","author":"Google","year":"2020","unstructured":"Google AI. 2020. XLA: Optimizing Compiler for Machine Learning. Google OpenXLA Project (2020). https:\/\/www.tensorflow.org\/xla"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640366"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 29th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI). 101--113","author":"Bondhugula Uday","unstructured":"Uday Bondhugula, Albert Hartono, J. Ramanujam, and P. Sadayappan. 2008. A Practical Automatic Polyhedral Parallelizer and Locality Optimizer. In Proceedings of the 29th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI). 101--113."},{"key":"e_1_3_2_1_8_1","volume-title":"Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang.","author":"Bradbury James","year":"2018","unstructured":"James Bradbury, Roy Frostig, Peter Hawkins, Matthew James Johnson, Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang. 2018. JAX: composable transformations of PythonNumPy programs. http:\/\/github.com\/jax-ml\/jax"},{"key":"e_1_3_2_1_9_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In OSDI. 578--594. https:\/\/arxiv.org\/abs\/1802.04799","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, et al. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In OSDI. 578--594. https:\/\/arxiv.org\/abs\/1802.04799"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446725"},{"key":"e_1_3_2_1_11_1","unstructured":"chengzeyi. 2024. Fused attention kernel incorrect results. https:\/\/github.com\/triton-lang\/triton\/issues\/4310 Accessed: 2025-03-03."},{"key":"e_1_3_2_1_12_1","unstructured":"Sharan Chetlur Cliff Woolley Philippe Vandermersch Jonathan Cohen John Tran Bryan Catanzaro and Evan Shelhamer. 2014. cuDNN: Efficient Primitives for Deep Learning. arXiv:1410.0759 [cs.NE] https:\/\/arxiv.org\/abs\/1410.0759"},{"key":"e_1_3_2_1_13_1","unstructured":"NVIDIA Corporation. 2013. cuBLAS Library User Guide. https:\/\/docs.nvidia.com\/cuda\/cublas\/"},{"key":"e_1_3_2_1_14_1","volume-title":"Accessed","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. 2024. NVIDIA CuTe. https:\/\/github.com\/NVIDIA\/cutlass\/blob\/main\/media\/docs\/cute\/00_quickstart.md. Accessed: Feb. 24, 2025."},{"key":"e_1_3_2_1_15_1","unstructured":"daniel-geon park. 2024. Incorrect results when using both tl.sum() and tl.cumsum() in one kernel. https:\/\/github.com\/triton-lang\/triton\/issues\/3017 Accessed: 2025-03-03."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575702"},{"key":"e_1_3_2_1_17_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_18_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research 23, 120 (2022), 1--39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_19_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 8th International Workshop on Polyhedral Compilation Techniques.","author":"Grosser Tobias","year":"2012","unstructured":"Tobias Grosser, Torsten H\u00f6nig, Paul Feautrier, Armin Gro\u00dfe, Louis-No\u00ebl Pouchet, Sven Verdoolaege, and Albert Cohen. 2012. Polly\u2014Performing Polyhedral Optimizations on a Low-Level Intermediate Representation. In Proceedings of the 8th International Workshop on Polyhedral Compilation Techniques."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582018"},{"key":"e_1_3_2_1_22_1","volume-title":"CuAsmRL: Optimizing GPU SASS Schedules via Deep Reinforcement Learning. arXiv preprint arXiv:2501.08071","author":"He Guoliang","year":"2025","unstructured":"Guoliang He and Eiko Yoneki. 2025. CuAsmRL: Optimizing GPU SASS Schedules via Deep Reinforcement Learning. arXiv preprint arXiv:2501.08071 (2025)."},{"key":"e_1_3_2_1_23_1","unstructured":"Paul Iannetta. 2022. Compiling Trees: Combining Data Layouts and the Polyhedral Model. Ph.D. Dissertation. Universit\u00e9 de Strasbourg."},{"key":"e_1_3_2_1_24_1","unstructured":"Intel Corporation. [n.d.]. What is Xe Matrix eXtensions (XMX)? https:\/\/www.intel.com\/content\/www\/us\/en\/support\/articles\/000091112\/graphics.html. Accessed: 2025-03-02."},{"key":"e_1_3_2_1_25_1","volume-title":"TASO: Optimizing Deep Learning Computation Graphs with Automated Substitutions. In SOSP. https:\/\/arxiv.org\/abs\/1907.04892","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, Matei Zaharia, and Alex Aiken. 2019. TASO: Optimizing Deep Learning Computation Graphs with Automated Substitutions. In SOSP. https:\/\/arxiv.org\/abs\/1907.04892"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_27_1","unstructured":"PyTorch Labs. 2025. TritonBench: A Collection of PyTorch Custom Operators for Performance Evaluation. https:\/\/github.com\/pytorchlabs\/tritonbench Accessed: 2025-03-06."},{"key":"e_1_3_2_1_28_1","unstructured":"Ruihang Lai Junru Shao Siyuan Feng Steven S Lyubomirsky Bohan Hou Wuwei Lin Zihao Ye Hongyi Jin Yuchen Jin Jiawei Liu et al. 2023. Relax: composable abstractions for end-to-end dynamic machine learning. arXiv preprint arXiv:2311.02103 (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3385412.3385992"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Jianling Li Shangzhan Li Zhenye Gao Qi Shi Yuxuan Li Zefan Wang Jiacheng Huang Haojie Wang Jianrong Wang Xu Han et al. 2025. TritonBench: Benchmarking Large Language Model Capabilities for Generating Triton Operators. arXiv preprint arXiv:2502.14752 (2025).","DOI":"10.18653\/v1\/2025.findings-acl.1183"},{"key":"e_1_3_2_1_32_1","first-page":"87","article-title":"Awq: Activation-aware weight quantization for on-device llm compression and acceleration","volume":"6","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. Awq: Activation-aware weight quantization for on-device llm compression and acceleration. Proceedings of Machine Learning and Systems 6 (2024), 87--100.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_33_1","volume-title":"Spinquant: Llm quantization with learned rotations. arXiv preprint arXiv:2405.16406","author":"Liu Zechun","year":"2024","unstructured":"Zechun Liu, Changsheng Zhao, Igor Fedorov, Bilge Soran, Dhruv Choudhary, Raghuraman Krishnamoorthi, Vikas Chandra, Yuandong Tian, and Tijmen Blankevoort. 2024. Spinquant: Llm quantization with learned rotations. arXiv preprint arXiv:2405.16406 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Finite Fields with Applications to Coding Theory, Cryptography and Related Areas","author":"Peters Mullen","unstructured":"Mullen and Peters. 2002. Finite Fields with Applications to Coding Theory, Cryptography and Related Areas. Springer Berlin Heidelberg."},{"key":"e_1_3_2_1_35_1","unstructured":"NVIDIA Corporation. 2024. PTX ISA Version 8.5. https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/ Accessed: 2025-03-02."},{"key":"e_1_3_2_1_36_1","unstructured":"NVIDIA Corporation. 2025. Parallel Thread Execution ISA. https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/#tensor-memory Accessed: 2025-03-02."},{"key":"e_1_3_2_1_37_1","unstructured":"Adam Paszke. 2024. Mosaic GPU: A DSL for Fast Hopper Kernels in Python. YouTube Video. https:\/\/www.youtube.com\/watch?v=tnADC2XuAr0"},{"key":"e_1_3_2_1_38_1","unstructured":"PheelaV. 2024. triton.language.associative_scan returning incorrect results when reverse=True. https:\/\/github.com\/triton-lang\/triton\/issues\/4362 Accessed: 2025-03-03."},{"key":"e_1_3_2_1_39_1","volume-title":"Error-correcting codes and finite fields","author":"Pretzel Oliver","unstructured":"Oliver Pretzel. 1992. Error-correcting codes and finite fields. Oxford University Press."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00024"},{"key":"e_1_3_2_1_42_1","unstructured":"Bita Darvish Rouhani Nitin Garegrat Tom Savell Ankit More Kyung-Nam Han Ritchie Zhao Mathew Hall Jasmine Klar Eric Chung Yuan Yu Michael Schulte Ralph Wittig Ian Bratt Nigel Stephens Jelena Milanovic John Brothers Pradeep Dubey Marius Cornea Alexander Heinecke Andres Rodriguez Martin Langhammer Summer Deng Maxim Naumov Paulius Micikevicius Michael Siu and Colin Verrilli. 2023. OCP Microscaling Formats (MX) Specification Version 1.0. https:\/\/www.opencompute.org\/documents\/ocp-microscalingformats-mx-v1-0-spec-final-pdf Accessed: 2025-03-03."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 9th International Workshop on Polyhedral Compilation Techniques (IMPACT).","author":"Sarkar Sumit","year":"2019","unstructured":"Sumit Sarkar and Tobias Grosser. 2019. Integrating Data Layout Transformations with the Polyhedral Model. In Proceedings of the 9th International Workshop on Polyhedral Compilation Techniques (IMPACT)."},{"key":"e_1_3_2_1_44_1","first-page":"68658","article-title":"Flashattention-3: Fast and accurate attention with asynchrony and low-precision","volume":"37","author":"Shah Jay","year":"2025","unstructured":"Jay Shah, Ganesh Bikshandi, Ying Zhang, Vijay Thakkar, Pradeep Ramani, and Tri Dao. 2025. Flashattention-3: Fast and accurate attention with asynchrony and low-precision. Advances in Neural Information Processing Systems 37 (2025), 68658--68685.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_45_1","volume-title":"Megatron-lm: Training multibillion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. 2019. Megatron-lm: Training multibillion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053 (2019)."},{"key":"e_1_3_2_1_46_1","volume-title":"Rene Van Oostrum, and Joseph Greathouse","author":"Sitaraman Gina","year":"2022","unstructured":"Gina Sitaraman, Noel Chalmers, Nicholas Malaya, Damon McDougall, Ossian O'Reilly, Rene Van Oostrum, and Joseph Greathouse. 2022. AMD Matrix Cores. https:\/\/gpuopen.com\/learn\/amd-lab-notes\/amdlab-notes-matrix-cores-readme\/ Accessed: 2025-03-02."},{"key":"e_1_3_2_1_47_1","volume-title":"Simple, Fast, and Adorable AI Kernels. arXiv preprint arXiv:2410.20399","author":"Spector Benjamin F","year":"2024","unstructured":"Benjamin F Spector, Simran Arora, Aaryan Singhal, Daniel Y Fu, and Christopher R\u00e9. 2024. ThunderKittens: Simple, Fast, and Adorable AI Kernels. arXiv preprint arXiv:2410.20399 (2024)."},{"key":"e_1_3_2_1_48_1","volume-title":"Triton Language: Python API Documentation. https:\/\/triton-lang.org\/main\/python-api\/triton.language.html Accessed: 2025-03-02.","author":"Developers The Triton","year":"2025","unstructured":"The Triton Developers. 2025. Triton Language: Python API Documentation. https:\/\/triton-lang.org\/main\/python-api\/triton.language.html Accessed: 2025-03-02."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_50_1","volume-title":"Triton: Open Issues Labeled 'Bug' Related to 'Layout'. https:\/\/github.com\/triton-lang\/triton\/issues?q=is%3Aissueis%3Aopenlabel%3Abuglayout","author":"Developers Triton","year":"2025","unstructured":"Triton Developers. 2025. Triton: Open Issues Labeled 'Bug' Related to 'Layout'. https:\/\/github.com\/triton-lang\/triton\/issues?q=is%3Aissueis%3Aopenlabel%3Abuglayout"},{"key":"e_1_3_2_1_51_1","volume-title":"Training LLMs with MXFP4. arXiv preprint arXiv:2502.20586","author":"Tseng Albert","year":"2025","unstructured":"Albert Tseng, Tao Yu, and Youngsuk Park. 2025. Training LLMs with MXFP4. arXiv preprint arXiv:2502.20586 (2025)."},{"key":"e_1_3_2_1_52_1","volume-title":"Glow: Graph Lowering Compiler Techniques for Neural Networks. In Facebook AI. https:\/\/engineering.fb.com\/2018\/05\/14\/ml-applications\/glow\/","author":"Vasilache Nick","year":"2018","unstructured":"Nick Vasilache, Oleksandr Zinenko, et al. 2018. Glow: Graph Lowering Compiler Techniques for Neural Networks. In Facebook AI. https:\/\/engineering.fb.com\/2018\/05\/14\/ml-applications\/glow\/"},{"key":"e_1_3_2_1_53_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, ?ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_54_1","volume-title":"Training deep neural networks with 8-bit floating point numbers. Advances in neural information processing systems 31","author":"Wang Naigang","year":"2018","unstructured":"Naigang Wang, Jungwook Choi, Daniel Brand, Chia-Yu Chen, and Kailash Gopalakrishnan. 2018. Training deep neural networks with 8-bit floating point numbers. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_55_1","unstructured":"Wikipedia contributors. 2025. Swizzling (computer graphics). https:\/\/en.wikipedia.org\/wiki\/Swizzling_(computer_graphics) Accessed: 2025-03-02."},{"key":"e_1_3_2_1_56_1","unstructured":"Lucas Wilkinson. 2024. Introducing Machete: A Mixed- Input GEMM Kernel Optimized for NVIDIA Hopper GPUs. https:\/\/neuralmagic.com\/blog\/introducing-machete-a-mixedinput-gemm-kernel-optimized-for-nvidia-hopper-gpus\/ Accessed: 2025-03-03."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446753"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378511"},{"key":"e_1_3_2_1_59_1","volume-title":"Ansor: Generating High-Performance Tensor Programs for Deep Learning. In OSDI. https:\/\/arxiv.org\/abs\/2006.06762","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Zhao Wu, et al. 2020. Ansor: Generating High-Performance Tensor Programs for Deep Learning. In OSDI. https:\/\/arxiv.org\/abs\/2006.06762"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527440"}],"event":{"name":"ASPLOS '26:31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Pittsburgh PA USA","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3760250.3762221","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T15:08:24Z","timestamp":1765465704000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3760250.3762221"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,11]]},"references-count":60,"alternative-id":["10.1145\/3760250.3762221","10.1145\/3760250"],"URL":"https:\/\/doi.org\/10.1145\/3760250.3762221","relation":{},"subject":[],"published":{"date-parts":[[2025,12,11]]},"assertion":[{"value":"2025-12-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}