{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T08:23:42Z","timestamp":1769502222900,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":95,"publisher":"ACM","funder":[{"name":"National Key R&D Program of China","award":["2023YFB4404400"],"award-info":[{"award-number":["2023YFB4404400"]}]},{"name":"NSFC","award":["62222411"],"award-info":[{"award-number":["62222411"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,18]]},"DOI":"10.1145\/3725843.3756027","type":"proceedings-article","created":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T17:19:56Z","timestamp":1760721596000},"page":"1381-1396","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["LLMulator: Generalizable Cost Modeling for Dataflow Accelerators with Input-Adaptive Control Flow"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1920-0101","authenticated-orcid":false,"given":"Kaiyan","family":"Chang","sequence":"first","affiliation":[{"name":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9237-559X","authenticated-orcid":false,"given":"Wenlong","family":"Zhu","sequence":"additional","affiliation":[{"name":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8407-2594","authenticated-orcid":false,"given":"Shengwen","family":"Liang","sequence":"additional","affiliation":[{"name":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8082-4218","authenticated-orcid":false,"given":"Huawei","family":"Li","sequence":"additional","affiliation":[{"name":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5172-4736","authenticated-orcid":false,"given":"Ying","family":"Wang","sequence":"additional","affiliation":[{"name":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3326334"},{"key":"e_1_3_3_2_3_2","unstructured":"Riyadh Baghdadi Massinissa Merouani Mohamed-Hicham LEGHETTAS Kamel Abdous Taha Arbaoui Karima BENATCHBA and Saman amarasinghe. 2021. A deep learning based cost model for automatic code optimization. Proceedings of Machine Learning and Systems 3 (2021) 181\u2013193."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"e_1_3_3_2_5_2","first-page":"112","volume-title":"International Symposium on Logic-Based Program Synthesis and Transformation","author":"Barany Gerg\u00f6","year":"2017","unstructured":"Gerg\u00f6 Barany. 2017. Liveness-driven random program generation. In International Symposium on Logic-Based Program Synthesis and Transformation. Springer, 112\u2013127."},{"key":"e_1_3_3_2_6_2","unstructured":"Iz Beltagy Matthew\u00a0E Peters and Arman Cohan. 2020. Longformer: The long-document transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2004.05150 (2020)."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Yoshua Bengio J\u00e9r\u00f4me Louradour Ronan Collobert and Jason Weston. 2009. Curriculum learning. (2009) 41\u201348.","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582061"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/PACT58117.2023.00012"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3657356"},{"key":"e_1_3_3_2_11_2","unstructured":"Bodhisatwa Chatterjee Neeraj Jadhav Sharjeel Khan and Santosh Pande. 2024. Phaedrus: Exploring Dynamic Application Behavior with Lightweight Generative Models and Large-Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.06994 (2024)."},{"key":"e_1_3_3_2_12_2","first-page":"578","volume-title":"Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In Proceedings of the 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 578\u2013594."},{"key":"e_1_3_3_2_13_2","unstructured":"Tianqi Chen Lianmin Zheng Eddie Yan Ziheng Jiang Thierry Moreau Luis Ceze Carlos Guestrin and Arvind Krishnamurthy. 2018. Learning to optimize tensor programs. Advances in Neural Information Processing Systems 31 (2018)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Yu-Hsin Chen Joel Emer and Vivienne Sze. 2016. Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks. ACM SIGARCH computer architecture news 44 3 (2016) 367\u2013379.","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"e_1_3_3_2_16_2","first-page":"2244","volume-title":"International Conference on Machine Learning","author":"Cummins Chris","year":"2021","unstructured":"Chris Cummins, Zacharias\u00a0V Fisches, Tal Ben-Nun, Torsten Hoefler, Michael\u00a0FP O\u2019Boyle, and Hugh Leather. 2021. Programl: A graph-based program representation for data flow analysis and compiler optimizations. In International Conference on Machine Learning. PMLR, 2244\u20132253."},{"key":"e_1_3_3_2_17_2","first-page":"9784","volume-title":"International Conference on Machine Learning","author":"Dagan Gautier","year":"2024","unstructured":"Gautier Dagan, Gabriel Synnaeve, and Baptiste Roziere. 2024. Getting the most out of your tokenizer for pre-training and domain adaptation. In International Conference on Machine Learning. PMLR, 9784\u20139805."},{"key":"e_1_3_3_2_18_2","unstructured":"DeepSeek-AI Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi Xiaokang Zhang Xingkai Yu Yu Wu Z.\u00a0F. Wu Zhibin Gou Zhihong Shao Zhuoshu Li Ziyi Gao Aixin Liu Bing Xue Bingxuan Wang Bochao Wu Bei Feng Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan Damai Dai Deli Chen Dongjie Ji Erhang Li Fangyun Lin Fucong Dai Fuli Luo Guangbo Hao Guanting Chen Guowei Li H. Zhang Han Bao Hanwei Xu Haocheng Wang Honghui Ding Huajian Xin Huazuo Gao Hui Qu Hui Li Jianzhong Guo Jiashi Li Jiawei Wang Jingchang Chen Jingyang Yuan Junjie Qiu Junlong Li J.\u00a0L. Cai Jiaqi Ni Jian Liang Jin Chen Kai Dong Kai Hu Kaige Gao Kang Guan Kexin Huang Kuai Yu Lean Wang Lecong Zhang Liang Zhao Litong Wang Liyue Zhang Lei Xu Leyi Xia Mingchuan Zhang Minghua Zhang Minghui Tang Meng Li Miaojun Wang Mingming Li Ning Tian Panpan Huang Peng Zhang Qiancheng Wang Qinyu Chen Qiushi Du Ruiqi Ge Ruisong Zhang Ruizhe Pan Runji Wang R.\u00a0J. Chen R.\u00a0L. Jin Ruyi Chen Shanghao Lu Shangyan Zhou Shanhuang Chen Shengfeng Ye Shiyu Wang Shuiping Yu Shunfeng Zhou Shuting Pan S.\u00a0S. Li Shuang Zhou Shaoqing Wu Shengfeng Ye Tao Yun Tian Pei Tianyu Sun T. Wang Wangding Zeng Wanjia Zhao Wen Liu Wenfeng Liang Wenjun Gao Wenqin Yu Wentao Zhang W.\u00a0L. Xiao Wei An Xiaodong Liu Xiaohan Wang Xiaokang Chen Xiaotao Nie Xin Cheng Xin Liu Xin Xie Xingchao Liu Xinyu Yang Xinyuan Li Xuecheng Su Xuheng Lin X.\u00a0Q. Li Xiangyue Jin Xiaojin Shen Xiaosha Chen Xiaowen Sun Xiaoxiang Wang Xinnan Song Xinyi Zhou Xianzu Wang Xinxia Shan Y.\u00a0K. Li Y.\u00a0Q. Wang Y.\u00a0X. Wei Yang Zhang Yanhong Xu Yao Li Yao Zhao Yaofeng Sun Yaohui Wang Yi Yu Yichao Zhang Yifan Shi Yiliang Xiong Ying He Yishi Piao Yisong Wang Yixuan Tan Yiyang Ma Yiyuan Liu Yongqiang Guo Yuan Ou Yuduan Wang Yue Gong Yuheng Zou Yujia He Yunfan Xiong Yuxiang Luo Yuxiang You Yuxuan Liu Yuyang Zhou Y.\u00a0X. Zhu Yanhong Xu Yanping Huang Yaohui Li Yi Zheng Yuchen Zhu Yunxian Ma Ying Tang Yukun Zha Yuting Yan Z.\u00a0Z. Ren Zehui Ren Zhangli Sha Zhe Fu Zhean Xu Zhenda Xie Zhengyan Zhang Zhewen Hao Zhicheng Ma Zhigang Yan Zhiyu Wu Zihui Gu Zijia Zhu Zijun Liu Zilin Li Ziwei Xie Ziyang Song Zizheng Pan Zhen Huang Zhipeng Xu Zhongyu Zhang and Zhen Zhang. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. (2025). arxiv:https:\/\/arXiv.org\/abs\/2501.12948\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2501.12948"},{"key":"e_1_3_3_2_19_2","volume-title":"The number sense: How the mind creates mathematics","author":"Dehaene Stanislas","year":"2011","unstructured":"Stanislas Dehaene. 2011. The number sense: How the mind creates mathematics. OUP USA."},{"key":"e_1_3_3_2_20_2","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers)","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171\u20134186."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Lieven Eeckhout Robert\u00a0H Bell\u00a0Jr Bastiaan Stougie Koen De\u00a0Bosschere and Lizy\u00a0K John. 2004. Control flow modeling in statistical simulation for accurate and efficient processor design studies. ACM SIGARCH Computer Architecture News 32 2 (2004) 350.","DOI":"10.1145\/1028176.1006730"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586110"},{"key":"e_1_3_3_2_24_2","unstructured":"Matthias Fey and Jan\u00a0Eric Lenssen. 2019. Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1903.02428 (2019)."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_3_2_26_2","unstructured":"Ian\u00a0J Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_3_2_27_2","unstructured":"Google and SkyWater\u00a0Technology Foundry. 2020. SkyWater Open Source PDK: Open Source Process Design Kit for Usage with SkyWater Technology Foundry\u2019s 130nm Node. GitHub Repository. https:\/\/github.com\/google\/skywater-pdk Repository includes documentation libraries and tooling files.."},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339595"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00077"},{"key":"e_1_3_3_2_30_2","unstructured":"Will Hamilton Zhitao Ying and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE transactions on pattern analysis and machine intelligence 37 9 (2015) 1904\u20131916.","DOI":"10.1109\/TPAMI.2015.2389824"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_2_34_2","volume-title":"International Conference on Learning Representations","author":"Hu Edward\u00a0J","year":"2022","unstructured":"Edward\u00a0J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_3_2_35_2","unstructured":"Wenhao Hu Jinhao Duan Chunchen Wei Li Zhang Yue Zhang and Kaidi Xu. 2025. DynaCode: A Dynamic Complexity-Aware Code Benchmark for Evaluating Large Language Models in Code Generation. arxiv:https:\/\/arXiv.org\/abs\/2503.10452\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2503.10452"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00050"},{"key":"e_1_3_3_2_37_2","unstructured":"Forrest Iandola Matt Moskewicz Sergey Karayev Ross Girshick Trevor Darrell and Kurt Keutzer. 2014. Densenet: Implementing efficient convnet descriptor pyramids. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1404.1869 (2014)."},{"key":"e_1_3_3_2_38_2","first-page":"448","volume-title":"International conference on machine learning","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In International conference on machine learning. pmlr, 448\u2013456."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Lana Josipovi\u0107 Shabnam Sheikhha Andrea Guerrieri Paolo Ienne and Jordi Cortadella. 2021. Buffer placement and sizing for high-performance dataflow circuits. ACM Transactions on Reconfigurable Technology and Systems (TRETS) 15 1 (2021) 1\u201332.","DOI":"10.1145\/3477053"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454038"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575747"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-11404-6_3"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"crossref","unstructured":"Florent Kirchner Nikolai Kosmatov Virgile Prevosto Julien Signoles and Boris Yakobowski. 2015. Frama-C: A software analysis perspective. Formal aspects of computing 27 3 (2015) 573\u2013609.","DOI":"10.1007\/s00165-014-0326-7"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"crossref","unstructured":"James Kirkpatrick Razvan Pascanu Neil Rabinowitz Joel Veness Guillaume Desjardins Andrei\u00a0A. Rusu Kieran Milan John Quan Tiago Ramalho Agnieszka Grabska-Barwinska Demis Hassabis Claudia Clopath Dharshan Kumaran and Raia Hadsell. 2017. Overcoming catastrophic forgetting in neural networks. Proceedings of the national academy of sciences 114 13 (2017) 3521\u20133526.","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","unstructured":"Klaus Krogmann Michael Kuperberg and Ralf Reussner. 2010. Using Genetic Search for Reverse Engineering of Parametric Behavior Models for Performance Prediction. IEEE Transactions on Software Engineering 36 6 (2010) 865\u2013877. 10.1109\/TSE.2010.69","DOI":"10.1109\/TSE.2010.69"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358252"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Hyoukjun Kwon Prasanth Chatarasi Vivek Sarkar Tushar Krishna Michael Pellauer and Angshuman Parashar. 2020. Maestro: A Data-Centric Approach to Understand Reuse Performance and Hardware Cost of DNN Mappings. IEEE Micro 40 3 (2020) 20\u201329.","DOI":"10.1109\/MM.2020.2985963"},{"key":"e_1_3_3_2_49_2","unstructured":"Zhenzhong Lan Mingda Chen Sebastian Goodman Kevin Gimpel Piyush Sharma and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.11942 (2019)."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Yann LeCun L\u00e9on Bottou Yoshua Bengio and Patrick Haffner. 1998. Gradient-based learning applied to document recognition. Proc. IEEE 86 11 (1998) 2278\u20132324.","DOI":"10.1109\/5.726791"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_3_2_52_2","unstructured":"Harrison Lee Samrat Phatale Hassan Mansoor Thomas Mesnard Johan Ferret Kellie Lu Colton Bishop Ethan Hall Victor Carbune Abhinav Rastogi and Sushant Prakash. 2023. Rlaif vs. rlhf: Scaling reinforcement learning from human feedback with ai feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.00267 (2023)."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.23919\/DATE54114.2022.9774525"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.5555\/1478784"},{"key":"e_1_3_3_2_55_2","unstructured":"Zhiqi Li Wenhai Wang Hongyang Li Enze Xie Chonghao Sima Tong Lu Qiao Yu and Jifeng Dai. 2024. Bevformer: learning bird\u2019s-eye-view representation from lidar-camera via spatiotemporal transformers. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)."},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00040"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC47756.2020.9045442"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","unstructured":"Shang Liu Wenji Fang Yao Lu Jing Wang Qijun Zhang Hongce Zhang and Zhiyao Xie. 2025. RTLCoder: Fully Open-Source and Efficient LLM-Assisted RTL Code Generation Technique. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems 44 4 (2025) 1448\u20131461. 10.1109\/TCAD.2024.3483089","DOI":"10.1109\/TCAD.2024.3483089"},{"key":"e_1_3_3_2_59_2","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1907.11692 (2019)."},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00062"},{"key":"e_1_3_3_2_61_2","unstructured":"Liangchen Luo Yinxiao Liu Rosanne Liu Samrat Phatale Meiqi Guo Harsh Lara Yunxuan Li Lei Shu Yun Zhu Lei Meng Jiao Sun and Abhinav Rastogi. 2024. Improve Mathematical Reasoning in Language Models by Automated Process Supervision. (2024). arxiv:https:\/\/arXiv.org\/abs\/2406.06592\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2406.06592"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3288756"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"crossref","unstructured":"Gordon\u00a0Euhyun Moon Hyoukjun Kwon Geonhwa Jeong Prasanth Chatarasi Sivasankaran Rajamanickam and Tushar Krishna. 2021. Evaluating spatial accelerator architectures with tiled matrix-matrix multiplication. IEEE Transactions on Parallel and Distributed Systems 33 4 (2021) 1002\u20131014.","DOI":"10.1109\/TPDS.2021.3104240"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530673"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00042"},{"key":"e_1_3_3_2_66_2","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2018. Improving language understanding by generative pre-training. OpenAI (2018)."},{"key":"e_1_3_3_2_67_2","unstructured":"Rafael Rafailov Archit Sharma Eric Mitchell Christopher\u00a0D Manning Stefano Ermon and Chelsea Finn. 2023. Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems 36 (2023) 53728\u201353741."},{"key":"e_1_3_3_2_68_2","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research 21 140 (2020) 1\u201367."},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"crossref","unstructured":"James Requeima John Bronskill Dami Choi Richard Turner and David\u00a0K Duvenaud. 2024. Llm processes: Numerical predictive distributions conditioned on natural language. Advances in Neural Information Processing Systems 37 (2024) 109609\u2013109671.","DOI":"10.52202\/079017-3479"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00073"},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3658249"},{"key":"e_1_3_3_2_72_2","unstructured":"Qwen Team. 2024. Qwen2 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.10671 (2024)."},{"key":"e_1_3_3_2_73_2","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian\u00a0Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit\u00a0Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric\u00a0Michael Smith Ranjan Subramanian Xiaoqing\u00a0Ellen Tan Binh Tang Ross Taylor Adina Williams Jian\u00a0Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.09288 (2023)."},{"key":"e_1_3_3_2_74_2","volume-title":"First Conference on Language Modeling","author":"Vacareanu Robert","year":"2024","unstructured":"Robert Vacareanu, Vlad\u00a0Andrei Negru, Vasile Suciu, and Mihai Surdeanu. 2024. From Words to Numbers: Your Large Language Model Is Secretly A Capable Regressor When Given In-Context Examples. In First Conference on Language Modeling. https:\/\/openreview.net\/forum?id=LzpaUxcNFK"},{"key":"e_1_3_3_2_75_2","unstructured":"Nicolas Vasilache Oleksandr Zinenko Theodoros Theodoridis Priya Goyal Zachary DeVito William\u00a0S Moses Sven Verdoolaege Andrew Adams and Albert Cohen. 2018. Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1802.04730 (2018)."},{"key":"e_1_3_3_2_76_2","unstructured":"verilator and Contributors. 2024. Verilator: Open-source SystemVerilog simulator and lint system. https:\/\/github.com\/verilator\/verilator GitHub repository."},{"key":"e_1_3_3_2_77_2","unstructured":"Leandro von Werra Younes Belkada Lewis Tunstall Edward Beeching Tristan Thrush Nathan Lambert Shengyi Huang Kashif Rasul and Quentin Gallou\u00e9dec. 2020. TRL: Transformer Reinforcement Learning. https:\/\/github.com\/huggingface\/trl."},{"key":"e_1_3_3_2_78_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00076"},{"key":"e_1_3_3_2_79_2","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma brian ichter Fei Xia Ed Chi Quoc\u00a0V Le and Denny Zhou. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837."},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"e_1_3_3_2_81_2","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530408"},{"key":"e_1_3_3_2_82_2","volume-title":"The Second Tiny Papers Track at ICLR 2024","author":"Wu Zhenglong","year":"2024","unstructured":"Zhenglong Wu, Qi Qi, Zirui Zhuang, Haifeng Sun, and Jingyu Wang. 2024. Pre-tokenization of numbers for large language models. In The Second Tiny Papers Track at ICLR 2024."},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624858"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480064"},{"key":"e_1_3_3_2_85_2","unstructured":"Jiarong Xing Leyuan Wang Shang Zhang Jack Chen Ang Chen and Yibo Zhu. 2022. Bolt: Bridging the gap between auto-tuners and hardware-native performance. Proceedings of Machine Learning and Systems 4 (2022) 204\u2013216."},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"crossref","unstructured":"Han Xu Jiayi Ma Junjun Jiang Xiaojie Guo and Haibin Ling. 2020. U2Fusion: A unified unsupervised image fusion network. IEEE transactions on pattern analysis and machine intelligence 44 1 (2020) 502\u2013518.","DOI":"10.1109\/TPAMI.2020.3012548"},{"key":"e_1_3_3_2_87_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378514"},{"key":"e_1_3_3_2_88_2","unstructured":"Fisher Yu and Vladlen Koltun. 2015. Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1511.07122 (2015)."},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190551"},{"key":"e_1_3_3_2_90_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575737"},{"key":"e_1_3_3_2_91_2","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454106"},{"key":"e_1_3_3_2_92_2","first-page":"863","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody\u00a0Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, Joseph\u00a0E. Gonzalez, and Ion Stoica. 2020. Ansor: Generating High-Performance Tensor Programs for Deep Learning. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). USENIX Association, 863\u2013879. https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/zheng"},{"key":"e_1_3_3_2_93_2","volume-title":"Proceedings of the 35th Conference on Neural Information Processing Systems (NeurIPS)","author":"Zheng Lianmin","year":"2021","unstructured":"Lianmin Zheng, Ruochen Liu, Junru Shao, Tianqi Chen, Joseph\u00a0E. Gonzalez, Ion Stoica, and Ameer\u00a0Haj Ali. 2021. Tenset: A Large-Scale Program Performance Dataset for Learned Tensor Compilers. In Proceedings of the 35th Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_94_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3623792"},{"key":"e_1_3_3_2_95_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"},{"key":"e_1_3_3_2_96_2","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317884"}],"event":{"name":"MICRO 2025: 58th IEEE\/ACM International Symposium on Microarchitecture","location":"Seoul Korea","acronym":"MICRO 2025","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["Proceedings of the 58th IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3725843.3756027","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T21:47:42Z","timestamp":1769464062000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3725843.3756027"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"references-count":95,"alternative-id":["10.1145\/3725843.3756027","10.1145\/3725843"],"URL":"https:\/\/doi.org\/10.1145\/3725843.3756027","relation":{},"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"2025-10-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}