{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:18:01Z","timestamp":1773317881174,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"name":"European High-Performance Computing Joint Undertaking","award":["101034126"],"award-info":[{"award-number":["101034126"]}]},{"name":"ERC project PSAP","award":["101002047"],"award-info":[{"award-number":["101002047"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759900","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"137-151","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["PerfDojo: Automated ML Library Generation for Heterogeneous Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9487-9990","authenticated-orcid":false,"given":"Andrei","family":"Ivanov","sequence":"first","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0061-5072","authenticated-orcid":false,"given":"Siyuan","family":"Shen","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9103-1403","authenticated-orcid":false,"given":"Gioele","family":"Gottardo","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7654-6038","authenticated-orcid":false,"given":"Marcin","family":"Chrapek","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8662-6353","authenticated-orcid":false,"given":"Afif","family":"Boudaoud","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4884-3934","authenticated-orcid":false,"given":"Timo","family":"Schneider","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8068-3806","authenticated-orcid":false,"given":"Luca","family":"Benini","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1333-9797","authenticated-orcid":false,"given":"Torsten","family":"Hoefler","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_2_2_2","first-page":"265","volume-title":"12th USENIX symposium on operating systems design and implementation (OSDI 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et\u00a0al. 2016. TensorFlow: a system for Large-Scale machine learning. In 12th USENIX symposium on operating systems design and implementation (OSDI 16). 265\u2013283."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661197"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356173"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29720"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Matheus Cavalcante Fabian Schuiki Florian Zaruba Michael Schaffner and Luca Benini. 2019. Ara: A 1-GHz+ scalable and energy-efficient RISC-V vector processor with multiprecision floating-point support in 22-nm FD-SOI. IEEE Transactions on Very Large Scale Integration (VLSI) Systems 28 2 (2019) 530\u2013543.","DOI":"10.1109\/TVLSI.2019.2950087"},{"key":"e_1_3_3_2_8_2","unstructured":"Tianqi Chen Thierry Moreau Ziheng Jiang Haichen Shen Eddie\u00a0Q Yan Leyuan Wang Yuwei Hu Luis Ceze Carlos Guestrin and Arvind Krishnamurthy. 2018. TVM: end-to-end optimization stack for deep learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1802.04799 11 20 (2018)."},{"key":"e_1_3_3_2_9_2","unstructured":"Zhi Chen Cody\u00a0Hao Yu Trevor Morris Jorn Tuyls Yi-Hsiang Lai Jared Roesch Elliott Delaye Vin Sharma and Yida Wang. 2021. Bring your own codegen to deep learning compiler. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2105.03215 (2021)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Jack Choquette Wishwesh Gandhi Olivier Giroux Nick Stam and Ronny Krashinsky. 2021. Nvidia a100 tensor core gpu: Performance and innovation. IEEE Micro 41 2 (2021) 29\u201335.","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_3_2_11_2","unstructured":"Chris Cummins Bram Wasti Jiadong Guo Brandon Cui Jason Ansel Sahir Gomez Somya Jain Jia Liu Olivier Teytaud Benoit Steiner Yuandong Tian and Hugh Leather. 2021. CompilerGym: Robust Performant Compiler Optimization Environments for AI Research. arxiv:https:\/\/arXiv.org\/abs\/2109.08267\u00a0[cs.PL] https:\/\/arxiv.org\/abs\/2109.08267"},{"key":"e_1_3_3_2_12_2","volume-title":"Hot chips: a symposium on high performance chips","author":"Ditty Michael","year":"2018","unstructured":"Michael Ditty, Ashish Karandikar, and David Reed. 2018. Nvidia\u2019s xavier soc. In Hot chips: a symposium on high performance chips."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3576933"},{"key":"e_1_3_3_2_14_2","unstructured":"Meire Fortunato Mohammad\u00a0Gheshlaghi Azar Bilal Piot Jacob Menick Ian Osband Alex Graves Vlad Mnih Remi Munos Demis Hassabis Olivier Pietquin Charles Blundell and Shane Legg. 2019. Noisy Networks for Exploration. arxiv:https:\/\/arXiv.org\/abs\/1706.10295\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1706.10295"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710871"},{"key":"e_1_3_3_2_16_2","unstructured":"Roy Frostig Matthew\u00a0James Johnson and Chris Leary. 2018. Compiling machine learning programs via high-level tracing. Systems for Machine Learning 4 9 (2018)."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293906"},{"key":"e_1_3_3_2_18_2","unstructured":"Sai\u00a0Krishna Gottipati Yashaswi Pathak Rohan Nuttall Sahir Raviteja Chunduru Ahmed Touati Sriram\u00a0Ganapathi Subramanian Matthew\u00a0E. Taylor and Sarath Chandar. 2023. Maximum Reward Formulation In Reinforcement Learning. arxiv:https:\/\/arXiv.org\/abs\/2010.03744\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2010.03744"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Ivo Grondman Lucian Busoniu Gabriel\u00a0AD Lopes and Robert Babuska. 2012. A survey of actor-critic reinforcement learning: Standard and natural policy gradients. IEEE Transactions on Systems Man and Cybernetics part C (applications and reviews) 42 6 (2012) 1291\u20131307.","DOI":"10.1109\/TSMCC.2012.2218595"},{"key":"e_1_3_3_2_20_2","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et\u00a0al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.12948 (2025)."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3519939.3523446"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707218"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Stavros Kalapothas Manolis Galetakis Georgios Flamis Fotis Plessas and Paris Kitsos. 2023. A survey on risc-v-based machine learning ecosystem. Information 14 2 (2023) 64.","DOI":"10.3390\/info14020064"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI.2008.4541126"},{"key":"e_1_3_3_2_27_2","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Alex Graves Ioannis Antonoglou Daan Wierstra and Martin Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. arxiv:https:\/\/arXiv.org\/abs\/1312.5602\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1312.5602"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei\u00a0A. Rusu Joel Veness Marc\u00a0G. Bellemare Alex Graves Martin\u00a0A. Riedmiller Andreas Fidjeland Georg Ostrovski Stig Petersen Charlie Beattie Amir Sadik Ioannis Antonoglou Helen King Dharshan Kumaran Daan Wierstra Shane Legg and Demis Hassabis. 2015. Human-level control through deep reinforcement learning. Nature 518 (2015) 529\u2013533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"crossref","unstructured":"Ravi\u00a0Teja Mullapudi Vinay Vasista and Uday Bondhugula. 2015. Polymage: Automatic optimization for image processing pipelines. ACM SIGARCH Computer Architecture News 43 1 (2015) 429\u2013443.","DOI":"10.1145\/2786763.2694364"},{"key":"e_1_3_3_2_30_2","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et\u00a0al. 2019. Pytorch: An imperative style high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Jonathan Ragan-Kelley Connelly Barnes Andrew Adams Sylvain Paris Fr\u00e9do Durand and Saman Amarasinghe. 2013. Halide: a language and compiler for optimizing parallelism locality and recomputation in image processing pipelines. Acm Sigplan Notices 48 6 (2013) 519\u2013530.","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3613214"},{"key":"e_1_3_3_2_33_2","unstructured":"Tom Schaul John Quan Ioannis Antonoglou and David Silver. 2016. Prioritized Experience Replay. arxiv:https:\/\/arXiv.org\/abs\/1511.05952\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1511.05952"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Fabian Schuiki Florian Zaruba Torsten Hoefler and Luca Benini. 2020. Stream semantic registers: A lightweight risc-v isa extension achieving full compute utilization in single-issue cores. IEEE Trans. Comput. 70 2 (2020) 212\u2013227.","DOI":"10.1109\/TC.2020.2987314"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Jay Shah Ganesh Bikshandi Ying Zhang Vijay Thakkar Pradeep Ramani and Tri Dao. 2024. Flashattention-3: Fast and accurate attention with asynchrony and low-precision. Advances in Neural Information Processing Systems 37 (2024) 68658\u201368685.","DOI":"10.52202\/079017-2193"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Junru Shao Xiyou Zhou Siyuan Feng Bohan Hou Ruihang Lai Hongyi Jin Wuwei Lin Masahiro Masuda Cody\u00a0Hao Yu and Tianqi Chen. 2022. Tensor Program Optimization with Probabilistic Programs. Advances in Neural Information Processing Systems 35 (2022) 35783\u201335796.","DOI":"10.52202\/068431-2593"},{"key":"e_1_3_3_2_37_2","first-page":"387","volume-title":"International conference on machine learning","author":"Silver David","year":"2014","unstructured":"David Silver, Guy Lever, Nicolas Heess, Thomas Degris, Daan Wierstra, and Martin Riedmiller. 2014. Deterministic policy gradient algorithms. In International conference on machine learning. Pmlr, 387\u2013395."},{"key":"e_1_3_3_2_38_2","unstructured":"Daniel Snider and Ruofan Liang. 2023. Operator Fusion in XLA: Analysis and Evaluation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.13062 (2023)."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3310273.3321559"},{"key":"e_1_3_3_2_40_2","volume-title":"Reinforcement Learning, second edition: An Introduction","author":"Sutton R.S.","year":"2018","unstructured":"R.S. Sutton and A.G. Barto. 2018. Reinforcement Learning, second edition: An Introduction. MIT Press. https:\/\/books.google.com\/books?id=uWV0DwAAQBAJ"},{"key":"e_1_3_3_2_41_2","volume-title":"Advances in Neural Information Processing Systems","author":"Sutton Richard\u00a0S","year":"1999","unstructured":"Richard\u00a0S Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy Gradient Methods for Reinforcement Learning with Function Approximation. In Advances in Neural Information Processing Systems , S.\u00a0Solla, T.\u00a0Leen, and K.\u00a0M\u00fcller (Eds.), Vol.\u00a012. MIT Press. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1999\/file\/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_3_2_43_2","unstructured":"TVM Community Discussion 11001. [n. d.]. Autoscheduler failed to find a valid schedule. https:\/\/discuss.tvm.apache.org\/t\/autoscheduler-failed-to-find-a-valid-schedule\/11001"},{"key":"e_1_3_3_2_44_2","unstructured":"TVM Community Discussion 12268. [n. d.]. Auto-Scheduler Time Out: No Valid Schedule for GEMM. https:\/\/discuss.tvm.apache.org\/t\/auto-scheduler-time-out-no-valid-schedule-for-gemm\/12268"},{"key":"e_1_3_3_2_45_2","unstructured":"TVM Community Discussion 12750. [n. d.]. Auto-Scheduler Cannot Find Any Valid Schedule. https:\/\/discuss.tvm.apache.org\/t\/auto-scheduler-cannot-find-any-valid-schedule\/12750"},{"key":"e_1_3_3_2_46_2","unstructured":"TVM Community Discussion 15238. [n. d.]. Bug: Auto Scheduler cannot find any valid schedule. https:\/\/discuss.tvm.apache.org\/t\/bug-auto-scheduler-cannot-find-any-valid-schedule\/15238"},{"key":"e_1_3_3_2_47_2","unstructured":"TVM GitHub Issue 15206. [n. d.]. Auto Scheduler cannot find any valid schedule. https:\/\/github.com\/apache\/tvm\/issues\/15206"},{"key":"e_1_3_3_2_48_2","unstructured":"TVM GitHub Issue 16670. [n. d.]. Auto scheduler Cannot find any valid schedule. https:\/\/github.com\/apache\/tvm\/issues\/16670"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","unstructured":"Hado van Hasselt Arthur Guez and David Silver. 2015. Deep Reinforcement Learning with Double Q-learning. 10.48550\/ARXIV.1509.06461","DOI":"10.48550\/ARXIV.1509.06461"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Nicolas Vasilache Oleksandr Zinenko Theodoros Theodoridis Priya Goyal Zachary Devito William\u00a0S Moses Sven Verdoolaege Andrew Adams and Albert Cohen. 2019. The next 700 accelerated layers: From mathematical expressions of network computation graphs to accelerated gpu kernels automatically. ACM Transactions on Architecture and Code Optimization (TACO) 16 4 (2019) 1\u201326.","DOI":"10.1145\/3355606"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.5555\/1888390.1888455"},{"key":"e_1_3_3_2_52_2","unstructured":"Pablo Villalobos Jaime Sevilla Tamay Besiroglu Lennart Heim Anson Ho and Marius Hobbhahn. 2022. Machine learning model sizes and the parameter gap. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.02852 (2022)."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3497776.3517769"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28701-5_11"},{"key":"e_1_3_3_2_55_2","unstructured":"Ziyu Wang Tom Schaul Matteo Hessel Hado van Hasselt Marc Lanctot and Nando de Freitas. 2016. Dueling Network Architectures for Deep Reinforcement Learning. arxiv:https:\/\/arXiv.org\/abs\/1511.06581\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1511.06581"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"crossref","unstructured":"Christopher\u00a0JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning 8 (1992) 279\u2013292.","DOI":"10.1023\/A:1022676722315"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","unstructured":"F. Woergoetter and B. Porr. 2008. Reinforcement learning. Scholarpedia 3 3 (2008) 1448. 10.4249\/scholarpedia.1448revision #127590.","DOI":"10.4249\/scholarpedia.1448"},{"key":"e_1_3_3_2_58_2","unstructured":"Jiarong Xing Leyuan Wang Shang Zhang Jack Chen Ang Chen and Yibo Zhu. 2022. Bolt: Bridging the gap between auto-tuners and hardware-native performance. Proceedings of Machine Learning and Systems 4 (2022) 204\u2013216."},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"crossref","unstructured":"Florian Zaruba Fabian Schuiki Torsten Hoefler and Luca Benini. 2020. Snitch: A tiny pseudo dual-issue processor for area and energy efficient execution of floating-point intensive workloads. IEEE Trans. Comput. 70 11 (2020) 1845\u20131860.","DOI":"10.1109\/TC.2020.3027900"},{"key":"e_1_3_3_2_60_2","first-page":"863","volume-title":"14th USENIX symposium on operating systems design and implementation (OSDI 20)","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody\u00a0Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, et\u00a0al. 2020. Ansor: Generating High-Performance tensor programs for deep learning. In 14th USENIX symposium on operating systems design and implementation (OSDI 20). 863\u2013879."},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759900","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:29:39Z","timestamp":1773253779000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759900"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":60,"alternative-id":["10.1145\/3712285.3759900","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759900","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}