{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:04:15Z","timestamp":1774631055381,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T00:00:00Z","timestamp":1737331200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"European Research Council (ERC)","award":["101088865"],"award-info":[{"award-number":["101088865"]}]},{"name":"European Union?s Horizon 2020 program (CONVOLVE)","award":["101070374"],"award-info":[{"award-number":["101070374"]}]},{"name":"Flanders AI Research Program"},{"name":"Research Foundation-Flanders (FWO)","award":["1SE7723N"],"award-info":[{"award-number":["1SE7723N"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,1,20]]},"DOI":"10.1145\/3658617.3697652","type":"proceedings-article","created":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T14:32:21Z","timestamp":1741098741000},"page":"1055-1061","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["OpenGeMM: A Highly-Efficient GeMM Accelerator Generator with Lightweight RISC-V Control and Tight Memory Coupling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-3001-3611","authenticated-orcid":false,"given":"Xiaoling","family":"Yi","sequence":"first","affiliation":[{"name":"KU Leuven, Leuven, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0286-4609","authenticated-orcid":false,"given":"Ryan","family":"Antonio","sequence":"additional","affiliation":[{"name":"KU Leuven, Leuven, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0692-1227","authenticated-orcid":false,"given":"Joren","family":"Dumoulin","sequence":"additional","affiliation":[{"name":"KU Leuven, Leuven, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0418-2713","authenticated-orcid":false,"given":"Jiacong","family":"Sun","sequence":"additional","affiliation":[{"name":"KU Leuven, Leuven, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9503-403X","authenticated-orcid":false,"given":"Josse","family":"Van Delm","sequence":"additional","affiliation":[{"name":"KU Leuven, Leuven, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7809-9563","authenticated-orcid":false,"given":"Guilherme","family":"Pereira Paim","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, INESC-ID, Instituto Superior T\u00e9cnico, University of Lisbon, Lisbon, Lisbon, Portugal"},{"name":"KU Leuven, Leuven, Flemish Brabant, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3495-9263","authenticated-orcid":false,"given":"Marian","family":"Verhelst","sequence":"additional","affiliation":[{"name":"KU Leuven, Leuven, Belgium"}]}],"member":"320","published-online":{"date-parts":[[2025,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.3390\/fi14120363"},{"key":"e_1_3_2_1_2_1","unstructured":"Cristina Silvano Daniele Ielmini Fabrizio Ferrandi Leandro Fiorin Serena Curzel Luca Benini Francesco Conti Angelo Garofalo Cristian Zambelli Enrico Calore Sebastiano Fabio Schifano Maurizio Palesi Giuseppe Ascia Davide Patti Stefania Perri Nicola Petra Davide De Caro Luciano Lavagno Teodoro Urso Valeria Cardellini Gian Carlo Cardarilli and Robert Birke. A survey on deep learning hardware accelerators for heterogeneous hpc platforms 2023."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eng.2020.01.007"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2023.3305937"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.23919\/VLSICircuits52068.2021.9492338"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"e_1_3_2_1_7_1","first-page":"19","volume-title":"Hot Chips","volume":"30","author":"Sijstermans Frans","year":"2018","unstructured":"Frans Sijstermans. The nvidia deep learning accelerator. In Hot Chips, volume 30, pages 19--21, 2018."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSESS47205.2019.9040769"},{"key":"e_1_3_2_1_10_1","volume-title":"The risc-v compressed instruction set manual, version 1.7. EECS Department","author":"Waterman Andrew","year":"2015","unstructured":"Andrew Waterman, Yunsup Lee, David A Patterson, and Krste Asanovi\u0107. The risc-v compressed instruction set manual, version 1.7. EECS Department, University of California, Berkeley, UCB\/EECS-2015-157, 2015."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2023.3318301"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586216"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11152373"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2857019"},{"key":"e_1_3_2_1_15_1","volume-title":"Redmule: A mixed-precision matrix-matrix operation engine for flexible and energy-efficient on-chip linear algebra and tinyml training acceleration. arXiv preprint arXiv:2301.03904","author":"Tortorella Yvan","year":"2023","unstructured":"Yvan Tortorella, Luca Bertaccini, Luca Benini, Davide Rossi, and Francesco Conti. Redmule: A mixed-precision matrix-matrix operation engine for flexible and energy-efficient on-chip linear algebra and tinyml training acceleration. arXiv preprint arXiv:2301.03904, 2023."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.3390\/computers7020027"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISQED57927.2023.10129330"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS48785.2022.9937422"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3214064"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3059962"},{"key":"e_1_3_2_1_21_1","volume-title":"Low-memory gemm-based convolution algorithms for deep neural networks. arXiv preprint arXiv:1709.03395","author":"Anderson Andrew","year":"2017","unstructured":"Andrew Anderson, Aravind Vasudevan, Cormac Keane, and David Gregg. Low-memory gemm-based convolution algorithms for deep neural networks. arXiv preprint arXiv:1709.03395, 2017."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.3027900"},{"key":"e_1_3_2_1_23_1","volume-title":"December","author":"Waterman Andrew","year":"2021","unstructured":"Andrew Waterman, Krste Asanovi\u0107, and John Hauser. The RISC-V Instruction Set Manual, Volume II: Privileged Architecture. RISC-V International, December 2021. Document Version 20211203."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.2987314"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2228360.2228584"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI54635.2022.00021"},{"key":"e_1_3_2_1_28_1","first-page":"630","volume-title":"Proceedings, Part IV 14","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. Identity mappings in deep residual networks. In Computer Vision-ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part IV 14, pages 630--645. Springer, 2016."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_30_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_2_1_31_1","volume-title":"Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pretraining of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ESSCIRC53450.2021.9567768"}],"event":{"name":"ASPDAC '25: 30th Asia and South Pacific Design Automation Conference","location":"Tokyo Japan","acronym":"ASPDAC '25","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEICE","IPSJ","IEEE CAS","IEEE CEDA"]},"container-title":["Proceedings of the 30th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697652","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3658617.3697652","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:49Z","timestamp":1750295869000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697652"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,20]]},"references-count":31,"alternative-id":["10.1145\/3658617.3697652","10.1145\/3658617"],"URL":"https:\/\/doi.org\/10.1145\/3658617.3697652","relation":{},"subject":[],"published":{"date-parts":[[2025,1,20]]},"assertion":[{"value":"2025-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}