{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T22:42:29Z","timestamp":1781908949606,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T00:00:00Z","timestamp":1740614400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2019306,2213701,2217003,2324864,2328972"],"award-info":[{"award-number":["2019306,2213701,2217003,2324864,2328972"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,27]]},"DOI":"10.1145\/3706628.3708870","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T12:22:11Z","timestamp":1740572531000},"page":"92-102","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["ARIES: An Agile MLIR-Based Compilation Flow for Reconfigurable Devices with AI Engines"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3659-339X","authenticated-orcid":false,"given":"Jinming","family":"Zhuang","sequence":"first","affiliation":[{"name":"Brown University, Providence, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6901-8837","authenticated-orcid":false,"given":"Shaojie","family":"Xiang","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6617-0075","authenticated-orcid":false,"given":"Hongzheng","family":"Chen","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2850-0176","authenticated-orcid":false,"given":"Niansong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7655-4080","authenticated-orcid":false,"given":"Zhuoping","family":"Yang","sequence":"additional","affiliation":[{"name":"Brown University, Providence, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5041-8374","authenticated-orcid":false,"given":"Tony","family":"Mao","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0778-0308","authenticated-orcid":false,"given":"Zhiru","family":"Zhang","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0493-1844","authenticated-orcid":false,"given":"Peipei","family":"Zhou","sequence":"additional","affiliation":[{"name":"Brown University, Providence, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,2,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3058217"},{"key":"e_1_3_2_1_2_1","volume-title":"AMD XDNA\u2122 NPU in Ryzen\u2122 AI Processors","author":"Rico Alejandro","year":"2024","unstructured":"Alejandro Rico, Satyaprakash Pareek, Javier Cabezas, David Clarke, Baris Ozgul, Francisco Barat, Yao Fu, Stephan M\u00fcnz, Dylan Stuart, Patrick Schlangen, et al. AMD XDNA\u2122 NPU in Ryzen\u2122 AI Processors. IEEE Micro, 2024."},{"key":"e_1_3_2_1_3_1","volume-title":"Tenstorrent Scales AI Performance. https:\/\/tenstorrent.com\/ vision\/tenstorrent-scales-ai-performance","author":"Gwennap Linley","year":"2020","unstructured":"Linley Gwennap. Tenstorrent Scales AI Performance. https:\/\/tenstorrent.com\/ vision\/tenstorrent-scales-ai-performance, 2020."},{"key":"e_1_3_2_1_4_1","first-page":"1","volume-title":"2023 IEEE Hot Chips 35 Symposium (HCS)","author":"Mahurin Eric","year":"2023","unstructured":"Eric Mahurin. Qualocmm\u00ae Hexagon\u2122NPU. In 2023 IEEE Hot Chips 35 Symposium (HCS), pages 1--19. IEEE Computer Society, 2023."},{"key":"e_1_3_2_1_5_1","unstructured":"AMD. Versal Adaptive SoC AIE-ML Architecture Manual (AM020) 2024."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543622.3573210"},{"key":"e_1_3_2_1_7_1","volume-title":"Composing Heterogeneous Accelerators for Deep Learning on Versal ACAP Architecture. ACM Trans. Reconfigurable Technol. Syst., aug","author":"Zhuang Jinming","year":"2024","unstructured":"Jinming Zhuang, Jason Lau, Hanchen Ye, Zhuoping Yang, Shixin Ji, Jack Lo, Kristof Denolf, Stephen Neuendorffer, Alex Jones, Jingtong Hu, Yiyu Shi, Deming Chen, Jason Cong, and Peipei Zhou. CHARM 2.0: Composing Heterogeneous Accelerators for Deep Learning on Versal ACAP Architecture. ACM Trans. Reconfigurable Technol. Syst., aug 2024. Just Accepted."},{"key":"e_1_3_2_1_8_1","first-page":"96","volume-title":"Diana Marculescu. MaxEVA: Maximizing the Efficiency of Matrix Multiplication on Versal AI Engine. In 2023 International Conference on Field Programmable Technology","author":"Taka Endri","year":"2023","unstructured":"Endri Taka, Aman Arora, Kai-Chiang Wu, and Diana Marculescu. MaxEVA: Maximizing the Efficiency of Matrix Multiplication on Versal AI Engine. In 2023 International Conference on Field Programmable Technology, pages 96--105, 2023."},{"key":"e_1_3_2_1_9_1","first-page":"463","volume-title":"Onur Mutlu. SPARTA: Spatial Acceleration for Efficient and Scalable Horizontal Diffusion Weather Stencil Computation. In Proceedings of the 37th ACM International Conference on Supercomputing, ICS '23","author":"Singh Gagandeep","year":"2023","unstructured":"Gagandeep Singh, Alireza Khodamoradi, Kristof Denolf, Jack Lo, Juan Gomez- Luna, Joseph Melber, Andra Bisca, Henk Corporaal, and Onur Mutlu. SPARTA: Spatial Acceleration for Efficient and Scalable Horizontal Diffusion Weather Stencil Computation. In Proceedings of the 37th ACM International Conference on Supercomputing, ICS '23, page 463--476, New York, NY, USA, 2023. Association for Computing Machinery."},{"key":"e_1_3_2_1_10_1","unstructured":"AMD. Riallto: An exploration framework for the AMD Ryzen AI NPU. https:\/\/riallto.ai\/. Accessed: 2024-09--15."},{"key":"e_1_3_2_1_11_1","unstructured":"AMD. MLIR-AIE: An MLIR-based AI Engine toolchain. https:\/\/xilinx.github.io\/mlir-aie\/. Accessed: 2024-09--15."},{"key":"e_1_3_2_1_12_1","unstructured":"AMD. MLIR-AIR: An MLIR-based toolchain for AMD AI Engine-enabled devices. https:\/\/xilinx.github.io\/mlir-air\/AIRDialect.html. Accessed: 2024-09--15."},{"key":"e_1_3_2_1_13_1","volume-title":"AMD XDNA\u2122 NPU in Ryzen\u2122 AI Processors","author":"Rico Alejandro","year":"2024","unstructured":"Alejandro Rico, Satyaprakash Pareek, Javier Cabezas, David Clarke, Baris Ozgul, Francisco Barat, Yao Fu, Stephan M\u00fcnz, Dylan Stuart, Patrick Schlangen, et al. AMD XDNA\u2122 NPU in Ryzen\u2122 AI Processors. IEEE Micro, 2024."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293906"},{"key":"e_1_3_2_1_15_1","first-page":"1","volume-title":"Design Challenges and DSE Perspectives. In 2023 60th ACM\/IEEE Design Automation Conference (DAC)","author":"Zhuang Jinming","year":"2023","unstructured":"Jinming Zhuang, Zhuoping Yang, and Peipei Zhou. High Performance, Low Power Matrix Multiply Design on ACAP: from Architecture, Design Challenges and DSE Perspectives. In 2023 60th ACM\/IEEE Design Automation Conference (DAC), pages 1--6, 2023."},{"key":"e_1_3_2_1_16_1","first-page":"1","volume-title":"AIM: Accelerating Arbitrary-Precision Integer Multiplication on Heterogeneous Reconfigurable Computing Platform Versal ACAP. In 2023 IEEE\/ACM International Conference on Computer Aided Design (ICCAD)","author":"Yang Zhuoping","year":"2023","unstructured":"Zhuoping Yang, Jinming Zhuang, Jiaqi Yin, Cunxi Yu, Alex K. Jones, and Peipei Zhou. AIM: Accelerating Arbitrary-Precision Integer Multiplication on Heterogeneous Reconfigurable Computing Platform Versal ACAP. In 2023 IEEE\/ACM International Conference on Computer Aided Design (ICCAD), pages 1--9, 2023."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626202.3637569"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2024.3443692"},{"key":"e_1_3_2_1_19_1","first-page":"200","volume-title":"Dingwen Tao. H-GCN: A Graph Convolutional Network Accelerator on Versal ACAP Architecture. In 2022 32nd International Conference on Field- Programmable Logic and Applications (FPL)","author":"Zhang Chengming","year":"2022","unstructured":"Chengming Zhang, Tong Geng, Anqi Guo, Jiannan Tian, Martin Herbordt, Ang Li, and Dingwen Tao. H-GCN: A Graph Convolutional Network Accelerator on Versal ACAP Architecture. In 2022 32nd International Conference on Field- Programmable Logic and Applications (FPL), pages 200--208, 2022."},{"key":"e_1_3_2_1_20_1","unstructured":"AMD. AMD Ryzen\u2122 AI Software Stack. https:\/\/www.amd.com\/en\/developer\/resources\/ryzen-ai-software.html. Accessed: 2024-09--15."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640366"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. ACM Program. Lang., 8(PLDI), jun","author":"Chen Hongzheng","year":"2024","unstructured":"Hongzheng Chen, Niansong Zhang, Shaojie Xiang, Zhichen Zeng, Mengjia Dai, and Zhiru Zhang. Allo: A Programming Model for Composable Accelerator Design. Proc. ACM Program. Lang., 8(PLDI), jun 2024."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530681"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_26_1","unstructured":"AMD. AIE vector dialect. https:\/\/xilinx.github.io\/mlir-aie\/AIEVecDialect.html. Accessed: 2024-09--15."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2435264.2435273"},{"key":"e_1_3_2_1_28_1","volume-title":"Newnes","author":"Cook Shane","year":"2012","unstructured":"Shane Cook. CUDA programming: a developer's guide to parallel computing with GPUs. Newnes, 2012."},{"key":"e_1_3_2_1_29_1","volume-title":"Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks. ACM SIGARCH computer architecture news, 44(3):367--379","author":"Chen Yu-Hsin","year":"2016","unstructured":"Yu-Hsin Chen, Joel Emer, and Vivienne Sze. Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks. ACM SIGARCH computer architecture news, 44(3):367--379, 2016."},{"key":"e_1_3_2_1_30_1","volume-title":"AI Engine Intrinsics User Guide (UG1078)","author":"AMD.","year":"2024","unstructured":"AMD. AI Engine Intrinsics User Guide (UG1078), 2024."},{"key":"e_1_3_2_1_31_1","unstructured":"AMD. Versal Adaptive SoC AI Engine Architecture Manual (AM009) 2024."},{"key":"e_1_3_2_1_32_1","first-page":"1","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE)","author":"Dai Tuo","year":"2024","unstructured":"Tuo Dai, Bizhao Shi, and Guojie Luo. WideSA: A High Array Utilization Mapping Scheme for Uniform Recurrences on ACAP. In 2024 Design, Automation & Test in Europe Conference & Exhibition (DATE), pages 1--6. IEEE, 2024."},{"key":"e_1_3_2_1_33_1","volume-title":"https:\/\/xilinx-wiki.atlassian.net\/wiki\/spaces\/A\/pages\/2273738753\/VersalEvaluationBoard-SystemController-Update6","author":"Tool AMD.","year":"2024","unstructured":"AMD. Board evaluation and management Tool. https:\/\/xilinx-wiki.atlassian.net\/wiki\/spaces\/A\/pages\/2273738753\/VersalEvaluationBoard-SystemController-Update6, 2024. Accessed: 2024-09--15."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2019.00033"},{"key":"e_1_3_2_1_35_1","first-page":"93","volume-title":"Jason Cong. AutoSA: A Polyhedral Compiler for High- Performance Systolic Arrays on FPGA. In The 2021 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays, FPGA '21","author":"Guo Licheng","year":"2021","unstructured":"JieWang, Licheng Guo, and Jason Cong. AutoSA: A Polyhedral Compiler for High- Performance Systolic Arrays on FPGA. In The 2021 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays, FPGA '21, page 93--104, New York, NY, USA, 2021. Association for Computing Machinery."},{"key":"e_1_3_2_1_36_1","volume-title":"October","author":"Kjolstad Fredrik","year":"2017","unstructured":"Fredrik Kjolstad, Shoaib Kamil, Stephen Chou, David Lugato, and Saman Amarasinghe. The tensor algebra compiler. 1(OOPSLA), October 2017."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_38_1","first-page":"741","volume-title":"Deming Chen. ScaleHLS: A New Scalable High-Level Synthesis Framework on Multi-Level Intermediate Representation. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","author":"Ye Hanchen","year":"2022","unstructured":"Hanchen Ye, Cong Hao, Jianyi Cheng, Hyunmin Jeong, Jack Huang, Stephen Neuendorffer, and Deming Chen. ScaleHLS: A New Scalable High-Level Synthesis Framework on Multi-Level Intermediate Representation. In 2022 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pages 741--755, 2022."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617232.3624850"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Yi-Hsiang Lai Yuze Chi Yuwei Hu Jie Wang Cody Hao Yu Yuan Zhou Jason Cong and Zhiru Zhang. HeteroCL: A Multi-Paradigm Programming Infrastructure for Software-Defined Reconfigurable Computing. In Proceedings of the 2019 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays FPGA '19 page 242--251 New York NY USA 2019. Association for Computing Machinery.","DOI":"10.1145\/3289602.3293910"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502369"}],"event":{"name":"FPGA '25: The 2025 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays","location":"Monterey CA USA","acronym":"FPGA '25","sponsor":["SIGDA ACM Special Interest Group on Design Automation"]},"container-title":["Proceedings of the 2025 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706628.3708870","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706628.3708870","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T21:53:40Z","timestamp":1755899620000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706628.3708870"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,27]]},"references-count":41,"alternative-id":["10.1145\/3706628.3708870","10.1145\/3706628"],"URL":"https:\/\/doi.org\/10.1145\/3706628.3708870","relation":{},"subject":[],"published":{"date-parts":[[2025,2,27]]},"assertion":[{"value":"2025-02-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}