{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T07:17:47Z","timestamp":1774595867801,"version":"3.50.1"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,1]]},"DOI":"10.1109\/fpl68686.2025.00051","type":"proceedings-article","created":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T19:48:24Z","timestamp":1774554504000},"page":"323-331","source":"Crossref","is-referenced-by-count":0,"title":["GAMA: High-Performance GEMM Acceleration on AMD Versal ML-Optimized AI Engines"],"prefix":"10.1109","author":[{"given":"Kaustubh Manohar","family":"Mhatre","sequence":"first","affiliation":[{"name":"Arizona State University,Tempe,USA"}]},{"given":"Endri","family":"Taka","sequence":"additional","affiliation":[{"name":"The University of Texas at Austin Austin,USA"}]},{"given":"Aman","family":"Arora","sequence":"additional","affiliation":[{"name":"Arizona State University,Tempe,USA"}]}],"member":"263","reference":[{"key":"ref1","year":"2022","journal-title":"AI Engine API User Guide."},{"key":"ref2","volume-title":"AMD Versal ACAP","year":"2024"},{"key":"ref3","article-title":"AMD\/Xilinx","year":"2021","journal-title":"Versal ACAP AI Engine Architecture Manual (AM009)"},{"key":"ref4","article-title":"AMD\/Xilinx","year":"2022","journal-title":"AI Engine Kernel and Graph Programming Guide (UG1079)."},{"key":"ref5","article-title":"AMD\/Xilinx","year":"2023","journal-title":"Versal Adaptive SoC AIE-ML Architecture Manual (AM020)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC43674.2020.9286183"},{"key":"ref7","first-page":"219","article-title":"Exploiting On-Chip Heterogeneity of Versal Architecture for GNN Inference Acceleration","volume-title":"2023 33rd International Conference on Field-Programmable Logic and Applications (FPL)","author":"Chen"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/FPL64840.2024.00039"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2024.3443692"},{"key":"ref10","volume-title":"A Compiler Infrastructure for the End of Moore\u2019s Law","author":"Lattner","year":"2020"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/AERO55745.2023.10115906"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593719"},{"key":"ref14","volume-title":"MaxEVA: Maximizing the Efficiency of Matrix Multiplication on Versal AI Engine","author":"Taka","year":"2023"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1145\/3695053.3731088","volume-title":"Reconfigurable Stream Network Architecture","author":"Wang","year":"2025"},{"key":"ref16","first-page":"2023","volume-title":"Evaluation of Xilinx Versal Device","author":"Wierse"},{"key":"ref17","first-page":"1","article-title":"AIM: Accelerating Arbitrary-Precision Integer Multiplication on Heterogeneous Reconfigurable Computing Platform Versal ACAP","volume-title":"2023 IEEE\/ACM International Conference on Computer Aided Design (ICCAD)","author":"Yang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191067"},{"key":"ref19","first-page":"200","article-title":"H-GCN: A Graph Convolutional Network Accelerator on Versal ACAP Architecture","volume-title":"2022 32nd International Conference on Field-Programmable Logic and Applications (FPL)","author":"Zhang"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3543622.3573210"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3686163"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3706628.3708870"},{"key":"ref23","volume-title":"SSR: Spatial Sequential Hybrid Architecture for Latency Throughput Tradeoff in Transformer Acceleration","author":"Zhuang","year":"2024"},{"key":"ref24","volume-title":"AutoMM: Energy-Efficient Multi-Data-Type Matrix Multiply Design on Heterogeneous Programmable System-on-Chip","author":"Zhuang","year":"2023"}],"event":{"name":"2025 35th International Conference on Field-Programmable Logic and Applications (FPL)","location":"Leiden, Netherlands","start":{"date-parts":[[2025,9,1]]},"end":{"date-parts":[[2025,9,5]]}},"container-title":["2025 35th International Conference on Field-Programmable Logic and Applications (FPL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11449056\/11449057\/11449136.pdf?arnumber=11449136","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T05:50:20Z","timestamp":1774590620000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11449136\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,1]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/fpl68686.2025.00051","relation":{},"subject":[],"published":{"date-parts":[[2025,9,1]]}}}