{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T02:14:23Z","timestamp":1769825663310,"version":"3.49.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T00:00:00Z","timestamp":1714867200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,5]],"date-time":"2024-05-05T00:00:00Z","timestamp":1714867200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,5]]},"DOI":"10.1109\/ispass61541.2024.00022","type":"proceedings-article","created":{"date-parts":[[2024,7,16]],"date-time":"2024-07-16T17:19:44Z","timestamp":1721150384000},"page":"132-143","source":"Crossref","is-referenced-by-count":19,"title":["On the Rise of AMD Matrix Cores: Performance, Power Efficiency, and Programmability"],"prefix":"10.1109","author":[{"given":"Gabin","family":"Schieffer","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology,Stockholm,Sweden"}]},{"given":"Daniel Ara\u00fajo","family":"De Medeiros","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Stockholm,Sweden"}]},{"given":"Jennifer","family":"Faj","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Stockholm,Sweden"}]},{"given":"Aniruddha","family":"Marathe","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory,Livermore,USA"}]},{"given":"Ivy","family":"Peng","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Stockholm,Sweden"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Top500 list","year":"2023"},{"key":"ref2","first-page":"522","article-title":"Nvidia tensor core programmability, performance & precision","volume-title":"2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","author":"Markidis"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00050"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3011893"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3331057"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNS.2020.2977583"},{"key":"ref7","article-title":"Introducing AMD CDNA2 architecture","volume-title":"AMD, Whitepaper","year":"2021"},{"key":"ref8","volume-title":"AMD instinct MIl 00 instruction set architecture","year":"2020"},{"key":"ref9","volume-title":"AMD instinct MI200 instruction set architecture","year":"2022"},{"key":"ref10","article-title":"Nvidia A100 tensor core gpu architecture","volume-title":"Nvidia, Whitepaper","year":"2020"},{"key":"ref11","volume-title":"Parallel thread execution ISA documentation","year":"2023"},{"key":"ref12","volume-title":"Amd matrix instruction calculator","year":"2023"},{"key":"ref13","volume-title":"LAPACK - Linear Algebra PACKage"},{"key":"ref14","volume-title":"AMD Instinct MI250X Datasheet","year":"2021"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3217824"},{"key":"ref16","volume-title":"Hierarchical roofline on AMD In-stinct MI200 gpus","year":"2022"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3505285"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/WORKS49585.2019.00009"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/SAAHPC.2012.26"},{"key":"ref20","article-title":"User-level power monitoring and application performance on cray xc30 supercomputers","volume-title":"Proceedings of the Cray User Group (CUG)","author":"Hart","year":"2014"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW59300.2023.00068"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/P3HPC56579.2022.00008"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624223"},{"key":"ref24","article-title":"Quantum computer simulations at warp speed: Assessing the impact of gpu acceleration","author":"Faj","year":"2023","journal-title":"arXiv preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00071"},{"issue":"5","key":"ref26","doi-asserted-by":"crossref","first-page":"560","DOI":"10.1177\/10943420231178552","article-title":"Hipbone: A performance-portable graphics processing unit-accelerated c++ version of the nekbone benchmark","volume":"37","author":"Chalmers","year":"2023","journal-title":"The International Journal of High Performance Computing Applications"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-39698-4_41"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3524059.3532368"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS.2018.8641600"},{"key":"ref30","article-title":"Mixed precision training","volume-title":"International Conference on Learning Representations","author":"Micikevicius","year":"2018"},{"key":"ref31","article-title":"A study of bftoat16 for deep learning training","author":"Kalamkar","year":"2019","journal-title":"arXiv preprint"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00204"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476157"}],"event":{"name":"2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","location":"Indianapolis, IN, USA","start":{"date-parts":[[2024,5,5]]},"end":{"date-parts":[[2024,5,7]]}},"container-title":["2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10589923\/10590014\/10590025.pdf?arnumber=10590025","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,19]],"date-time":"2024-07-19T04:54:44Z","timestamp":1721364884000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10590025\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,5]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/ispass61541.2024.00022","relation":{},"subject":[],"published":{"date-parts":[[2024,5,5]]}}}