{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T00:09:31Z","timestamp":1760746171844,"version":"build-2065373602"},"reference-count":52,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,15]]},"DOI":"10.1109\/hpec67600.2025.11196413","type":"proceedings-article","created":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T17:35:37Z","timestamp":1760636137000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating Supercomputing: AI-Hardware-Driven Innovation for Speed and Efficiency"],"prefix":"10.1109","author":[{"given":"Jack","family":"Dongarra","sequence":"first","affiliation":[{"name":"University of Tennessee Oak Ridge National Laboratory University of Manchester,Oak Ridge,TN,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John","family":"Gunnels","sequence":"additional","affiliation":[{"name":"NVIDIA Corporation,Santa Clara,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Harun","family":"Bayraktar","sequence":"additional","affiliation":[{"name":"NVIDIA Corporation,Santa Clara,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Azzam","family":"Haidar","sequence":"additional","affiliation":[{"name":"NVIDIA Corporation,Santa Clara,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan","family":"Ernst","sequence":"additional","affiliation":[{"name":"NVIDIA Corporation,Santa Clara,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.728"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"article-title":"A study of bfloat16 for deep learning training","year":"2019","author":"Kalamkar","key":"ref3"},{"article-title":"Fp8 formats for deep learning","year":"2022","author":"Micikevicius","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/320764.320766"},{"volume-title":"IBM system\/360 principles of operation","year":"1964","key":"ref6"},{"key":"ref7","first-page":"345,349","article-title":"Cray t90 series ieee floating point migration issues and solutions","volume-title":"CRAY User Group 1996 Spring Proceedings","author":"Garnatz"},{"volume-title":"IEEE Standard for Floating-Point Arithmetic, IEEE Std 754-2019 (revision of IEEE Std 754-2008)","year":"2019","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/800053.801923"},{"volume-title":"The 80386, 80486, and Pentium Microprocessors: Hardware, Software, and Interfacing","year":"1997","author":"Triebel","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3113475"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1015706.1015800"},{"key":"ref13","first-page":"16:1","article-title":"Scalable parallel programming with cuda","volume-title":"SIGGRAPH Classes","author":"Nickolls","year":"2008"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI.2008.4541126"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2008.05.008"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2008.917757"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553486"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1201\/9781420010749"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2010.69"},{"key":"ref20","article-title":"Rocm"},{"author":"Group","key":"ref21","article-title":"Sycl, C++ Programming for Heterogenous Parallel COmputing"},{"key":"ref22","article-title":"OpenACC"},{"key":"ref23","article-title":"OpenMP"},{"article-title":"Pytorch: An imperative style, high-performance deep learning library","year":"2019","author":"Paszke","key":"ref24"},{"key":"ref25","first-page":"265","article-title":"Tensorflow: a system for large-scale machine learning","volume-title":"Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation","author":"Abadi"},{"article-title":"JAX: composable transformations of Python+NumPy programs","year":"2018","author":"Bradbury","key":"ref26"},{"key":"ref27","article-title":"CuPy- NumPY & SciPy for GPU"},{"key":"ref28","article-title":"OCP 8-bit Floating Point Specification (OFP8)","author":"Micikevicius","year":"2023","journal-title":"Open Compute Project"},{"article-title":"Microscaling data formats for deep learning","year":"2023","author":"Rouhani","key":"ref29"},{"year":"2017","key":"ref30","article-title":"Nvidia v100 gpu architecture"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS61541.2024.00022"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"year":"2024","key":"ref33","article-title":"CUDA PTX ISA"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-09766-4_157"},{"article-title":"Top 500. the list","year":"2024","author":"Strohmaier","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2007.445"},{"article-title":"Hpl-ai mixed-precision benchmark: The next frontier of supercomputing","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis","author":"Dongarra","key":"ref37"},{"key":"ref38","article-title":"The state of the transistor in 3 charts"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1177\/1094342017738610"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/sc41406.2024.00010"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/sc41406.2024.00013"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/sc41406.2024.00012"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2008.11.005"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1177\/10943420211003313"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00050"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1098\/rspa.2020.0110"},{"key":"ref47","first-page":"1737","article-title":"Deep learning with limited numerical precision","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","volume":"37","author":"Gupta"},{"issue":"9","key":"ref48","first-page":"1517","article-title":"Solving lattice qcd systems of equations using mixed precision solvers on gpus","volume-title":"Computer Physics Communications","volume":"181","author":"Clark","year":"2010"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1098\/rsos.211631"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1177\/10943420221090256"},{"article-title":"Performance enhancement of the ozaki scheme on integer matrix multiplication unit","year":"2024","author":"Uchino","key":"ref51"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00019"}],"event":{"name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","start":{"date-parts":[[2025,9,15]]},"location":"Wakefield, MA, USA","end":{"date-parts":[[2025,9,19]]}},"container-title":["2025 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11196085\/11196088\/11196413.pdf?arnumber=11196413","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T04:43:42Z","timestamp":1760676222000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11196413\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,15]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/hpec67600.2025.11196413","relation":{},"subject":[],"published":{"date-parts":[[2025,9,15]]}}}