{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T15:34:43Z","timestamp":1759073683520,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T00:00:00Z","timestamp":1721174400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["OAC-2402542, OAC-1854828, OAC-2139536"],"award-info":[{"award-number":["OAC-2402542, OAC-1854828, OAC-2139536"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,17]]},"DOI":"10.1145\/3626203.3670561","type":"proceedings-article","created":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T20:12:20Z","timestamp":1721247140000},"page":"1-5","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Automatic BLAS Offloading on Unified Memory Architecture: A Study on NVIDIA Grace-Hopper"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1051-5927","authenticated-orcid":false,"given":"Junjie","family":"Li","sequence":"first","affiliation":[{"name":"Texas Advanced Computing Center, The University of Texas at Austin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8505-0223","authenticated-orcid":false,"given":"Yinzhi","family":"Wang","sequence":"additional","affiliation":[{"name":"Texas Advanced Computing Center, The University of Texas at Austin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7882-3571","authenticated-orcid":false,"given":"Xiao","family":"Liang","sequence":"additional","affiliation":[{"name":"Pittsburgh Supercomputing Center, Carnegie Mellon University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3486-7863","authenticated-orcid":false,"given":"Hang","family":"Liu","sequence":"additional","affiliation":[{"name":"Texas Advanced Computing Center, The University of Texas at Austin, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,7,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.72.1240"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2016.07.013"},{"key":"e_1_3_2_1_3_1","unstructured":"Hewlett Packard Enterprise. [n. d.]. HPE Cray Programming Environment documentation. https:\/\/h41374.www4.hpe.com\/docs\/csml\/cray_libsci_acc.html"},{"key":"e_1_3_2_1_4_1","unstructured":"IBM. [n. d.]. IBM Engineering and Scientific Subroutine Library for Linux on POWER. https:\/\/www.ibm.com\/docs\/en\/SSFHY8_6.1\/reference\/essl_reference_pdf.pdf"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1002\/pssb.200541463"},{"key":"e_1_3_2_1_6_1","unstructured":"Junjie Li and Yinzhi Wang. 2024. SCILIB-accel: automatic BLAS offload tool. https:\/\/github.com\/nicejunjie\/scilib-accel"},{"key":"e_1_3_2_1_7_1","unstructured":"NVIDIA. [n. d.]. NVBLAS documentation. https:\/\/docs.nvidia.com\/cuda\/nvblas"},{"key":"e_1_3_2_1_8_1","unstructured":"NVIDIA. 2023. NVIDIA GH200 Grace Hopper Superchip Architecture. (2023). https:\/\/resources.nvidia.com\/en-us-grace-cpu\/nvidia-grace-hopper"},{"key":"e_1_3_2_1_9_1","unstructured":"Heidi Poxon. 2013. Introduction to the Cray Accelerated Scientific Libraries. (2013). https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2013\/01\/Scientific_Libs.pdf"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624143"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.75.2867"}],"event":{"name":"PEARC '24: Practice and Experience in Advanced Research Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Providence RI USA","acronym":"PEARC '24"},"container-title":["Practice and Experience in Advanced Research Computing 2024: Human Powered Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626203.3670561","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626203.3670561","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:56:08Z","timestamp":1755867368000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626203.3670561"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,17]]},"references-count":11,"alternative-id":["10.1145\/3626203.3670561","10.1145\/3626203"],"URL":"https:\/\/doi.org\/10.1145\/3626203.3670561","relation":{},"subject":[],"published":{"date-parts":[[2024,7,17]]},"assertion":[{"value":"2024-07-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}