{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T14:28:29Z","timestamp":1754144909662,"version":"3.41.2"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","funder":[{"name":"NSF","award":["2446084"],"award-info":[{"award-number":["2446084"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,16]]},"DOI":"10.1145\/3736112.3736144","type":"proceedings-article","created":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T15:02:27Z","timestamp":1749826947000},"page":"41-52","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Array Programming on GPUs: Challenges and Opportunities"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7276-7715","authenticated-orcid":false,"given":"Xinyi","family":"Li","sequence":"first","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0840-2761","authenticated-orcid":false,"given":"Mark","family":"Baranowski","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9878-7642","authenticated-orcid":false,"given":"Harvey","family":"Dam","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4161-9278","authenticated-orcid":false,"given":"Ganesh","family":"Gopalakrishnan","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,16]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3625122"},{"volume-title":"PLDI Array\u201925 Workshop Artifact from the University of Utah.  https:\/\/github.com\/ganeshutah\/PLDI25-Array-Workshop","key":"e_1_3_2_2_2_1","unstructured":"[n. d.]. PLDI Array\u201925 Workshop Artifact from the University of Utah. https:\/\/github.com\/ganeshutah\/PLDI25-Array-Workshop"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Pierre Blanchard Nicholas J Higham Florent Lopez Th\u00e9o Mary and Srikara Pranesh. 2020. Mixed Precision Block Fused Multiply-Add: Error Analysis and Application to GPU Tensor Cores. SISC https:\/\/hal.science\/hal-02491076","DOI":"10.1137\/19M1289546"},{"volume-title":"Reasoning About Parallel Architectures","author":"Collier William W.","key":"e_1_3_2_2_4_1","unstructured":"William W. Collier. 1992. Reasoning About Parallel Architectures. Prentice-Hall, Inc., Upper Saddle River, NJ, United States. isbn:978-0-13-767187-8"},{"key":"e_1_3_2_2_5_1","unstructured":"[n. d.]. Correctness in Scientific Computing a PLDI 2023 Workshop. https:\/\/pldi23.sigplan.org\/home\/csc-2023#program"},{"key":"e_1_3_2_2_6_1","unstructured":"[n. d.]. cuSolver. https:\/\/docs.nvidia.com\/cuda\/cusolver\/index.html#cusolverirsrefinement-t"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","unstructured":"James Demmel Jack Dongarra Mark Gates Greg Henry Julien Langou Xiaoye Li Piotr Luszczek Weslley Pereira Jason Riedy and Cindy Rubio-Gonz\u00e1lez. 2022. Proposed Consistent Exception Handling for the BLAS and LAPACK. https:\/\/doi.org\/10.48550\/arXiv.2207.09281 10.48550\/arXiv.2207.09281","DOI":"10.48550\/arXiv.2207.09281"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2016.31"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","unstructured":"Peter Dinda Alex Bernat and Conor Hetland. 2020. Spying on the Floating Point Behavior of Existing Unmodified Scientific Applications. HPDC \u201920. 5\u201316. isbn:9781450370523 https:\/\/doi.org\/10.1145\/3369583.3392673 10.1145\/3369583.3392673","DOI":"10.1145\/3369583.3392673"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","unstructured":"Jack Dongarra Laura Grigori and Nicholas Higham. 2020. Numerical algorithms for high-performance computational science. Philosophical Transactions of the Royal Society A http:\/\/doi.org\/10.1098\/rsta.2019.0066 10.1098\/rsta.2019.0066","DOI":"10.1098\/rsta.2019.0066"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","unstructured":"Massimiliano Fasi Nicholas Higham Mantas Mikaitis and Srikara Pranesh. 2021. Numerical behavior of NVIDIA tensor cores. PeerJ. Computer science https:\/\/doi.org\/10.7717\/peerj-cs.330 10.7717\/peerj-cs.330","DOI":"10.7717\/peerj-cs.330"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","unstructured":"Ganesh Gopalakrishnan Paul D. Hovland Costin Iancu Sriram Krishnamoorthy Ignacio Laguna Richard A. Lethin Koushik Sen Stephen F. Siegel and Armando Solar-Lezama. 2017. Report of the HPC Correctness Summit January. arXiv https:\/\/doi.org\/10.2172\/1470989 10.2172\/1470989","DOI":"10.2172\/1470989"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","unstructured":"Ganesh Gopalakrishnan Ignacio Laguna Ang Li Pavel Panchekha Cindy Rubio-Gonz\u00e1lez and Zachary Tatlock. 2021. Guarding Numerics Amidst Rising Heterogeneity. In Correctness. 9\u201315. https:\/\/doi.org\/10.1109\/Correctness54621.2021.00007 10.1109\/Correctness54621.2021.00007","DOI":"10.1109\/Correctness54621.2021.00007"},{"key":"e_1_3_2_2_14_1","unstructured":"[n. d.]. GPU-FPX: A Low-Overhead tool for Floating-Point Exception Detection in NVIDIA GPUs. https:\/\/github.com\/LLNL\/GPU-FPX.git"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/227699.227701"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","unstructured":"Yafan Huang Sheng Di Xiaodong Yu Guanpeng Li and Franck Cappello. 2023. cuSZp: An Ultra-fast GPU Error-bounded Lossy Compression Framework with Optimized End-to-End Performance. SC \u201923. isbn:9798400701092 https:\/\/doi.org\/10.1145\/3581784.3607048 10.1145\/3581784.3607048","DOI":"10.1145\/3581784.3607048"},{"key":"e_1_3_2_2_17_1","unstructured":"[n. d.]. https:\/\/standards.ieee.org\/ieee\/754\/6210\/"},{"key":"e_1_3_2_2_18_1","unstructured":"[n. d.]. IEEE-754 Floating Point Converter. https:\/\/www.h-schmidt.net\/FloatConverter\/IEEE754.html"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2019.00118"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Ignacio Laguna Xinyi Li and Ganesh Gopalakrishnan. 2022. BinFPE: Accurate Floating-Point Exception Detection for GPU Applications. https:\/\/pldi22.sigplan.org\/home\/SOAP-2022#event-overview In SOAP \u201922.","DOI":"10.1145\/3520313.3534655"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","unstructured":"Ignacio Laguna Tanmay Tirpankar Xinyi Li and Ganesh Gopalakrishnan. 2022. FPChecker: Floating-Point Exception Detection Tool and Benchmark for Parallel and Distributed HPC. In IISWC \u201922. 39\u201350. https:\/\/doi.org\/10.1109\/IISWC55918.2022.00014 10.1109\/IISWC55918.2022.00014","DOI":"10.1109\/IISWC55918.2022.00014"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","unstructured":"Tao Lei Yu Zhang Sida I. Wang Hui Dai and Yoav Artzi. 2018. Simple Recurrent Units for Highly Parallelizable Recurrence. In EMNLP. https:\/\/doi.org\/10.18653\/v1\/D18-1477 10.18653\/v1\/D18-1477","DOI":"10.18653\/v1\/D18-1477"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588195.3592991"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid59990.2024.00014"},{"key":"e_1_3_2_2_25_1","unstructured":"Xinyi Li Ang Li Ignacio Laguna and Ganesh Gopalakrishnan. [n. d.]. https:\/\/github.com\/LLNL\/FTTN.git"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2022.3215477"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575711"},{"key":"e_1_3_2_2_28_1","unstructured":"Lei Mao. 2020. NVIDIA Tensor Core Programming. https:\/\/leimao.github.io\/blog\/NVIDIA-Tensor-Core-Programming\/ Accessed: 2025-04-01"},{"key":"e_1_3_2_2_29_1","unstructured":"Dolores Miao Ignacio Laguna and Cindy Rubio-Gonz\u00e1lez. 2025. FloatGuard: Efficient Whole-Program Detection of Floating-Point Exceptions in AMD GPUs. In HPDC \u201925\u2019."},{"key":"e_1_3_2_2_30_1","unstructured":"Mantas Mikaitis. 2023. Monotonicity of Multi-Term Floating-Point Adders. arxiv:2304.01407."},{"key":"e_1_3_2_2_31_1","unstructured":"Fraser Mince Dzung Dinh Jonas Kgomo Neil Thompson and Sara Hooker. 2023. The Grand Illusion: The Myth of Software Portability and Implications for ML Progress. arXiv arxiv:2309.07181"},{"key":"e_1_3_2_2_32_1","unstructured":"[n. d.]. https:\/\/docs.nvidia.com\/cuda\/floating-point\/index.html"},{"key":"e_1_3_2_2_33_1","unstructured":"Nvidia. [n. d.]. Cutlass TF32 example. https:\/\/github.com\/NVIDIA\/cutlass\/blob\/main\/examples\/14_ampere_tf32_tensorop_gemm\/ampere_tf32_tensorop_gemm.cu"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1912.01703"},{"key":"e_1_3_2_2_35_1","unstructured":"[n. d.]. PyBlaz Compressor. https:\/\/github.com\/damtharvey\/pyblaz.git"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3552309"},{"key":"e_1_3_2_2_37_1","unstructured":"[n. d.]. Tools to Diagnose and Repair Floating-Point Errors in Heterogeneous Computing Hardware and Software. https:\/\/fpanalysistools.org\/SC24\/"},{"key":"e_1_3_2_2_38_1","unstructured":"[n. d.]. SRU NaN Issue. https:\/\/github.com\/asappresearch\/sru\/issues\/193"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2502.15999"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","unstructured":"Oreste Villa Mark Stephenson David Nellans and Stephen W Keckler. 2019. NVBit: A Dynamic Binary Instrumentation Framework for NVIDIA GPUs. In MICRO. 372\u2013383. https:\/\/doi.org\/10.1145\/3352460.3358307 10.1145\/3352460.3358307","DOI":"10.1145\/3352460.3358307"},{"key":"e_1_3_2_2_41_1","unstructured":"Peichen Xie Yanjie Gao and Jilong Xue. 2024. FPRev: Revealing the Order of Floating-Point Summation by Numerical Testing. arxiv:2411.00442."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS56514.2022.00015"}],"event":{"name":"ARRAY '25: 11th ACM SIGPLAN International Workshop on Libraries, Languages and Compilers for Array Programming","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages"],"location":"Seoul Republic of Korea","acronym":"ARRAY '25"},"container-title":["Proceedings of the 11th ACM SIGPLAN International Workshop on Libraries, Languages and Compilers for Array Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736112.3736144","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T05:57:42Z","timestamp":1752645462000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3736112.3736144"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,16]]},"references-count":42,"alternative-id":["10.1145\/3736112.3736144","10.1145\/3736112"],"URL":"https:\/\/doi.org\/10.1145\/3736112.3736144","relation":{},"subject":[],"published":{"date-parts":[[2025,6,16]]},"assertion":[{"value":"2025-06-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}