{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T08:04:20Z","timestamp":1769155460002,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:00:00Z","timestamp":1731369600000},"content-version":"vor","delay-in-days":366,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Department of Energy"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624184","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1081-1092","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["MatRIS: Multi-level Math Library Abstraction for Heterogeneity and Performance Portability using IRIS Runtime"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3419-4037","authenticated-orcid":false,"given":"Mohammad Alaul Haque","family":"Monil","sequence":"first","affiliation":[{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8259-8891","authenticated-orcid":false,"given":"Narasinga Rao","family":"Miniskar","sequence":"additional","affiliation":[{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6647-2690","authenticated-orcid":false,"given":"Keita","family":"Teranishi","sequence":"additional","affiliation":[{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2449-6720","authenticated-orcid":false,"given":"Jeffrey S.","family":"Vetter","sequence":"additional","affiliation":[{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1479-4310","authenticated-orcid":false,"given":"Pedro","family":"Valero-Lara","sequence":"additional","affiliation":[{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"hipBLAS, the Basic Linear Algebra Subroutine library. https:\/\/github.com\/ROCmSoftwarePlatform\/hipBLAS [Online","author":"AMD.","year":"2022","unstructured":"AMD. 2022. hipBLAS, the Basic Linear Algebra Subroutine library. https:\/\/github.com\/ROCmSoftwarePlatform\/hipBLAS [Online; accessed 6-July-2022]."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1631"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3302577"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2013.98"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC49654.2021.9622794"},{"key":"e_1_3_2_2_6_1","volume-title":"van\u00a0de Geijn","author":"Catal\u00e1n Sandra","year":"2016","unstructured":"Sandra Catal\u00e1n, Jos\u00e9\u00a0R. Herrero, Enrique\u00a0S. Quintana-Ort\u00ed, Rafael Rodr\u00edguez-S\u00e1nchez, and Robert\u00a0A. van\u00a0de Geijn. 2016. A Case for Malleable Thread-Level Linear Algebra Libraries: The LU Factorization with Partial Pivoting. CoRR abs\/1611.06365 (2016). arxiv:1611.06365http:\/\/arxiv.org\/abs\/1611.06365"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58144-2_8"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479897317685"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2017.05.138"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3264491"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3264491"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-09766-4_151"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342020938421"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/504210.504213"},{"key":"e_1_3_2_2_16_1","volume-title":"The Intel Math Kernel Library. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/onemkl-documentation.html?s=Newest [Online","year":"2022","unstructured":"Intel. 2022. The Intel Math Kernel Library. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/onemkl-documentation.html?s=Newest [Online; accessed 6-July-2022]."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC49654.2021.9622873"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.12.005"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC56025.2022.00042"},{"key":"e_1_3_2_2_20_1","volume-title":"Pedro Valero-Lara, Frank Liu, and Jeffrey\u00a0S Vetter.","author":"Miniskar Narasinga\u00a0Rao","year":"2023","unstructured":"Narasinga\u00a0Rao Miniskar, Mohammad Alaul\u00a0Haque Monil, Pedro Valero-Lara, Frank Liu, and Jeffrey\u00a0S Vetter. 2023. Tiling Framework for Heterogeneous Computing of Matrix-Based Tiled Algorithms. (2023)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/RSDHA56811.2022.00007"},{"key":"e_1_3_2_2_22_1","volume-title":"cuBLAS, the CUDA Basic Linear Algebra Subroutine library. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html [Online","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2022. cuBLAS, the CUDA Basic Linear Algebra Subroutine library. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html [Online; accessed 6-July-2022]."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3097283"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2900122"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMPDP.2019.8671545"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.12.002"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/IA356718.2022.00010"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-06156-1_35"},{"key":"e_1_3_2_2_29_1","volume-title":"KokkACC: Enhancing Kokkos with OpenACC. In 9th Workshop on Accelerator Programming Using Directives, WACCPD@SC 2022","author":"Valero-Lara Pedro","year":"2022","unstructured":"Pedro Valero-Lara, Seyong Lee, Marc\u00a0Gonz\u00e1lez Tallada, Joel\u00a0E. Denny, and Jeffrey\u00a0S. Vetter. 2022. KokkACC: Enhancing Kokkos with OpenACC. In 9th Workshop on Accelerator Programming Using Directives, WACCPD@SC 2022, Dallas, TX, USA, November 13-18, 2022. IEEE, 32\u201342."},{"key":"e_1_3_2_2_30_1","volume-title":"PPAM 2017","author":"Valero-Lara Pedro","year":"2017","unstructured":"Pedro Valero-Lara, Ivan Mart\u00ednez-Perez, Ra\u00fal Sirvent, Xavier Martorell, and Antonio\u00a0J. Pe\u00f1a. 2017. NVIDIA GPUs Scalability to Solve Multiple (Batch) Tridiagonal Systems Implementation of cuThomasBatch. In Parallel Processing and Applied Mathematics - 12th International Conference, PPAM 2017, Lublin, Poland, September 10-13, 2017, Revised Selected Papers, Part I. 243\u2013253."},{"key":"e_1_3_2_2_31_1","volume-title":"CUDA Routines to compute batch of tridiagonal systems on NVIDIA GPUs. Concurrency and Computation: Practice and Experience 30, 24","author":"Valero-Lara Pedro","year":"2018","unstructured":"Pedro Valero-Lara, Ivan Mart\u00ednez-P\u00e9rez, Ra\u00fal Sirvent, Xavier Martorell, and Antonio\u00a0J. Pe\u00f1a. 2018. cuThomasBatch and cuThomasVBatch, CUDA Routines to compute batch of tridiagonal systems on NVIDIA GPUs. Concurrency and Computation: Practice and Experience 30, 24 (2018)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/PDP2018.2018.00065"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2013.12.026"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503219"},{"key":"e_1_3_2_2_35_1","volume-title":"Encyclopedia of Parallel Computing, David\u00a0A","author":"Whaley Clint","unstructured":"R.\u00a0Clint Whaley. 2011. ATLAS (Automatically Tuned Linear Algebra Software). In Encyclopedia of Parallel Computing, David\u00a0A. Padua (Ed.). Springer, 95\u2013101."},{"key":"e_1_3_2_2_36_1","volume-title":"Proceedings of the ACM\/IEEE Conference on Supercomputing, SC 1998","author":"R.","year":"1998","unstructured":"R.\u00a0Clinton Whaley and Jack\u00a0J. Dongarra. 1998. Automatically Tuned Linear Algebra Software. In Proceedings of the ACM\/IEEE Conference on Supercomputing, SC 1998, November 7-13, 1998, Orlando, FL, USA. IEEE Computer Society, 38."},{"key":"e_1_3_2_2_37_1","volume-title":"https:\/\/www.openblas.net\/ [Online","author":"Zhang\u00a0Xianyi Martin\u00a0Kroeker","year":"2022","unstructured":"Martin\u00a0Kroeker Zhang\u00a0Xianyi. 2022. OpenBLAS. https:\/\/www.openblas.net\/ [Online; accessed 6-July-2022]."}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","location":"Denver CO USA","acronym":"SC-W 2023"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624184","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624184","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624184","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:04:45Z","timestamp":1755745485000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624184"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":37,"alternative-id":["10.1145\/3624062.3624184","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624184","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}