{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T19:30:36Z","timestamp":1772479836502,"version":"3.50.1"},"reference-count":14,"publisher":"Association for Computing Machinery (ACM)","issue":"1","license":[{"start":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T00:00:00Z","timestamp":1679356800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Exascale Computing Project","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}]},{"DOI":"10.13039\/100006132","name":"U.S. Department of Energy Office of Science","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100006132","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["ACM Trans. Math. Softw."],"published-print":{"date-parts":[[2023,3,31]]},"abstract":"<jats:p>\n            We present the new features available in the recent release of\n            <jats:monospace>SuperLU_DIST<\/jats:monospace>\n            , Version 8.1.1.\n            <jats:monospace>SuperLU_DIST<\/jats:monospace>\n            is a distributed-memory parallel sparse direct solver. The new features include (1) a 3D communication-avoiding algorithm framework that trades off inter-process communication for selective memory duplication, (2) multi-GPU support for both NVIDIA GPUs and AMD GPUs, and (3) mixed-precision routines that perform single-precision LU factorization and double-precision iterative refinement. Apart from the algorithm improvements, we also modernized the software build system to use CMake and Spack package installation tools to simplify the installation procedure. Throughout the article, we describe in detail the pertinent performance-sensitive parameters associated with each new algorithmic feature, show how they are exposed to the users, and give general guidance of how to set these parameters. We illustrate that the solver\u2019s performance both in time and memory can be greatly improved after systematic tuning of the parameters, depending on the input sparse matrix and underlying hardware.\n          <\/jats:p>","DOI":"10.1145\/3577197","type":"journal-article","created":{"date-parts":[[2022,12,19]],"date-time":"2022-12-19T14:49:50Z","timestamp":1671461390000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":19,"title":["Newly Released Capabilities in the Distributed-Memory SuperLU Sparse Direct Solver"],"prefix":"10.1145","volume":"49","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0747-698X","authenticated-orcid":false,"given":"Xiaoye S.","family":"Li","sequence":"first","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, CA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2128-0219","authenticated-orcid":false,"given":"Paul","family":"Lin","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, CA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3750-1178","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, CA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9432-5855","authenticated-orcid":false,"given":"Piyush","family":"Sao","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, TN"}]}],"member":"320","published-online":{"date-parts":[[2023,3,21]]},"reference":[{"key":"e_1_3_4_2_2","doi-asserted-by":"publisher","DOI":"10.1137\/18M1189348"},{"key":"e_1_3_4_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/1141885.1141894"},{"key":"e_1_3_4_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/1462173.1462177"},{"key":"e_1_3_4_5_2","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611976830.14"},{"key":"e_1_3_4_6_2","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611976137.9"},{"key":"e_1_3_4_7_2","volume-title":"SuperLU Users\u2019 Guide","author":"Li X. S.","year":"1999","unstructured":"X. S. Li, J. W. Demmel, J. R. Gilbert, L. Grigori, P. Sao, M. Shao, and I. Yamazaki. 1999. SuperLU Users\u2019 Guide. Technical Report LBNL-44289 (last updated June 2018). Lawrence Berkeley National Laboratory. https:\/\/portal.nersc.gov\/project\/sparse\/superlu\/ug.pdf."},{"key":"e_1_3_4_8_2","volume-title":"Proceedings of the High Performance Networking and Computing Conference (SC\u201998)","author":"Li X. S.","year":"1998","unstructured":"X. S. Li and J. W. Demmel. 1998. Making sparse Gaussian elimination scalable by static pivoting. In Proceedings of the High Performance Networking and Computing Conference (SC\u201998)."},{"key":"e_1_3_4_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/779359.779361"},{"key":"e_1_3_4_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441621"},{"key":"e_1_3_4_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.104"},{"key":"e_1_3_4_12_2","doi-asserted-by":"crossref","unstructured":"P. Sao R. Vuduc and X. Li. 2014. A distributed CPU-GPU sparse direct solver. In Euro-Par 2014 Parallel Processing . Lecture Notes in Computer Science Vol. 8632. Springer 487\u2013498.","DOI":"10.1007\/978-3-319-09873-9_41"},{"key":"e_1_3_4_13_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.03.004"},{"key":"e_1_3_4_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330357"},{"key":"e_1_3_4_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2012.63"}],"container-title":["ACM Transactions on Mathematical Software"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3577197","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3577197","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:32Z","timestamp":1750178852000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3577197"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,21]]},"references-count":14,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,3,31]]}},"alternative-id":["10.1145\/3577197"],"URL":"https:\/\/doi.org\/10.1145\/3577197","relation":{},"ISSN":["0098-3500","1557-7295"],"issn-type":[{"value":"0098-3500","type":"print"},{"value":"1557-7295","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,21]]},"assertion":[{"value":"2022-05-19","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2022-11-25","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2023-03-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}