{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T07:27:26Z","timestamp":1770708446961,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Scientific Discovery through Advanced Computing (SciDAC) program through the FASTMath Institute","award":["DE-AC02- 05CH11231"],"award-info":[{"award-number":["DE-AC02- 05CH11231"]}]},{"name":"Exascale Computing Project","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3581784.3607092","type":"proceedings-article","created":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T21:47:06Z","timestamp":1699998426000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Unified Communication Optimization Strategies for Sparse Triangular Solver on CPU and GPU Clusters"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3750-1178","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"first","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9624-9449","authenticated-orcid":false,"given":"Nan","family":"Ding","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9432-5855","authenticated-orcid":false,"given":"Piyush","family":"Sao","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8327-5717","authenticated-orcid":false,"given":"Samuel","family":"Williams","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0747-698X","authenticated-orcid":false,"given":"Xiaoye Sherry","family":"Li","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Berkeley, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-57675-2_33"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974690.ch2"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_1_5_1","volume-title":"Communication-avoiding parallel and sequential QR factorizations. CoRR abs\/0806.2159","author":"Demmel James","year":"2008","unstructured":"James Demmel , Laura Grigori , Mark Hoemmen , and Julien Langou . 2008. Communication-avoiding parallel and sequential QR factorizations. CoRR abs\/0806.2159 ( 2008 ). James Demmel, Laura Grigori, Mark Hoemmen, and Julien Langou. 2008. Communication-avoiding parallel and sequential QR factorizations. CoRR abs\/0806.2159 (2008)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2008.4536305"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479895291765"},{"key":"e_1_3_2_1_8_1","unstructured":"J. W. Demmel J. R. Gilbert and X. S. Li. 1997. SuperLU and SuperLU_MT. http:\/\/www.netlib.org\/scalapack\/prototype\/.  J. W. Demmel J. R. Gilbert and X. S. Li. 1997. SuperLU and SuperLU_MT. http:\/\/www.netlib.org\/scalapack\/prototype\/."},{"key":"e_1_3_2_1_9_1","volume-title":"Technical Report UCB\/\/CSD-97-944. Computer Science Division, U.C.","author":"Demmel J. W.","year":"1997","unstructured":"J. W. Demmel , J. R. Gilbert , and X. S. Li . 1997 . SuperLU Users' Guide . Technical Report UCB\/\/CSD-97-944. Computer Science Division, U.C. Berkeley . J. W. Demmel, J. R. Gilbert, and X. S. Li. 1997. SuperLU Users' Guide. Technical Report UCB\/\/CSD-97-944. Computer Science Division, U.C. Berkeley."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479897317685"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of SC19","author":"Ding N.","unstructured":"N. Ding , Y. Liu , X. S. Li , and S. Williams . 2019. Leveraging One-Sided Communication for Sparse Triangular Solvers --- A Pathway to Exascale Solvers . In Proceedings of SC19 . Denver, CO. N. Ding, Y. Liu, X. S. Li, and S. Williams. 2019. Leveraging One-Sided Communication for Sparse Triangular Solvers --- A Pathway to Exascale Solvers. In Proceedings of SC19. Denver, CO."},{"key":"e_1_3_2_1_12_1","volume-title":"Multi-GPU Parallel Sparse Triangular Solver. In SIAM Conference on Applied and Computational Discrete Algorithms (ACDA21)","author":"Ding Nan","year":"2021","unstructured":"Nan Ding , Yang Liu , Samuel Williams , and Xiaoye S Li . 2021 . A Message-Driven , Multi-GPU Parallel Sparse Triangular Solver. In SIAM Conference on Applied and Computational Discrete Algorithms (ACDA21) . SIAM, 147--159. Nan Ding, Yang Liu, Samuel Williams, and Xiaoye S Li. 2021. A Message-Driven, Multi-GPU Parallel Sparse Triangular Solver. In SIAM Conference on Applied and Computational Discrete Algorithms (ACDA21). SIAM, 147--159."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611976137.9"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/PDP2018.2018.00034"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2021.07.013"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2022.102897"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1137\/S1064827595287997"},{"key":"e_1_3_2_1_18_1","volume-title":"An Overview of SuperLU: Algorithms, Implementation, and User Interface","author":"X. S. Li.","unstructured":"X. S. Li. 2003. An Overview of SuperLU: Algorithms, Implementation, and User Interface . Technical Report LBNL-53848. Lawrence Berkeley National Laboratory . http:\/\/crd.lbl.gov\/~xiaoye\/LBNL-53848.pdf. X. S. Li. 2003. An Overview of SuperLU: Algorithms, Implementation, and User Interface. Technical Report LBNL-53848. Lawrence Berkeley National Laboratory. http:\/\/crd.lbl.gov\/~xiaoye\/LBNL-53848.pdf."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1089014.1089017"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of VECPAR'08 8th International Meeting High Performance Computing for Computational Science","author":"Li X. S.","year":"2008","unstructured":"X. S. Li . 2008 . Evaluation of sparse factorization and triangular solution on multicore architectures . In Proceedings of VECPAR'08 8th International Meeting High Performance Computing for Computational Science . Toulouse, France. X. S. Li. 2008. Evaluation of sparse factorization and triangular solution on multicore architectures. In Proceedings of VECPAR'08 8th International Meeting High Performance Computing for Computational Science. Toulouse, France."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/125\/1\/012079"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/509058.509092"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the Ninth SIAM Conference on Parallel Processing for Scientific Computing","author":"Li X. S.","unstructured":"X. S. Li and J. W. Demmel . 1999. A Scalable Sparse Direct Solver using Static Pivoting . In Proceedings of the Ninth SIAM Conference on Parallel Processing for Scientific Computing . San Antonio, Texas. X. S. Li and J. W. Demmel. 1999. A Scalable Sparse Direct Solver using Static Pivoting. In Proceedings of the Ninth SIAM Conference on Parallel Processing for Scientific Computing. San Antonio, Texas."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/779359.779361"},{"key":"e_1_3_2_1_25_1","volume-title":"Newly Released Capabilities in Distributed-memory SuperLU Sparse Direct Solver. ACM Trans. Math. Software","author":"Li Xiaoye S","year":"2022","unstructured":"Xiaoye S Li , Paul Lin , Yang Liu , and Piyush Sao . 2022. Newly Released Capabilities in Distributed-memory SuperLU Sparse Direct Solver. ACM Trans. Math. Software ( 2022 ). Xiaoye S Li, Paul Lin, Yang Liu, and Piyush Sao. 2022. Newly Released Capabilities in Distributed-memory SuperLU Sparse Direct Solver. ACM Trans. Math. Software (2022)."},{"key":"e_1_3_2_1_26_1","volume-title":"Technical Report LBNL-2178E.","author":"Li X. S.","year":"2009","unstructured":"X. S. Li and M. Shao . 2009 . A Supernodal approach to imcomplete LU factorization with partial pivoting. Technical Report LBNL-2178E. Lawrence Berkeley National Laboratory. ACM Trans. Mathematical Software ( submitted). X. S. Li and M. Shao. 2009. A Supernodal approach to imcomplete LU factorization with partial pivoting. Technical Report LBNL-2178E. Lawrence Berkeley National Laboratory. ACM Trans. Mathematical Software (submitted)."},{"key":"e_1_3_2_1_27_1","article-title":"A Supernodal approach to imcomplete LU factorization with partial pivoting","volume":"37","author":"Li X. S.","year":"2010","unstructured":"X. S. Li and M. Shao . 2010 . A Supernodal approach to imcomplete LU factorization with partial pivoting . ACM Trans. Math. Software 37 , 4 (2010). X. S. Li and M. Shao. 2010. A Supernodal approach to imcomplete LU factorization with partial pivoting. ACM Trans. Math. Software 37, 4 (2010).","journal-title":"ACM Trans. Math. Software"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4244"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611975215.9"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCS.2017.102"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00607-009-0066-3"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0129626498000067"},{"key":"e_1_3_2_1_33_1","volume-title":"High Performance Computing in Science and Engineering: HPCSE'15","author":"Reps Bram","year":"2015","unstructured":"Bram Reps , P Ghysels , O Schenk , K Meerbergen , and W Vanroose . 2015 . Communication Avoiding and Hiding in preconditioned Krylov solvers . In High Performance Computing in Science and Engineering: HPCSE'15 . Bram Reps, P Ghysels, O Schenk, K Meerbergen, and W Vanroose. 2015. Communication Avoiding and Hiding in preconditioned Krylov solvers. In High Performance Computing in Science and Engineering: HPCSE'15."},{"key":"e_1_3_2_1_34_1","unstructured":"F.-H. Rouet. 2012. Memory and performance issues in parallel multifrontal factorization and triangular solutions with sparse right-hand sides. Theses. Universit\u00e9 de Toulouse.  F.-H. Rouet. 2012. Memory and performance issues in parallel multifrontal factorization and triangular solutions with sparse right-hand sides. Theses. Universit\u00e9 de Toulouse."},{"key":"e_1_3_2_1_35_1","volume-title":"29th IEEE International Parallel & Distributed Processing Symposium (IPDPS)","author":"Sao P.","unstructured":"P. Sao , X. Liu , R. Vuduc , and X.S. Li . 2015. A Sparse Direct Solver for Distributed Memory Xeon Phi-accelerated Systems . In 29th IEEE International Parallel & Distributed Processing Symposium (IPDPS) . Hyderabad, India. P. Sao, X. Liu, R. Vuduc, and X.S. Li. 2015. A Sparse Direct Solver for Distributed Memory Xeon Phi-accelerated Systems. In 29th IEEE International Parallel & Distributed Processing Symposium (IPDPS). Hyderabad, India."},{"key":"e_1_3_2_1_36_1","first-page":"487","volume-title":"Proc. of Euro-Par","volume":"8632","author":"Sao P.","year":"2014","unstructured":"P. Sao , R. Vuduc , and X. Li . 2014. A Distributed CPU-GPU Sparse Direct Solver . In Proc. of Euro-Par 2014 , LNCS Vol. 8632 , pp. 487 -- 498 . Porto, Portugal. P. Sao, R. Vuduc, and X. Li. 2014. A Distributed CPU-GPU Sparse Direct Solver. In Proc. of Euro-Par 2014, LNCS Vol. 8632, pp. 487--498. Porto, Portugal."},{"key":"e_1_3_2_1_37_1","volume-title":"32nd IEEE International Parallel & Distributed Processing Symposium (IPDPS)","author":"Sao P.","unstructured":"P. Sao , R. Vuduc , and X.S. Li . 2018. A Communication-Avoiding 3D Factorization for Sparse Matrices . In 32nd IEEE International Parallel & Distributed Processing Symposium (IPDPS) . Vancouver, Canada. P. Sao, R. Vuduc, and X.S. Li. 2018. A Communication-Avoiding 3D Factorization for Sparse Matrices. In 32nd IEEE International Parallel & Distributed Processing Symposium (IPDPS). Vancouver, Canada."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.03.004"},{"key":"e_1_3_2_1_39_1","volume-title":"ICS 2019: International Conference on Supercomputing","author":"Sao P.","unstructured":"P. Sao , R. Vuduc , and X.S. Li . 2019. A Communication-Avoiding 3D Sparse Triangular Solver . In ICS 2019: International Conference on Supercomputing . Phoenix, AZ. P. Sao, R. Vuduc, and X.S. Li. 2019. A Communication-Avoiding 3D Sparse Triangular Solver. In ICS 2019: International Conference on Supercomputing. Phoenix, AZ."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2010.5470941"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2014.06.006"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178513"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.104"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CAHPC.2018.8645938"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3066635"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.camwa.2019.09.012"}],"event":{"name":"SC '23: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver CO USA","acronym":"SC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607092","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607092","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:23Z","timestamp":1750178183000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607092"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":45,"alternative-id":["10.1145\/3581784.3607092","10.1145\/3581784"],"URL":"https:\/\/doi.org\/10.1145\/3581784.3607092","relation":{},"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"2023-11-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}