{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T16:02:02Z","timestamp":1780675322111,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":81,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3581784.3607050","type":"proceedings-article","created":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T21:47:06Z","timestamp":1699998426000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":19,"title":["PanguLU: A Scalable Regular Two-Dimensional Block-Cyclic Sparse Direct Solver on Distributed Heterogeneous Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0769-9591","authenticated-orcid":false,"given":"Xu","family":"Fu","sequence":"first","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5787-8964","authenticated-orcid":false,"given":"Bingbin","family":"Zhang","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2172-8435","authenticated-orcid":false,"given":"Tengcheng","family":"Wang","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2868-1257","authenticated-orcid":false,"given":"Wenhao","family":"Li","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6387-8116","authenticated-orcid":false,"given":"Yuechen","family":"Lu","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0286-9429","authenticated-orcid":false,"given":"Enxin","family":"Yi","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6731-4317","authenticated-orcid":false,"given":"Jianqi","family":"Zhao","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5379-9429","authenticated-orcid":false,"given":"Xiaohan","family":"Geng","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3072-1311","authenticated-orcid":false,"given":"Fangying","family":"Li","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7234-7220","authenticated-orcid":false,"given":"Jingwen","family":"Zhang","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0632-9494","authenticated-orcid":false,"given":"Zhou","family":"Jin","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2150-5759","authenticated-orcid":false,"given":"Weifeng","family":"Liu","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"https:\/\/www.top500.org\/. 2023.  https:\/\/www.top500.org\/. 2023."},{"issue":"3","key":"e_1_3_2_1_2_1","volume":"38","author":"Agullo E.","year":"2016","unstructured":"E. Agullo , P. R. Amestoy , A. Buttari , A. Guermouche , J.-Y. L'Excellent , and F. -H. Rouet. Robust Memory-Aware Mappings for Parallel Multifrontal Factorizations. SIAM Journal on Scientific Computing , 38 ( 3 ), 2016 . E. Agullo, P. R. Amestoy, A. Buttari, A. Guermouche, J.-Y. L'Excellent, and F.-H. Rouet. Robust Memory-Aware Mappings for Parallel Multifrontal Factorizations. SIAM Journal on Scientific Computing, 38(3), 2016.","journal-title":"-H. Rouet. Robust Memory-Aware Mappings for Parallel Multifrontal Factorizations. SIAM Journal on Scientific Computing"},{"key":"e_1_3_2_1_3_1","volume-title":"LU Factorization for Accelerator-Based Systems. In AICCSA '11","author":"Agullo E.","year":"2011","unstructured":"E. Agullo , C. Augonnet , J. Dongarra , M. Faverge , J. Langou , H. Ltaief , and S. Tomov . LU Factorization for Accelerator-Based Systems. In AICCSA '11 . IEEE, 2011 . E. Agullo, C. Augonnet, J. Dongarra, M. Faverge, J. Langou, H. Ltaief, and S. Tomov. LU Factorization for Accelerator-Based Systems. In AICCSA '11. IEEE, 2011."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1137\/120903476"},{"issue":"4","key":"e_1_3_2_1_5_1","volume":"17","author":"Amestoy P. R.","year":"1996","unstructured":"P. R. Amestoy , T. A. Davis , and I. S. Duff . An Approximate Minimum Degree Ordering Algorithm. SIAM Journal on Matrix Analysis and Applications , 17 ( 4 ), 1996 . P. R. Amestoy, T. A. Davis, and I. S. Duff. An Approximate Minimum Degree Ordering Algorithm. SIAM Journal on Matrix Analysis and Applications, 17(4), 1996.","journal-title":"An Approximate Minimum Degree Ordering Algorithm. SIAM Journal on Matrix Analysis and Applications"},{"key":"e_1_3_2_1_6_1","volume-title":"An Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3)","author":"Amestoy P. R.","year":"2004","unstructured":"P. R. Amestoy , T. A. Davis , and I. S. Duff . Algorithm 837: AMD , An Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3) , 2004 . P. R. Amestoy, T. A. Davis, and I. S. Duff. Algorithm 837: AMD, An Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3), 2004."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479899358194"},{"key":"e_1_3_2_1_8_1","volume-title":"MUMPS: A General Purpose Distributed Memory Sparse Solver. In International Workshop on Applied Parallel Computing","author":"Amestoy P. R.","year":"2000","unstructured":"P. R. Amestoy , I. S. Duff , J.-Y. L' Excellent , and J. Koster . MUMPS: A General Purpose Distributed Memory Sparse Solver. In International Workshop on Applied Parallel Computing , 2000 . P. R. Amestoy, I. S. Duff, J.-Y. L' Excellent, and J. Koster. MUMPS: A General Purpose Distributed Memory Sparse Solver. In International Workshop on Applied Parallel Computing, 2000."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479802419877"},{"key":"e_1_3_2_1_10_1","series-title":"SIAM Journal on Matrix Analysis and Applications, 24(2)","volume-title":"An Unsymmetrized Multifrontal LU Factorization","author":"Amestoy P. R.","year":"2002","unstructured":"P. R. Amestoy and C. Puglisi . An Unsymmetrized Multifrontal LU Factorization . SIAM Journal on Matrix Analysis and Applications, 24(2) , 2002 . P. R. Amestoy and C. Puglisi. An Unsymmetrized Multifrontal LU Factorization. SIAM Journal on Matrix Analysis and Applications, 24(2), 2002."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/323215"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342019888567"},{"issue":"3","key":"e_1_3_2_1_13_1","volume":"26","author":"Chen X.","year":"2015","unstructured":"X. Chen , L. Ren , Y. Wang , and H. Yang . GPU-Accelerated Sparse LU Factorization for Circuit Simulation with Performance Modeling. IEEE Transactions on Parallel and Distributed Systems , 26 ( 3 ), 2015 . X. Chen, L. Ren, Y. Wang, and H. Yang. GPU-Accelerated Sparse LU Factorization for Circuit Simulation with Performance Modeling. IEEE Transactions on Parallel and Distributed Systems, 26(3), 2015.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_14_1","volume-title":"An Adaptive LU Factorization Algorithm for Parallel Circuit Simulation. In ASP-DAC '12","author":"Chen X.","year":"2012","unstructured":"X. Chen , Y. Wang , and H. Yang . An Adaptive LU Factorization Algorithm for Parallel Circuit Simulation. In ASP-DAC '12 , 2012 . X. Chen, Y. Wang, and H. Yang. An Adaptive LU Factorization Algorithm for Parallel Circuit Simulation. In ASP-DAC '12, 2012."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2012.2217964"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.7873\/DATE.2015.0603"},{"issue":"2","key":"e_1_3_2_1_17_1","volume":"30","author":"Davis T. A.","year":"2004","unstructured":"T. A. Davis . Algorithm 832: UMFPACK V4.3---an Unsymmetric-Pattern Multifrontal Method. ACM Transactions on Mathematical Software , 30 ( 2 ), 2004 . T. A. Davis. Algorithm 832: UMFPACK V4.3---an Unsymmetric-Pattern Multifrontal Method. ACM Transactions on Mathematical Software, 30(2), 2004.","journal-title":"Unsymmetric-Pattern Multifrontal Method. ACM Transactions on Mathematical Software"},{"key":"e_1_3_2_1_18_1","volume-title":"SIAM","author":"Davis T. A.","year":"2006","unstructured":"T. A. Davis . Direct Methods for Sparse Linear Systems . SIAM , 2006 . T. A. Davis. Direct Methods for Sparse Linear Systems. SIAM, 2006."},{"issue":"3","key":"e_1_3_2_1_19_1","volume":"30","author":"Davis T. A.","year":"2004","unstructured":"T. A. Davis , J. R. Gilbert , S. I. Larimore , and E. G. Ng . A Column Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software , 30 ( 3 ), 2004 . T. A. Davis, J. R. Gilbert, S. I. Larimore, and E. G. Ng. A Column Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3), 2004.","journal-title":"A Column Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software"},{"key":"e_1_3_2_1_20_1","volume-title":"A Column Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3)","author":"Davis T. A.","year":"2004","unstructured":"T. A. Davis , J. R. Gilbert , S. I. Larimore , and E. G. Ng . Algorithm 836: COLAMD , A Column Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3) , 2004 . T. A. Davis, J. R. Gilbert, S. I. Larimore, and E. G. Ng. Algorithm 836: COLAMD, A Column Approximate Minimum Degree Ordering Algorithm. ACM Transactions on Mathematical Software, 30(3), 2004."},{"key":"e_1_3_2_1_21_1","volume-title":"Dynamic Supernodes in Sparse Cholesky Update\/Downdate and Triangular Solves. ACM Transactions on Mathematical Software, 35(4)","author":"Davis T. A.","year":"2009","unstructured":"T. A. Davis and W. W. Hager . Dynamic Supernodes in Sparse Cholesky Update\/Downdate and Triangular Solves. ACM Transactions on Mathematical Software, 35(4) , 2009 . T. A. Davis and W. W. Hager. Dynamic Supernodes in Sparse Cholesky Update\/Downdate and Triangular Solves. ACM Transactions on Mathematical Software, 35(4), 2009."},{"key":"e_1_3_2_1_22_1","volume-title":"The University of Florida Sparse Matrix Collection. ACM Transactions on Mathematical Software, 38(1)","author":"Davis T. A.","year":"2011","unstructured":"T. A. Davis and Y. Hu . The University of Florida Sparse Matrix Collection. ACM Transactions on Mathematical Software, 38(1) , 2011 . T. A. Davis and Y. Hu. The University of Florida Sparse Matrix Collection. ACM Transactions on Mathematical Software, 38(1), 2011."},{"issue":"3","key":"e_1_3_2_1_23_1","volume":"37","author":"Davis T. A.","year":"2010","unstructured":"T. A. Davis and E. Palamadai Natarajan . Algorithm 907: KLU , A Direct Sparse Solver for Circuit Simulation Problems. ACM Transactions on Mathematical Software , 37 ( 3 ), 2010 . T. A. Davis and E. Palamadai Natarajan. Algorithm 907: KLU, A Direct Sparse Solver for Circuit Simulation Problems. ACM Transactions on Mathematical Software, 37(3), 2010.","journal-title":"A Direct Sparse Solver for Circuit Simulation Problems. ACM Transactions on Mathematical Software"},{"issue":"3","key":"e_1_3_2_1_24_1","volume":"20","author":"Demmel J. W.","year":"1999","unstructured":"J. W. Demmel , S. C. Eisenstat , J. R. Gilbert , X. S. Li , and J. W. H. Liu . A Supernodal Approach to Sparse Partial Pivoting. SIAM Journal on Matrix Analysis and Applications , 20 ( 3 ), 1999 . J. W. Demmel, S. C. Eisenstat, J. R. Gilbert, X. S. Li, and J. W. H. Liu. A Supernodal Approach to Sparse Partial Pivoting. SIAM Journal on Matrix Analysis and Applications, 20(3), 1999.","journal-title":"Sparse Partial Pivoting. SIAM Journal on Matrix Analysis and Applications"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479897317685"},{"key":"e_1_3_2_1_26_1","volume-title":"Multi-GPU Parallel Sparse Triangular Solver. In ACDA21 '21","author":"Ding N.","year":"2021","unstructured":"N. Ding , Y. Liu , S. Williams , and X. S. Li . A Message-driven , Multi-GPU Parallel Sparse Triangular Solver. In ACDA21 '21 , 2021 . N. Ding, Y. Liu, S. Williams, and X. S. Li. A Message-driven, Multi-GPU Parallel Sparse Triangular Solver. In ACDA21 '21, 2021."},{"key":"e_1_3_2_1_27_1","volume-title":"Survey of Sparse Matrix Research. Proceedings of the IEEE, 65(4)","author":"Duff I. S.","year":"1977","unstructured":"I. S. Duff . A Survey of Sparse Matrix Research. Proceedings of the IEEE, 65(4) , 1977 . I. S. Duff. A Survey of Sparse Matrix Research. Proceedings of the IEEE, 65(4), 1977."},{"key":"e_1_3_2_1_28_1","volume-title":"Parallel Implementation of Multifrontal Schemes. Parallel computing, 3(3)","author":"Duff I. S.","year":"1986","unstructured":"I. S. Duff . Parallel Implementation of Multifrontal Schemes. Parallel computing, 3(3) , 1986 . I. S. Duff. Parallel Implementation of Multifrontal Schemes. Parallel computing, 3(3), 1986."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1201\/9781003069522-9"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1093\/acprof:oso\/9780198508380.001.0001"},{"key":"e_1_3_2_1_31_1","volume-title":"NLAFET Working Note","author":"Duff I. S.","year":"2018","unstructured":"I. S. Duff , J. D. Hogg , and F. Lopez . A New Sparse Symmetric Indefinite Solver Using A Posteriori Threshold Pivoting . NLAFET Working Note , 2018 . I. S. Duff, J. D. Hogg, and F. Lopez. A New Sparse Symmetric Indefinite Solver Using A Posteriori Threshold Pivoting. NLAFET Working Note, 2018."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479897317661"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479899358443"},{"key":"e_1_3_2_1_34_1","volume-title":"Parco '19","author":"Duff I. S.","year":"2019","unstructured":"I. S. Duff , P. Leleux , D. Ruiz , and F. S. Torun . Improving the Scalability of the ABCD Solver with a Combination of New Load Balancing and Communication Minimization Techniques . In Parco '19 , 2019 . I. S. Duff, P. Leleux, D. Ruiz, and F. S. Torun. Improving the Scalability of the ABCD Solver with a Combination of New Load Balancing and Communication Minimization Techniques. In Parco '19, 2019."},{"key":"e_1_3_2_1_35_1","series-title":"SIAM Journal on Matrix Analysis and Applications, 27(2)","volume-title":"Strategies for Scaling and Pivoting for Sparse Symmetric Indefinite Problems","author":"Duff I. S.","year":"2005","unstructured":"I. S. Duff and S. Pralet . Strategies for Scaling and Pivoting for Sparse Symmetric Indefinite Problems . SIAM Journal on Matrix Analysis and Applications, 27(2) , 2005 . I. S. Duff and S. Pralet. Strategies for Scaling and Pivoting for Sparse Symmetric Indefinite Problems. SIAM Journal on Matrix Analysis and Applications, 27(2), 2005."},{"key":"e_1_3_2_1_36_1","volume-title":"The Multifrontal Solution of Indefinite Sparse Symmetric Linear. ACM Transactions on Mathematical Software, 9(3)","author":"Duff I. S.","year":"1983","unstructured":"I. S. Duff and J. K. Reid . The Multifrontal Solution of Indefinite Sparse Symmetric Linear. ACM Transactions on Mathematical Software, 9(3) , 1983 . I. S. Duff and J. K. Reid. The Multifrontal Solution of Indefinite Sparse Symmetric Linear. ACM Transactions on Mathematical Software, 9(3), 1983."},{"key":"e_1_3_2_1_37_1","volume-title":"MA48, a Fortran code for direct solution of sparse un-symmetric linear systems of equations","author":"Duff I. S.","year":"1993","unstructured":"I. S. Duff and J. K. Reid . MA48, a Fortran code for direct solution of sparse un-symmetric linear systems of equations . Science and Engineering Research Council , Rutherford Appleton Laboratory, 1993 . I. S. Duff and J. K. Reid. MA48, a Fortran code for direct solution of sparse un-symmetric linear systems of equations. Science and Engineering Research Council, Rutherford Appleton Laboratory, 1993."},{"issue":"2","key":"e_1_3_2_1_38_1","volume":"22","author":"Duff I. S.","year":"1996","unstructured":"I. S. Duff and J. K. Reid . The Design of MA48: A Code for The Direct Solution of Sparse Unsymmetric Linear Systems of Equations. ACM Transactions on Mathematical Software , 22 ( 2 ), 1996 . I. S. Duff and J. K. Reid. The Design of MA48: A Code for The Direct Solution of Sparse Unsymmetric Linear Systems of Equations. ACM Transactions on Mathematical Software, 22(2), 1996.","journal-title":"ACM Transactions on Mathematical Software"},{"key":"e_1_3_2_1_39_1","volume-title":"A Parallel Direct Solver for Large Sparse Highly Unsymmetric Linear Systems. ACM Transactions on Mathematical Software, 30(2)","author":"Duff I. S.","year":"2004","unstructured":"I. S. Duff and J. A. Scott . A Parallel Direct Solver for Large Sparse Highly Unsymmetric Linear Systems. ACM Transactions on Mathematical Software, 30(2) , 2004 . I. S. Duff and J. A. Scott. A Parallel Direct Solver for Large Sparse Highly Unsymmetric Linear Systems. ACM Transactions on Mathematical Software, 30(2), 2004."},{"key":"e_1_3_2_1_40_1","series-title":"SIAM Journal on Matrix Analysis and Applications, 13(1)","volume-title":"Exploiting Structural Symmetry in Unsymmetric Sparse Symbolic Factorization","author":"Eisenstat S. C.","year":"1992","unstructured":"S. C. Eisenstat and J. W. H. Liu . Exploiting Structural Symmetry in Unsymmetric Sparse Symbolic Factorization . SIAM Journal on Matrix Analysis and Applications, 13(1) , 1992 . S. C. Eisenstat and J. W. H. Liu. Exploiting Structural Symmetry in Unsymmetric Sparse Symbolic Factorization. SIAM Journal on Matrix Analysis and Applications, 13(1), 1992."},{"issue":"1","key":"e_1_3_2_1_41_1","volume":"14","author":"Eisenstat S. C.","year":"1993","unstructured":"S. C. Eisenstat and J. W. H. Liu . Exploiting Structural Symmetry in a Sparse Partial Pivoting Code. SIAM Journal on Scientific Computing , 14 ( 1 ), 1993 . S. C. Eisenstat and J. W. H. Liu. Exploiting Structural Symmetry in a Sparse Partial Pivoting Code. SIAM Journal on Scientific Computing, 14(1), 1993.","journal-title":"Sparse Partial Pivoting Code. SIAM Journal on Scientific Computing"},{"key":"e_1_3_2_1_42_1","volume-title":"Supernodal Sparse Cholesky Factorization on Distributed-memory Multiprocessors. In ICPP '93","volume":"3","author":"Eswar K.","year":"1993","unstructured":"K. Eswar , P. Sadayappan , C.-H. Huang , and V. Visvanathan . Supernodal Sparse Cholesky Factorization on Distributed-memory Multiprocessors. In ICPP '93 , volume 3 , 1993 . K. Eswar, P. Sadayappan, C.-H. Huang, and V. Visvanathan. Supernodal Sparse Cholesky Factorization on Distributed-memory Multiprocessors. In ICPP '93, volume 3, 1993."},{"key":"e_1_3_2_1_43_1","volume-title":"Multifrontal Factorization of Sparse Matrices on Shared-Memory Multiprocessors. In ICPP '91","author":"Eswar K.","year":"1991","unstructured":"K. Eswar , P. Sadayappan , and V. Visvanathan . Multifrontal Factorization of Sparse Matrices on Shared-Memory Multiprocessors. In ICPP '91 , 1991 . K. Eswar, P. Sadayappan, and V. Visvanathan. Multifrontal Factorization of Sparse Matrices on Shared-Memory Multiprocessors. In ICPP '91, 1991."},{"issue":"4","key":"e_1_3_2_1_44_1","volume":"33","author":"Gaihre A.","year":"2022","unstructured":"A. Gaihre , X. S. Li , and H. Liu . gSoFa: Scalable Sparse Symbolic LU Factorization on GPUs. IEEE Transactions on Parallel and Distributed Systems , 33 ( 4 ), 2022 . A. Gaihre, X. S. Li, and H. Liu. gSoFa: Scalable Sparse Symbolic LU Factorization on GPUs. IEEE Transactions on Parallel and Distributed Systems, 33(4), 2022.","journal-title":"gSoFa: Scalable Sparse Symbolic LU Factorization on GPUs. IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1137\/050638102"},{"issue":"4","key":"e_1_3_2_1_46_1","volume":"32","author":"Grigori L.","year":"2011","unstructured":"L. Grigori , J. W. Demmel , and H. Xiang . CALU: A Communication Optimal LU Factorization Algorithm. SIAM Journal on Matrix Analysis and Applications , 32 ( 4 ), 2011 . L. Grigori, J. W. Demmel, and H. Xiang. CALU: A Communication Optimal LU Factorization Algorithm. SIAM Journal on Matrix Analysis and Applications, 32(4), 2011.","journal-title":"CALU: A Communication Optimal LU Factorization Algorithm. SIAM Journal on Matrix Analysis and Applications"},{"key":"e_1_3_2_1_47_1","volume-title":"WSMP: Watson Sparse Matrix Package (Part-I: Direct Solution of Symmetric Sparse Systems)","author":"Gupta A.","year":"2000","unstructured":"A. Gupta . WSMP: Watson Sparse Matrix Package (Part-I: Direct Solution of Symmetric Sparse Systems) . IBM TJ Watson Research Center , Yorktown Heights, NY , 2000 . A. Gupta. WSMP: Watson Sparse Matrix Package (Part-I: Direct Solution of Symmetric Sparse Systems). IBM TJ Watson Research Center, Yorktown Heights, NY, 2000."},{"key":"e_1_3_2_1_48_1","volume-title":"WSMP: Watson Sparse Matrix Package (Part-II: Direct Solution of General Sparse Systems)","author":"Gupta A.","year":"2000","unstructured":"A. Gupta . WSMP: Watson Sparse Matrix Package (Part-II: Direct Solution of General Sparse Systems) . IBM TJ Watson Research Center , Yorktown Heights, NY , 2000 . A. Gupta. WSMP: Watson Sparse Matrix Package (Part-II: Direct Solution of General Sparse Systems). IBM TJ Watson Research Center, Yorktown Heights, NY, 2000."},{"issue":"3","key":"e_1_3_2_1_49_1","volume":"24","author":"He K.","year":"2015","unstructured":"K. He , S. X.-D. Tan , H. Wang , and G. Shi . GPU-Accelerated Parallel Sparse LU Factorization Method for Fast Circuit Analysis. IEEE Transactions on Very Large Scale Integration Systems , 24 ( 3 ), 2015 . K. He, S. X.-D. Tan, H. Wang, and G. Shi. GPU-Accelerated Parallel Sparse LU Factorization Method for Fast Circuit Analysis. IEEE Transactions on Very Large Scale Integration Systems, 24(3), 2015.","journal-title":"IEEE Transactions on Very Large Scale Integration Systems"},{"key":"e_1_3_2_1_50_1","volume-title":"Partitioning Meshes, and Computing Fill-reducing Orderings of Sparse Matrices.","author":"Karypis G.","year":"1997","unstructured":"G. Karypis and V. Kumar . METIS: A Software Package for Partitioning Unstructured Graphs , Partitioning Meshes, and Computing Fill-reducing Orderings of Sparse Matrices. 1997 . G. Karypis and V. Kumar. METIS: A Software Package for Partitioning Unstructured Graphs, Partitioning Meshes, and Computing Fill-reducing Orderings of Sparse Matrices. 1997."},{"key":"e_1_3_2_1_51_1","volume-title":"Huang. On Sparse Matrix Reordering for Parallel Factorization. In ICS '94","author":"Kumar B.","year":"1994","unstructured":"B. Kumar , P. Sadayappan , and C.- H. Huang. On Sparse Matrix Reordering for Parallel Factorization. In ICS '94 , 1994 . B. Kumar, P. Sadayappan, and C.-H. Huang. On Sparse Matrix Reordering for Parallel Factorization. In ICS '94, 1994."},{"issue":"11","key":"e_1_3_2_1_52_1","volume":"26","author":"Lee W.-K.","year":"2018","unstructured":"W.-K. Lee , R. Achar , and M. S. Nakhla . Dynamic GPU Parallel Sparse LU Factorization for Fast Circuit Simulation. IEEE Transactions on Very Large Scale Integration Systems , 26 ( 11 ), 2018 . W.-K. Lee, R. Achar, and M. S. Nakhla. Dynamic GPU Parallel Sparse LU Factorization for Fast Circuit Simulation. IEEE Transactions on Very Large Scale Integration Systems, 26(11), 2018.","journal-title":"IEEE Transactions on Very Large Scale Integration Systems"},{"issue":"3","key":"e_1_3_2_1_53_1","volume":"31","author":"An X. S. Li.","year":"2005","unstructured":"X. S. Li. An Overview of SuperLU: Algorithms , Implementation, and User Interface. ACM Transactions on Mathematical Software , 31 ( 3 ), 2005 . X. S. Li. An Overview of SuperLU: Algorithms, Implementation, and User Interface. ACM Transactions on Mathematical Software, 31(3), 2005.","journal-title":"User Interface. ACM Transactions on Mathematical Software"},{"key":"e_1_3_2_1_54_1","volume-title":"SC '98","author":"Li X. S.","year":"1998","unstructured":"X. S. Li and J. W. Demmel . Making Sparse Gaussian Elimination Scalable by Static Pivoting . In SC '98 , 1998 . X. S. Li and J. W. Demmel. Making Sparse Gaussian Elimination Scalable by Static Pivoting. In SC '98, 1998."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/779359.779361"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577197"},{"key":"e_1_3_2_1_57_1","volume-title":"SIAM Journal on Matrix Analysis and Applications, 11(1)","author":"Liu J. W. H.","year":"1990","unstructured":"J. W. H. Liu . The Role of Elimination Trees in Sparse Factorization . SIAM Journal on Matrix Analysis and Applications, 11(1) , 1990 . J. W. H. Liu. The Role of Elimination Trees in Sparse Factorization. SIAM Journal on Matrix Analysis and Applications, 11(1), 1990."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-43659-3_45"},{"key":"e_1_3_2_1_59_1","volume-title":"Unified Communication Optimization Strategies for Sparse Triangular Solver on CPU and GPU Clusters. In SC '23","author":"Liu Y.","year":"2023","unstructured":"Y. Liu , N. Ding , P. Sao , S. Williams , and X. S. Li . Unified Communication Optimization Strategies for Sparse Triangular Solver on CPU and GPU Clusters. In SC '23 , 2023 . Y. Liu, N. Ding, P. Sao, S. Williams, and X. S. Li. Unified Communication Optimization Strategies for Sparse Triangular Solver on CPU and GPU Clusters. In SC '23, 2023."},{"key":"e_1_3_2_1_60_1","volume-title":"Highly Scalable Distributed-memory Sparse Triangular Solution Algorithms. In CSC '18","author":"Liu Y.","year":"2018","unstructured":"Y. Liu , M. Jacquelin , P. Ghysels , and X. S. Li . Highly Scalable Distributed-memory Sparse Triangular Solution Algorithms. In CSC '18 , 2018 . Y. Liu, M. Jacquelin, P. Ghysels, and X. S. Li. Highly Scalable Distributed-memory Sparse Triangular Solution Algorithms. In CSC '18, 2018."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2020.2974910"},{"key":"e_1_3_2_1_62_1","volume-title":"Sparse LU Factorization for Parallel Circuit Simulation on GPU. In DAC '12","author":"Ren L.","year":"2012","unstructured":"L. Ren , X. Chen , Y. Wang , C. Zhang , and H. Yang . Sparse LU Factorization for Parallel Circuit Simulation on GPU. In DAC '12 , 2012 . L. Ren, X. Chen, Y. Wang, C. Zhang, and H. Yang. Sparse LU Factorization for Parallel Circuit Simulation on GPU. In DAC '12, 2012."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/76263.76304"},{"key":"e_1_3_2_1_64_1","volume-title":"Efficient Sparse Matrix Factorization for Circuit Simulation On Vector Supercomputers. In DAC '98","author":"Sadayappan P.","year":"1989","unstructured":"P. Sadayappan and V. Visvanathan . Efficient Sparse Matrix Factorization for Circuit Simulation On Vector Supercomputers. In DAC '98 , 1989 . P. Sadayappan and V. Visvanathan. Efficient Sparse Matrix Factorization for Circuit Simulation On Vector Supercomputers. In DAC '98, 1989."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330357"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2018.00100"},{"key":"e_1_3_2_1_67_1","volume-title":"A Communication-Avoiding 3D Algorithm for Sparse LU Factorization on Heterogeneous Systems. Journal of Parallel and Distributed Computing, 131","author":"Sao P.","year":"2019","unstructured":"P. Sao , X. S. Li , and R. Vuduc . A Communication-Avoiding 3D Algorithm for Sparse LU Factorization on Heterogeneous Systems. Journal of Parallel and Distributed Computing, 131 , 2019 . P. Sao, X. S. Li, and R. Vuduc. A Communication-Avoiding 3D Algorithm for Sparse LU Factorization on Heterogeneous Systems. Journal of Parallel and Distributed Computing, 131, 2019."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.104"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-09873-9_41"},{"key":"e_1_3_2_1_70_1","volume-title":"Solving Unsymmetric Sparse Systems of Linear Equations with PARDISO. Future Generation Computer Systems, 20(3)","author":"Schenk O.","year":"2004","unstructured":"O. Schenk and K. G\u00e4rtner . Solving Unsymmetric Sparse Systems of Linear Equations with PARDISO. Future Generation Computer Systems, 20(3) , 2004 . O. Schenk and K. G\u00e4rtner. Solving Unsymmetric Sparse Systems of Linear Equations with PARDISO. Future Generation Computer Systems, 20(3), 2004."},{"key":"e_1_3_2_1_71_1","volume-title":"Two-level Dynamic Scheduling in PARDISO: Improved scalability on Shared memory Multiprocessing Systems. Parallel Computing, 28(2)","author":"Schenk O.","year":"2002","unstructured":"O. Schenk and K. G\u00e4rtner . Two-level Dynamic Scheduling in PARDISO: Improved scalability on Shared memory Multiprocessing Systems. Parallel Computing, 28(2) , 2002 . O. Schenk and K. G\u00e4rtner. Two-level Dynamic Scheduling in PARDISO: Improved scalability on Shared memory Multiprocessing Systems. Parallel Computing, 28(2), 2002."},{"key":"e_1_3_2_1_72_1","first-page":"40","author":"Schenk O.","year":"2000","unstructured":"O. Schenk , K. G\u00e4rtner , and W. Fichtner . Efficient Sparse LU Factorization with Left-right Looking Strategy on Shared Memory Multiprocessors. BIT Numerical Mathematics , 40 , 2000 . O. Schenk, K. G\u00e4rtner, and W. Fichtner. Efficient Sparse LU Factorization with Left-right Looking Strategy on Shared Memory Multiprocessors. BIT Numerical Mathematics, 40, 2000.","journal-title":"BIT Numerical Mathematics"},{"key":"e_1_3_2_1_73_1","volume-title":"PARDISO: a High-Performance Serial and Parallel Sparse Linear Solver in Semiconductor Device Simulation. Future Generation Computer Systems, 18(1)","author":"Schenk O.","year":"2001","unstructured":"O. Schenk , K. G\u00e4rtner , W. Fichtner , and A. Stricker . PARDISO: a High-Performance Serial and Parallel Sparse Linear Solver in Semiconductor Device Simulation. Future Generation Computer Systems, 18(1) , 2001 . O. Schenk, K. G\u00e4rtner, W. Fichtner, and A. Stricker. PARDISO: a High-Performance Serial and Parallel Sparse Linear Solver in Semiconductor Device Simulation. Future Generation Computer Systems, 18(1), 2001."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3067731"},{"issue":"9","key":"e_1_3_2_1_75_1","volume":"78","author":"Tian M.","year":"2022","unstructured":"M. Tian , J. Wang , Z. Zhang , W. Du , J. Pan , and T. Liu . swSuperLU: A Highly Scalable Sparse Direct Solver on Sunway Manycore Architecture. The Journal of Supercomputing , 78 ( 9 ), 2022 . M. Tian, J. Wang, Z. Zhang, W. Du, J. Pan, and T. Liu. swSuperLU: A Highly Scalable Sparse Direct Solver on Sunway Manycore Architecture. The Journal of Supercomputing, 78(9), 2022.","journal-title":"The Journal of Supercomputing"},{"key":"e_1_3_2_1_76_1","volume-title":"Accelerating Sparse LU Factorization with Density-Aware Adaptive Matrix Multiplication for Circuit Simulation. In DAC '23","author":"Wang T.","year":"2023","unstructured":"T. Wang , W. Li , H. Pei , Y. Sun , Z. Jin , and W. Liu . Accelerating Sparse LU Factorization with Density-Aware Adaptive Matrix Multiplication for Circuit Simulation. In DAC '23 , 2023 . T. Wang, W. Li, H. Pei, Y. Sun, Z. Jin, and W. Liu. Accelerating Sparse LU Factorization with Density-Aware Adaptive Matrix Multiplication for Circuit Simulation. In DAC '23, 2023."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1137\/09074543X"},{"key":"e_1_3_2_1_78_1","volume-title":"End-to-End LU Factorization of Large Matrices on GPUs. In PPoPP '23","author":"Xia Y.","year":"2023","unstructured":"Y. Xia , P. Jiang , G. Agrawal , and R. Ramnath . End-to-End LU Factorization of Large Matrices on GPUs. In PPoPP '23 , 2023 . Y. Xia, P. Jiang, G. Agrawal, and R. Ramnath. End-to-End LU Factorization of Large Matrices on GPUs. In PPoPP '23, 2023."},{"key":"e_1_3_2_1_79_1","volume-title":"Exploiting Architecture Advances for Sparse Solvers in Circuit Simulation. In DATE '22","author":"Yan Z.","year":"2022","unstructured":"Z. Yan , B. Xie , X. Li , and Y. Bao . Exploiting Architecture Advances for Sparse Solvers in Circuit Simulation. In DATE '22 , 2022 . Z. Yan, B. Xie, X. Li, and Y. Bao. Exploiting Architecture Advances for Sparse Solvers in Circuit Simulation. In DATE '22, 2022."},{"key":"e_1_3_2_1_80_1","volume-title":"A CPU-GPU Hybrid Approach for the Unsymmetric Multifrontal Method. Parallel Computing, 37(12)","author":"Yu C.","year":"2011","unstructured":"C. Yu , W. Wang , and D. Pierce . A CPU-GPU Hybrid Approach for the Unsymmetric Multifrontal Method. Parallel Computing, 37(12) , 2011 . C. Yu, W. Wang, and D. Pierce. A CPU-GPU Hybrid Approach for the Unsymmetric Multifrontal Method. Parallel Computing, 37(12), 2011."},{"key":"e_1_3_2_1_81_1","volume-title":"SFLU: Synchronization-Free Sparse LU Factorization for Fast Circuit Simulation on GPUs. In DAC '21","author":"Zhao J.","year":"2021","unstructured":"J. Zhao , Y. Wen , Y. Luo , Z. Jin , W. Liu , and Z. Zhou . SFLU: Synchronization-Free Sparse LU Factorization for Fast Circuit Simulation on GPUs. In DAC '21 , 2021 . J. Zhao, Y. Wen, Y. Luo, Z. Jin, W. Liu, and Z. Zhou. SFLU: Synchronization-Free Sparse LU Factorization for Fast Circuit Simulation on GPUs. In DAC '21, 2021."}],"event":{"name":"SC '23: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver CO USA","acronym":"SC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607050","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607050","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:22Z","timestamp":1750178182000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607050"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":81,"alternative-id":["10.1145\/3581784.3607050","10.1145\/3581784"],"URL":"https:\/\/doi.org\/10.1145\/3581784.3607050","relation":{},"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"2023-11-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}