{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T01:46:24Z","timestamp":1770687984425,"version":"3.49.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T00:00:00Z","timestamp":1764288000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T00:00:00Z","timestamp":1764288000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"The Science and Technology Project of Qinghai Province","award":["2023-QY-208"],"award-info":[{"award-number":["2023-QY-208"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s42514-025-00259-6","type":"journal-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T15:24:29Z","timestamp":1764343469000},"page":"120-131","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Qklu: a two-dimensional block-cyclic sparse direct solver"],"prefix":"10.1007","volume":"8","author":[{"given":"Renqian","family":"Wan","sequence":"first","affiliation":[]},{"given":"Jianqiang","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Haodong","family":"Bian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,28]]},"reference":[{"key":"259_CR1","first-page":"121","volume-title":"International Workshop on Applied Parallel Computing","author":"PR Amestoy","year":"2000","unstructured":"Amestoy, P.R., Duff, I.S., L\u2019Excellent, J.-Y., Koster, J.: Mumps: a general purpose distributed memory sparse solver. In: International Workshop on Applied Parallel Computing, pp. 121\u2013130. Springer (2000)"},{"issue":"1","key":"259_CR2","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1137\/S0895479899358194","volume":"23","author":"PR Amestoy","year":"2001","unstructured":"Amestoy, P.R., Duff, I.S., L\u2019Excellent, J.-Y., Koster, J.: A fully asynchronous multifrontal solver using distributed dynamic scheduling. SIAM J. Matrix Anal. Appl. 23(1), 15\u201341 (2001)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"3","key":"259_CR3","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/1024074.1024081","volume":"30","author":"PR Amestoy","year":"2004","unstructured":"Amestoy, P.R., Davis, T.A., Duff, I.S.: Algorithm 837: amd, an approximate minimum degree ordering algorithm. ACM Trans. Math. Software (TOMS) 30(3), 381\u2013388 (2004)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"issue":"3","key":"259_CR4","doi-asserted-by":"publisher","first-page":"1451","DOI":"10.1137\/120903476","volume":"37","author":"P Amestoy","year":"2015","unstructured":"Amestoy, P., Ashcraft, C., Boiteau, O., Buttari, A., l\u2019Excellent, J.-Y., Weisbecker, C.: Improving multifrontal methods by means of block low-rank representations. SIAM J. Sci. Comput. 37(3), 1451\u20131474 (2015)","journal-title":"SIAM J. Sci. Comput."},{"issue":"4","key":"259_CR5","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1145\/76909.76910","volume":"15","author":"C Ashcraft","year":"1989","unstructured":"Ashcraft, C., Grimes, R.: The influence of relaxed supernode partitions on the multifrontal method. ACM Trans. Math. Software (TOMS) 15(4), 291\u2013309 (1989)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"key":"259_CR6","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718201","volume-title":"LAPACK95 Users\u2019 Guide","author":"VA Barker","year":"2001","unstructured":"Barker, V.A., Blackford, L.S., Dongarra, J., Croz, J.D., Hammarling, S., Marinova, M., Wa\u015bniewski, J., Yalamov, P.: LAPACK95 Users\u2019 Guide. SIAM, Philadelphia (2001)"},{"issue":"12","key":"259_CR7","doi-asserted-by":"publisher","first-page":"759","DOI":"10.1016\/j.parco.2011.09.002","volume":"37","author":"DY Chenhan","year":"2011","unstructured":"Chenhan, D.Y., Wang, W., Pierce, D.: A cpu-gpu hybrid approach for the unsymmetric multifrontal method. Parallel Comput. 37(12), 759\u2013770 (2011)","journal-title":"Parallel Comput."},{"issue":"2","key":"259_CR8","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1145\/992200.992206","volume":"30","author":"TA Davis","year":"2004","unstructured":"Davis, T.A.: Algorithm 832: umfpack v4. 3\u2014an unsymmetric-pattern multifrontal method. ACM Trans. Math. Software (TOMS) 30(2), 196\u2013199 (2004)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"issue":"1","key":"259_CR9","first-page":"1","volume":"38","author":"TA Davis","year":"2011","unstructured":"Davis, T.A., Hu, Y.: The university of florida sparse matrix collection. ACM Trans. Math. Software (TOMS) 38(1), 1\u201325 (2011)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"issue":"3","key":"259_CR10","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1145\/1024074.1024080","volume":"30","author":"TA Davis","year":"2004","unstructured":"Davis, T.A., Gilbert, J.R., Larimore, S.I., Ng, E.G.: Algorithm 836: colamd, a column approximate minimum degree ordering algorithm. ACM Trans. Math. Software (TOMS) 30(3), 377\u2013380 (2004)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"issue":"4","key":"259_CR11","doi-asserted-by":"publisher","first-page":"915","DOI":"10.1137\/S0895479897317685","volume":"20","author":"JW Demmel","year":"1999","unstructured":"Demmel, J.W., Gilbert, J.R., Li, X.S.: An asynchronous parallel supernodal algorithm for sparse gaussian elimination. SIAM J. Matrix Anal. Appl. 20(4), 915\u2013952 (1999)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"3","key":"259_CR12","doi-asserted-by":"publisher","first-page":"720","DOI":"10.1137\/S0895479895291765","volume":"20","author":"JW Demmel","year":"1999","unstructured":"Demmel, J.W., Eisenstat, S.C., Gilbert, J.R., Li, X.S., Liu, J.W.: A supernodal approach to sparse partial pivoting. SIAM J. Matrix Anal. Appl. 20(3), 720\u2013755 (1999)","journal-title":"SIAM J. Matrix Anal. Appl."},{"key":"259_CR13","unstructured":"Duff, I., Hogg, J., Lopez, F.: A new sparse symmetric indefinite solver using a posteriori threshold pivoting. NLAFET Working Note (2018)"},{"issue":"4","key":"259_CR14","doi-asserted-by":"publisher","first-page":"500","DOI":"10.1109\/PROC.1977.10514","volume":"65","author":"IS Duff","year":"1977","unstructured":"Duff, I.S.: A survey of sparse matrix research. Proc. IEEE 65(4), 500\u2013535 (1977)","journal-title":"Proc. IEEE"},{"issue":"3","key":"259_CR15","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1016\/0167-8191(86)90019-0","volume":"3","author":"IS Duff","year":"1986","unstructured":"Duff, I.S.: Parallel implementation of multifrontal schemes. Parallel Comput. 3(3), 193\u2013204 (1986)","journal-title":"Parallel Comput."},{"issue":"4","key":"259_CR16","doi-asserted-by":"publisher","first-page":"889","DOI":"10.1137\/S0895479897317661","volume":"20","author":"IS Duff","year":"1999","unstructured":"Duff, I.S., Koster, J.: The design and use of algorithms for permuting large entries to the diagonal of sparse matrices. SIAM J. Matrix Anal. Appl. 20(4), 889\u2013901 (1999)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"4","key":"259_CR17","doi-asserted-by":"publisher","first-page":"973","DOI":"10.1137\/S0895479899358443","volume":"22","author":"IS Duff","year":"2001","unstructured":"Duff, I.S., Koster, J.: On algorithms for permuting large entries to the diagonal of a sparse matrix. SIAM J. Matrix Anal. Appl. 22(4), 973\u2013996 (2001)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"2","key":"259_CR18","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1137\/04061043X","volume":"27","author":"IS Duff","year":"2005","unstructured":"Duff, I.S., Pralet, S.: Strategies for scaling and pivoting for sparse symmetric indefinite problems. SIAM J. Matrix Anal. Appl. 27(2), 313\u2013340 (2005)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"3","key":"259_CR19","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1145\/356044.356047","volume":"9","author":"IS Duff","year":"1983","unstructured":"Duff, I.S., Reid, J.K.: The multifrontal solution of indefinite sparse symmetric linear. ACM Trans. Math. Software (TOMS) 9(3), 302\u2013325 (1983)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"issue":"2","key":"259_CR20","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1145\/992200.992201","volume":"30","author":"IS Duff","year":"2004","unstructured":"Duff, I.S., Scott, J.A.: A parallel direct solver for large sparse highly unsymmetric linear systems. ACM Trans. Math. Software (TOMS) 30(2), 95\u2013117 (2004)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"key":"259_CR21","unstructured":"Eswar, K., Sadayappan, P., Visvanathan, V.: Multifrontal factorization of sparse matrices on shared-memory multiprocessors. In: ICPP (3) (1991)"},{"key":"259_CR22","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/ICPP.1993.170","volume-title":"1993 International Conference on Parallel Processing-ICPP\u201993","author":"K Eswar","year":"1993","unstructured":"Eswar, K., Sadayappan, P., Huang, C.-H., Visvanathan, V.: Supernodal sparse cholesky factorization on distributed-memory multiprocessors. In: 1993 International Conference on Parallel Processing-ICPP\u201993, vol. 3, pp. 18\u201322. IEEE (1993)"},{"key":"259_CR23","doi-asserted-by":"crossref","unstructured":"Fu, X., Zhang, B., Wang, T., Li, W., Lu, Y., Yi, E., Zhao, J., Geng, X., Li, F., Zhang, J., etal.: Pangulu: A scalable regular two-dimensional block-cyclic sparse direct solver on distributed heterogeneous systems. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201314 (2023)","DOI":"10.1145\/3581784.3607050"},{"issue":"4","key":"259_CR24","doi-asserted-by":"publisher","first-page":"1015","DOI":"10.1109\/TPDS.2021.3090316","volume":"33","author":"A Gaihre","year":"2021","unstructured":"Gaihre, A., Li, X.S., Liu, H.: Gsofa: scalable sparse symbolic lu factorization on gpus. IEEE Trans. Parallel Distrib. Syst. 33(4), 1015\u20131026 (2021)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"3","key":"259_CR25","doi-asserted-by":"publisher","first-page":"1289","DOI":"10.1137\/050638102","volume":"29","author":"L Grigori","year":"2007","unstructured":"Grigori, L., Demmel, J.W., Li, X.S.: Parallel symbolic factorization for sparse lu with static pivoting. SIAM J. Sci. Comput. 29(3), 1289\u20131314 (2007)","journal-title":"SIAM J. Sci. Comput."},{"key":"259_CR26","unstructured":"Gupta, A.: Wsmp: Watson sparse matrix package (part-i: direct solution of symmetric sparse systems). IBM TJ Watson Research Center, Yorktown Heights, NY, Tech. Rep. RC 21886 (2000)"},{"issue":"3","key":"259_CR27","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1109\/TVLSI.2015.2421287","volume":"24","author":"K He","year":"2015","unstructured":"He, K., Tan, S.X.-D., Wang, H., Shi, G.: Gpu-accelerated parallel sparse lu factorization method for fast circuit analysis. IEEE Trans. Very Large Scale Integr. VLSI Syst. 24(3), 1140\u20131150 (2015)","journal-title":"IEEE Trans. Very Large Scale Integr. VLSI Syst."},{"key":"259_CR28","doi-asserted-by":"crossref","unstructured":"Kumar, B., Sadayappan, P., Huang, C.-H.: On sparse matrix reordering for parallel factorization. In: Proceedings of the 8th International Conference on Supercomputing, pp. 431\u2013438 (1994)","DOI":"10.1145\/181181.181576"},{"issue":"11","key":"259_CR29","doi-asserted-by":"publisher","first-page":"2518","DOI":"10.1109\/TVLSI.2018.2858014","volume":"26","author":"W-K Lee","year":"2018","unstructured":"Lee, W.-K., Achar, R., Nakhla, M.S.: Dynamic gpu parallel sparse lu factorization for fast circuit simulation. IEEE Trans. Very Large Scale Integr. VLSI Syst. 26(11), 2518\u20132529 (2018)","journal-title":"IEEE Trans. Very Large Scale Integr. VLSI Syst."},{"issue":"3","key":"259_CR30","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1145\/1089014.1089017","volume":"31","author":"XS Li","year":"2005","unstructured":"Li, X.S.: An overview of superlu: algorithms, implementation, and user interface. ACM Trans. Math. Software (TOMS) 31(3), 302\u2013325 (2005)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"key":"259_CR31","first-page":"34","volume-title":"Proceedings of the 1998 ACM\/IEEE Conference on Supercomputing","author":"XS Li","year":"1998","unstructured":"Li, X.S., Demmel, J.W.: Making sparse gaussian elimination scalable by static pivoting. In: Proceedings of the 1998 ACM\/IEEE Conference on Supercomputing, pp. 34\u201334. IEEE (1998)"},{"issue":"2","key":"259_CR32","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1145\/779359.779361","volume":"29","author":"XS Li","year":"2003","unstructured":"Li, X.S., Demmel, J.W.: Superlu_dist: a scalable distributed-memory sparse direct solver for unsymmetric linear systems. ACM Trans. Math. Software (TOMS) 29(2), 110\u2013140 (2003)","journal-title":"ACM Trans. Math. Software (TOMS)"},{"key":"259_CR33","doi-asserted-by":"crossref","unstructured":"Liu, Y., Ding, N., Sao, P., Williams, S., Li, X.S.: Unified communication optimization strategies for sparse triangular solver on cpu and gpu clusters. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315 (2023)","DOI":"10.1145\/3581784.3607092"},{"issue":"1","key":"259_CR34","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1137\/0611010","volume":"11","author":"JW Liu","year":"1990","unstructured":"Liu, J.W.: The role of elimination trees in sparse factorization. SIAM J. Matrix Anal. Appl. 11(1), 134\u2013172 (1990)","journal-title":"SIAM J. Matrix Anal. Appl."},{"key":"259_CR35","doi-asserted-by":"publisher","first-page":"617","DOI":"10.1007\/978-3-319-43659-3_45","volume-title":"Euro-Par 2016: Parallel Processing:22nd International Conference on Parallel and Distributed Computing","author":"W Liu","year":"2016","unstructured":"Liu, W., Li, A., Hogg, J., Duff, I.S., Vinter, B.: A synchronization-free algorithm for parallel sparse triangular solves. In: Euro-Par 2016: Parallel Processing:22nd International Conference on Parallel and Distributed Computing, pp. 617\u2013630. Springer (2016) . (Grenoble, France, August 24-26, 2016, Proceedings 22)"},{"issue":"3","key":"259_CR36","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1109\/MDAT.2020.2974910","volume":"37","author":"S Peng","year":"2020","unstructured":"Peng, S., Tan, S.X.-D.: Glu3. 0: fast gpu-based parallel sparse lu factorization for circuit simulation. IEEE Design & Test 37(3), 78\u201390 (2020)","journal-title":"IEEE Design & Test"},{"key":"259_CR37","unstructured":"Puglisi, C.: An unsymmetrized multifrontal lu factorization (2000)"},{"key":"259_CR38","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1007\/978-3-319-09873-9_41","volume-title":"Euro-Par 2014 Parallel Processing: 20th International Conference","author":"P Sao","year":"2014","unstructured":"Sao, P., Vuduc, R., Li, X.S.: A distributed cpu-gpu sparse direct solver. In: Euro-Par 2014 Parallel Processing: 20th International Conference, pp. 487\u2013498. Springer (2014) . (Porto, Portugal, August 25-29, 2014. Proceedings 20)"},{"key":"259_CR39","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1016\/j.jpdc.2019.03.004","volume":"131","author":"P Sao","year":"2019","unstructured":"Sao, P., Li, X.S., Vuduc, R.: A communication-avoiding 3d algorithm for sparse lu factorization on heterogeneous systems. J. Parallel Distributed Comput. 131, 218\u2013234 (2019)","journal-title":"J. Parallel Distributed Comput."},{"issue":"2","key":"259_CR40","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/S0167-8191(01)00135-1","volume":"28","author":"O Schenk","year":"2002","unstructured":"Schenk, O., G\u00e4rtner, K.: Two-level dynamic scheduling in pardiso: improved scalability on shared memory multiprocessing systems. Parallel Comput. 28(2), 187\u2013197 (2002)","journal-title":"Parallel Comput."},{"issue":"3","key":"259_CR41","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1016\/j.future.2003.07.011","volume":"20","author":"O Schenk","year":"2004","unstructured":"Schenk, O., G\u00e4rtner, K.: Solving unsymmetric sparse systems of linear equations with pardiso. Futur. Gener. Comput. Syst. 20(3), 475\u2013487 (2004)","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"1","key":"259_CR42","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/S0167-739X(00)00076-5","volume":"18","author":"O Schenk","year":"2001","unstructured":"Schenk, O., G\u00e4rtner, K., Fichtner, W., Stricker, A.: Pardiso: a high-performance serial and parallel sparse linear solver in semiconductor device simulation. Futur. Gener. Comput. Syst. 18(1), 69\u201378 (2001)","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"3","key":"259_CR43","doi-asserted-by":"publisher","first-page":"1382","DOI":"10.1137\/09074543X","volume":"31","author":"J Xia","year":"2010","unstructured":"Xia, J., Chandrasekaran, S., Gu, M., Li, X.S.: Superfast multifrontal method for large structured linear systems of equations. SIAM J. Matrix Anal. Appl. 31(3), 1382\u20131411 (2010)","journal-title":"SIAM J. Matrix Anal. Appl."},{"key":"259_CR44","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1109\/DAC18074.2021.9586141","volume-title":"2021 58th ACM\/IEEE Design Automation Conference (DAC)","author":"J Zhao","year":"2021","unstructured":"Zhao, J., Wen, Y., Luo, Y., Jin, Z., Liu, W., Zhou, Z.: Sflu: synchronization-free sparse lu factorization for fast circuit simulation on gpus. In: 2021 58th ACM\/IEEE Design Automation Conference (DAC), pp. 37\u201342. IEEE (2021)"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-025-00259-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-025-00259-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-025-00259-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T08:56:03Z","timestamp":1770627363000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-025-00259-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,28]]},"references-count":44,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["259"],"URL":"https:\/\/doi.org\/10.1007\/s42514-025-00259-6","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"value":"2524-4922","type":"print"},{"value":"2524-4930","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,28]]},"assertion":[{"value":"7 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}