{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T15:21:29Z","timestamp":1771600889355,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11704-022-1749-6","type":"journal-article","created":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T04:12:18Z","timestamp":1667794338000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["swSpAMM: optimizing large-scale sparse approximate matrix multiplication on Sunway Taihulight"],"prefix":"10.1007","volume":"17","author":[{"given":"Xiaoyan","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yi","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Bohong","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Hailong","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhongzhi","family":"Luan","sequence":"additional","affiliation":[]},{"given":"Depei","family":"Qian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,7]]},"reference":[{"issue":"4","key":"1749_CR1","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/3320060","volume":"52","author":"T Ben-Nun","year":"2020","unstructured":"Ben-Nun T, Hoefler T. Demystifying parallel and distributed deep learning: an in-depth concurrency analysis. ACM Computing Surveys, 2020, 52(4): 65","journal-title":"ACM Computing Surveys"},{"key":"1749_CR2","doi-asserted-by":"crossref","unstructured":"Azad A, Bulu\u00e7, A, Gilbert J. Parallel triangle counting and enumeration using matrix algebra. In: Proceedings of 2015 IEEE International Parallel and Distributed Processing Symposium Workshop. 2015, 804\u2013811","DOI":"10.1109\/IPDPSW.2015.75"},{"key":"1749_CR3","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1016\/j.cpc.2014.10.021","volume":"187","author":"M Del Ben","year":"2015","unstructured":"Del Ben M, Sch\u00fctt O, Wentz T, Messmer P, Hutter J, VandeVondele J. Enabling simulation at the fifth rung of DFT: large scale RPA calculations with excellent time to solution. Computer Physics Communications, 2015, 187: 120\u2013129","journal-title":"Computer Physics Communications"},{"issue":"16","key":"1749_CR4","doi-asserted-by":"publisher","first-page":"10891","DOI":"10.1103\/PhysRevB.47.10891","volume":"47","author":"X P Li","year":"1993","unstructured":"Li X P, Nunes R W, Vanderbilt D. Density-matrix electronic-structure method with linear system-size scaling. Physical Review B, 1993, 47(16): 10891\u201310894","journal-title":"Physical Review B"},{"issue":"1\u20132","key":"1749_CR5","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/S0010-4655(00)00074-6","volume":"128","author":"M Challacombe","year":"2000","unstructured":"Challacombe M. A general parallel sparse-blocked matrix multiply for linear scaling SCF theory. Computer Physics Communications, 2000, 128(1\u20132): 93\u2013107","journal-title":"Computer Physics Communications"},{"key":"1749_CR6","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/978-90-481-2853-2_12","volume-title":"Linear-Scaling Techniques in Computational Chemistry and Physics","author":"E H Rubensson","year":"2011","unstructured":"Rubensson E H, Rudberg E, Salek P. Methods for Hartree-Fock and density functional theory electronic structure calculations with linearly scaling processor time and memory usage. In: Zalesny R, Papadopoulos M G, Mezey P G, Leszczynski J, eds. Linear-Scaling Techniques in Computational Chemistry and Physics. Dordrecht: Springer, 2011, 263\u2013300"},{"key":"1749_CR7","doi-asserted-by":"crossref","unstructured":"Gale T, Zaharia M, Young C, Elsen E. Sparse GPU kernels for deep learning. In: Proceedings of SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. 2020, 1\u201314","DOI":"10.1109\/SC41405.2020.00021"},{"key":"1749_CR8","doi-asserted-by":"publisher","unstructured":"Liu X, Liu Y, Yang H, Dun M, Yin B, Luan Z, Qian D. Accelerating approximate matrix multiplication for near-sparse matrices on GPUs. The Journal of Supercomputing, 2022, doi: https:\/\/doi.org\/10.1007\/s11227-022-04334-5","DOI":"10.1007\/s11227-022-04334-5"},{"issue":"168","key":"1749_CR9","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1090\/S0025-5718-1984-0758197-9","volume":"43","author":"S Demko","year":"1984","unstructured":"Demko S, Moss W F, Smith P W. Decay rates for inverses of band matrices. Mathematics of Computation, 1984, 43(168): 491\u2013499","journal-title":"Mathematics of Computation"},{"issue":"1","key":"1749_CR10","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1137\/100814019","volume":"55","author":"M Benzi","year":"2013","unstructured":"Benzi M, Boito P, Razouk N. Decay properties of spectral projectors with applications to electronic structure. SIAM Review, 2013, 55(1): 3\u201364","journal-title":"SIAM Review"},{"issue":"3","key":"1749_CR11","doi-asserted-by":"publisher","first-page":"036503","DOI":"10.1088\/0034-4885\/75\/3\/036503","volume":"75","author":"D R Bowler","year":"2012","unstructured":"Bowler D R, Miyazaki T. O(N) methods in electronic structure calculations. Reports on Progress in Physics, 2012, 75(3): 036503","journal-title":"Reports on Progress in Physics"},{"key":"1749_CR12","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/128_2011_195","volume-title":"Multiscale Molecular Methods in Applied Chemistry","author":"B Kirchner","year":"2011","unstructured":"Kirchner B, di Dio P J, Hutter J. Real-world predictions from ab initio molecular dynamics simulations. In: Kirchner B, Vrabec J, eds. Multiscale Molecular Methods in Applied Chemistry. Berlin: Springer, 2011, 109\u2013153"},{"issue":"5","key":"1749_CR13","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1088\/1367-2630\/8\/5\/071","volume":"8","author":"M Cramer","year":"2006","unstructured":"Cramer M, Eisert J. Correlations, spectral gap and entanglement in harmonic quantum systems on generic lattices. New Journal of Physics, 2006, 8(5): 71","journal-title":"New Journal of Physics"},{"issue":"1","key":"1749_CR14","doi-asserted-by":"publisher","first-page":"012309","DOI":"10.1103\/PhysRevA.73.012309","volume":"73","author":"M Cramer","year":"2006","unstructured":"Cramer M, Eisert J, Plenio M B, Drei\u00dfig J. Entanglement-area law for general bosonic harmonic lattice systems. Physical Review A, 2006, 73(1): 012309","journal-title":"Physical Review A"},{"key":"1749_CR15","unstructured":"Eisert J, Cramer M, Plenio M B. Area laws for the entanglement entropy \u2014 a review. 2008, arXiv preprint arXiv: 0808.3773"},{"issue":"1","key":"1749_CR16","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1007\/s00220-006-0049-6","volume":"267","author":"N Schuch","year":"2006","unstructured":"Schuch N, Cirac J I, Wolf M M. Quantum states on harmonic lattices. Communications in Mathematical Physics, 2006, 267(1): 65\u201392","journal-title":"Communications in Mathematical Physics"},{"issue":"4","key":"1749_CR17","doi-asserted-by":"publisher","first-page":"C170","DOI":"10.1137\/110848244","volume":"34","author":"A Bulu\u00e7","year":"2012","unstructured":"Bulu\u00e7 A, Gilbert J R. Parallel sparse matrix-matrix multiplication and indexing: implementation and experiments. SIAM Journal on Scientific Computing, 2012, 34(4): C170\u2013C191","journal-title":"SIAM Journal on Scientific Computing"},{"key":"1749_CR18","doi-asserted-by":"crossref","unstructured":"Im E J, Yelick K. Optimizing sparse matrix computations for register reuse in SPARSITY. In: Proceedings of International Conference on Computational Science. 2001, 127\u2013136","DOI":"10.1007\/3-540-45545-0_22"},{"key":"1749_CR19","unstructured":"Challacombe M, Bock N. Fast multiplication of matrices with decay. 2010, arXiv preprint arXiv: 1011.3534"},{"issue":"1","key":"1749_CR20","doi-asserted-by":"publisher","first-page":"C1","DOI":"10.1137\/140974602","volume":"38","author":"N Bock","year":"2016","unstructured":"Bock N, Challacombe M, Kal\u00e9 L V. Solvers for O(N) electronic structure in the strong scaling limit. SIAM Journal on Scientific Computing, 2016, 38(1): C1\u2013C21","journal-title":"SIAM Journal on Scientific Computing"},{"key":"1749_CR21","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.softx.2018.03.005","volume":"7","author":"E Rudberg","year":"2018","unstructured":"Rudberg E, Rubensson E H, Sa\u0142ek P, Kruchinina A. Ergo: an open-source program for linear-scaling electronic structure calculations. SoftwareX, 2018, 7: 107\u2013111","journal-title":"SoftwareX"},{"key":"1749_CR22","volume-title":"A cellular computer to implement the Kalman filter algorithm","author":"L E Cannon","year":"1969","unstructured":"Cannon L E. A cellular computer to implement the Kalman filter algorithm. Montana State University, Dissertation, 1969"},{"key":"1749_CR23","doi-asserted-by":"crossref","unstructured":"Blackford L S, Choi J, Cleary A, D\u2019Azeuedo E, Demmel J, Dhillon I, Hammarling S, Henry G, Petitet A, Stanley K, Walker D, Whaley R C, Dongarra J J. ScaLAPACK User\u2019s Guide. Philadelphia: Society for Industrial and Applied Mathematics, 1997","DOI":"10.1137\/1.9780898719642"},{"key":"1749_CR24","doi-asserted-by":"crossref","unstructured":"Solomonik E, Demmel J. Communication-optimal parallel 2.5D matrix multiplication and LU factorization algorithms. In: Proceedings of the 17th International Euro-ParConference. 2011, 90\u2013109","DOI":"10.1007\/978-3-642-23397-5_10"},{"key":"1749_CR25","doi-asserted-by":"crossref","unstructured":"Lazzaro A, VandeVondele J, Hutter J, Sch\u00fctt O. Increasing the efficiency of sparse matrix-matrix multiplication with a 2.5D algorithm and one-sided MPI. In: Proceedings of Platform for Advanced Scientific Computing Conference. 2017, 3","DOI":"10.1145\/3093172.3093228"},{"key":"1749_CR26","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/j.jpdc.2017.01.022","volume":"104","author":"M Moldaschl","year":"2017","unstructured":"Moldaschl M, Prikopa K E, Gansterer W N. Fault tolerant communication-optimal 2.5D matrix multiplication. Journal of Parallel and Distributed Computing, 2017, 104: 179\u2013190","journal-title":"Journal of Parallel and Distributed Computing"},{"issue":"5","key":"1749_CR27","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1147\/rd.395.0575","volume":"39","author":"R C Agarwal","year":"1995","unstructured":"Agarwal R C, Balle S M, Gustavson F G, Joshi M, Palkar P. A three-dimensional approach to parallel matrix multiplication. IBM Journal of Research and Development, 1995, 39(5): 575\u2013582","journal-title":"IBM Journal of Research and Development"},{"key":"1749_CR28","doi-asserted-by":"crossref","unstructured":"Siegel J, Villa O, Krishnamoorthy S, Tumeo A, Li X. Efficient sparse matrix-matrix multiplication on heterogeneous high performance systems. In: Proceedings of 2010 IEEE International Conference on Cluster Computing Workshops and Posters (CLUSTER WORKSHOPS). 2010, 1\u20138","DOI":"10.1109\/CLUSTERWKSP.2010.5613109"},{"issue":"7","key":"1749_CR29","doi-asserted-by":"publisher","first-page":"072001","DOI":"10.1007\/s11432-016-5588-7","volume":"59","author":"H Fu","year":"2016","unstructured":"Fu H, Liao J, Yang J, Wang L, Song Z, Huang X, Yang C, Xue W, Liu F, Qiao F, Zhao W, Yin X, Hou C, Zhang C, Ge W, Zhang J, Wang Y, Zhou C, Yang G. The Sunway Taihulight supercomputer: system and applications. Science China Information Sciences, 2016, 59(7): 072001","journal-title":"Science China Information Sciences"},{"key":"1749_CR30","doi-asserted-by":"crossref","unstructured":"Fu H, Liao J, Xue W, Wang L, Chen D, Gu L, Xu J, Ding N, Wang X, He C, Xu S, Liang Y, Fang J, Xu Y, Zheng W, Xu J, Zheng Z, Wei W, Ji X, Zhang H, Chen B, Li K, Huang X, Chen W, Yang G. Refactoring and optimizing the community atmosphere model (CAM) on the Sunway Taihulight supercomputer. In: SC\u201916: Proceedings of International Conference for High Performance Computing, Networking, Storage and Analysis. 2016, 969\u2013980","DOI":"10.1109\/SC.2016.82"},{"key":"1749_CR31","doi-asserted-by":"crossref","unstructured":"Lin H, Zhu X, Yu B, Tang X, Xue W, Chen W, Zhang L, Hoefler T, Ma X, Liu X, Zheng W, Xu J. ShenTu: processing multi-trillion edge graphs on millions of cores in seconds. In: Proceedings of SC18: International Conference for High Performance Computing, Networking, Storage and Analysis. 2018, 706\u2013716","DOI":"10.1109\/SC.2018.00059"},{"key":"1749_CR32","doi-asserted-by":"crossref","unstructured":"Yue H, Deng L, Meng D, Wang Y, Sun Y. Parallelization and optimization of large-scale CFD simulations on Sunway Taihulight system. In: Proceedings of the 13th Conference on Advanced Computer Architecture. 2020, 260\u2013274","DOI":"10.1007\/978-981-15-8135-9_19"},{"key":"1749_CR33","doi-asserted-by":"crossref","unstructured":"Yang C, Xue W, Fu H, You H, Wang X, Ao Y, Liu F, Gan L, Xu P, Wang L, Yang G, Zheng W. 10M-core scalable fully-implicit solver for nonhydrostatic atmospheric dynamics. In: SC\u201916: Proceedings of International Conference for High Performance Computing, Networking, Storage and Analysis. 2016, 57\u201368","DOI":"10.1109\/SC.2016.5"},{"key":"1749_CR34","doi-asserted-by":"crossref","unstructured":"Xu Z, Lin J, Matsuoka S. Benchmarking SW26010 many-core processor. In: Proceedings of 2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW). 2017, 743\u2013752","DOI":"10.1109\/IPDPSW.2017.9"},{"key":"1749_CR35","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7055.001.0001","volume-title":"Using MPI: Portable Parallel Programming with the Message Passing Interface","author":"W Gropp","year":"1999","unstructured":"Gropp W, Lusk E, Skjellum A. Using MPI: Portable Parallel Programming with the Message Passing Interface. Cambridge: MIT Press, 1999"},{"key":"1749_CR36","doi-asserted-by":"crossref","unstructured":"Kwasniewski G, Kabi\u0107 M, Besta M, VandeVondele J, Solc\u00e0 R, Hoefler T. Red-blue pebbling revisited: near optimal parallel matrix-matrix multiplication. In: Proceedings of International Conference for High Performance Computing, Networking, Storage and Analysis. 2019, 24","DOI":"10.1145\/3295500.3356181"},{"key":"1749_CR37","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J. Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of 2014 IEEE Conference on Computer Vision and Pattern Recognition. 2014, 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"1749_CR38","unstructured":"Artemov A. Sparse approximate matrix multiplication in a fully recursive distributed task-based parallel framework. 2019, arXiv preprint arXiv: 1906.08148"},{"key":"1749_CR39","doi-asserted-by":"crossref","unstructured":"Kale L V, Krishnan S. CHARM++: a portable concurrent object oriented system based on C++. In: Proceedings of the 8th Annual Conference on Object-Oriented Programming Systems, Languages, and Applications. 1993, 91\u2013108","DOI":"10.1145\/167962.165874"},{"issue":"1","key":"1749_CR40","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1109\/99.660313","volume":"5","author":"L Dagum","year":"1998","unstructured":"Dagum L, Menon R. OpenMP: an industry standard API for shared-memory programming. IEEE Computational Science and Engineering, 1998, 5(1): 46\u201355","journal-title":"IEEE Computational Science and Engineering"},{"issue":"7","key":"1749_CR41","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1016\/j.parco.2013.09.006","volume":"40","author":"E H Rubensson","year":"2014","unstructured":"Rubensson E H, Rudberg E. Chunks and tasks: a programming model for parallelization of dynamic algorithms. Parallel Computing, 2014, 40(7): 328\u2013343","journal-title":"Parallel Computing"},{"key":"1749_CR42","doi-asserted-by":"crossref","unstructured":"Liu C, Xie B, Liu X, Xue W, Yang H, Liu X. Towards efficient SpMV on Sunway Manycore architectures. In: Proceedings of 2018 International Conference on Supercomputing. 2018, 363\u2013373","DOI":"10.1145\/3205289.3205313"},{"key":"1749_CR43","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1016\/j.ins.2020.11.013","volume":"549","author":"M Dun","year":"2021","unstructured":"Dun M, Li Y, Sun Q, Yang H, Li W, Luan Z, Gan L, Yang G, Qian D. Towards efficient canonical polyadic decomposition on Sunway many-core processor. Information Sciences, 2021, 549: 221\u2013248","journal-title":"Information Sciences"},{"issue":"2","key":"1749_CR44","doi-asserted-by":"publisher","first-page":"1020","DOI":"10.1109\/TETC.2018.2881265","volume":"9","author":"X Zhong","year":"2021","unstructured":"Zhong X, Li M, Yang H, Liu Y, Qian D. swMR: a framework for accelerating MapReduce applications on Sunway Taihulight. IEEE Transactions on Emerging Topics in Computing, 2021, 9(2): 1020\u20131030","journal-title":"IEEE Transactions on Emerging Topics in Computing"},{"issue":"5","key":"1749_CR45","doi-asserted-by":"publisher","first-page":"4533","DOI":"10.1007\/s11227-020-03444-2","volume":"77","author":"Q Han","year":"2021","unstructured":"Han Q, Yang H, Dun M, Luan Z, Gan L, Yang G, Qian D. Towards efficient tile low-rank GEMM computation on Sunway many-core processors. The Journal of Supercomputing, 2021, 77(5): 4533\u20134564","journal-title":"The Journal of Supercomputing"},{"key":"1749_CR46","doi-asserted-by":"crossref","unstructured":"Li M, Liu Y, Yang H, Hu Y, Sun Q, Chen B, You X, Liu X, Luan Z, Qian D. Automatic code generation and optimization of large-scale stencil computation on many-core processors. In: Proceedings of the 50th International Conference on Parallel Processing. 2021, 34","DOI":"10.1145\/3472456.3473517"},{"issue":"5","key":"1749_CR47","doi-asserted-by":"publisher","first-page":"1194","DOI":"10.1109\/TPDS.2019.2962395","volume":"31","author":"Y Hu","year":"2020","unstructured":"Hu Y, Yang H, Luan Z, Gan L, Yang G, Qian D. Massively scaling seismic processing on Sunway Taihulight supercomputer. IEEE Transactions on Parallel and Distributed Systems, 2020, 31(5): 1194\u20131208","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"7","key":"1749_CR48","doi-asserted-by":"publisher","first-page":"1636","DOI":"10.1109\/TPDS.2019.2953852","volume":"31","author":"M Li","year":"2020","unstructured":"Li M, Liu Y, Yang H, Luan Z, Gan L, Yang G, Qian D. Accelerating sparse cholesky factorization on Sunway Manycore architecture. IEEE Transactions on Parallel and Distributed Systems, 2020, 31(7): 1636\u20131650","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"1749_CR49","doi-asserted-by":"crossref","unstructured":"Wang X, Liu W, Xue W, Wu L. swSpTRSV: a fast sparse triangular solve with sparse level tile layout on Sunway architectures. In: Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. 2018, 338\u2013353","DOI":"10.1145\/3178487.3178513"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-022-1749-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-022-1749-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-022-1749-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T20:23:47Z","timestamp":1726691027000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-022-1749-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,7]]},"references-count":49,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["1749"],"URL":"https:\/\/doi.org\/10.1007\/s11704-022-1749-6","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,7]]},"assertion":[{"value":"16 December 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 November 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"174104"}}