{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T02:47:58Z","timestamp":1769222878213,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T00:00:00Z","timestamp":1723420800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"the Strategic Priority Research Program of Chinese Academy of Sciences","award":["XDB0500102"],"award-info":[{"award-number":["XDB0500102"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,12]]},"DOI":"10.1145\/3673038.3673159","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T18:29:01Z","timestamp":1723141741000},"page":"1156-1165","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["DB-SpGEMM: A Massively Distributed Block-Sparse Matrix-Matrix Multiplication for Linear-Scaling DFT Calculations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7686-3561","authenticated-orcid":false,"given":"Zhong","family":"Zheng","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6487-3658","authenticated-orcid":false,"given":"Junshi","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, China and Laoshan Laboratory, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9644-8574","authenticated-orcid":false,"given":"Yang","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8994-6280","authenticated-orcid":false,"given":"Longsheng","family":"Song","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6641-1003","authenticated-orcid":false,"given":"Xinming","family":"Qin","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3900-3722","authenticated-orcid":false,"given":"Hong","family":"An","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, China and Laoshan Laboratory, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,12]]},"reference":[{"key":"e_1_3_2_1_1_1","series-title":"SIAM journal on numerical analysis 19, 4","volume-title":"An interior penalty finite element method with discontinuous elements","author":"Arnold N","year":"1982","unstructured":"Douglas\u00a0N Arnold. 1982. An interior penalty finite element method with discontinuous elements. SIAM journal on numerical analysis 19, 4 (1982), 742\u2013760."},{"key":"e_1_3_2_1_2_1","series-title":"SIAM journal on numerical analysis 39, 5","volume-title":"Unified analysis of discontinuous Galerkin methods for elliptic problems","author":"Arnold N","year":"2002","unstructured":"Douglas\u00a0N Arnold, Franco Brezzi, Bernardo Cockburn, and L\u00a0Donatella Marini. 2002. Unified analysis of discontinuous Galerkin methods for elliptic problems. SIAM journal on numerical analysis 39, 5 (2002), 1749\u20131779."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1137\/15M104253X"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2486159.2486196"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2009.06.022"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2014.03.012"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2008.45"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3202518"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3627037"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.47.10895"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2017.12.010"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-016-5588-7"},{"key":"e_1_3_2_1_13_1","volume-title":"Gpu kernels for block-sparse weights. arXiv preprint arXiv:1711.09224 3, 2","author":"Gray Scott","year":"2017","unstructured":"Scott Gray, Alec Radford, and Diederik\u00a0P Kingma. 2017. Gpu kernels for block-sparse weights. arXiv preprint arXiv:1711.09224 3, 2 (2017), 2."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ScalA49573.2019.00010"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00062"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Jan\u00a0S Hesthaven and Tim Warburton. 2004. High\u2013order nodal discontinuous Galerkin methods for the Maxwell eigenvalue problem. Philosophical Transactions of the Royal Society of London. Series A: Mathematical Physical and Engineering Sciences 362 1816 (2004) 493\u2013524.","DOI":"10.1098\/rsta.2003.1332"},{"key":"e_1_3_2_1_17_1","volume-title":"Inhomogeneous electron gas. Physical review 136, 3B","author":"Hohenberg Pierre","year":"1964","unstructured":"Pierre Hohenberg and Walter Kohn. 1964. Inhomogeneous electron gas. Physical review 136, 3B (1964), B864."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571891"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1063\/1.4931732"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scib.2020.06.025"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1002\/wcms.1159"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3378176"},{"key":"e_1_3_2_1_23_1","volume-title":"Advances in dataflow programming languages. ACM computing surveys (CSUR) 36, 1","author":"Johnston M","year":"2004","unstructured":"Wesley\u00a0M Johnston, JR\u00a0Paul Hanna, and Richard\u00a0J Millar. 2004. Advances in dataflow programming languages. ACM computing surveys (CSUR) 36, 1 (2004), 1\u201334."},{"key":"e_1_3_2_1_24_1","volume-title":"Self-consistent equations including exchange and correlation effects. Physical review 140, 4A","author":"Kohn Walter","year":"1965","unstructured":"Walter Kohn and Lu\u00a0Jeu Sham. 1965. Self-consistent equations including exchange and correlation effects. Physical review 140, 4A (1965), A1133."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3093172.3093228"},{"key":"e_1_3_2_1_26_1","unstructured":"Lin Lin Jianfeng Lu Lexing Ying Roberto Car and Weinan E. 2009. Fast algorithm for extracting the diagonal of the inverse matrix with application to the electronic structure analysis of metallic systems. (2009)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s42514-022-00126-8"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.66.155115"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1063\/1.1559913"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.58.12704"},{"key":"e_1_3_2_1_31_1","volume-title":"48th AIAA aerospace sciences meeting including the new horizons forum and aerospace exposition. 363.","author":"Peraire Jaime","unstructured":"Jaime Peraire, Ngoc Nguyen, and Bernardo Cockburn. 2010. A hybridizable discontinuous Galerkin method for the compressible Euler and Navier-Stokes equations. In 48th AIAA aerospace sciences meeting including the new horizons forum and aerospace exposition. 363."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0505436102"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1177\/10943420231177631"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Ole Sch\u00fctt Peter Messmer J\u00fcrg Hutter and Joost VandeVondele. 2016. GPU-Accelerated Sparse Matrix\u2013Matrix Multiplication for Linear Scaling Density Functional Theory. Electronic Structure Calculations on Graphics Processing Units: From Quantum Chemistry to Condensed Matter Physics (2016) 173\u2013190.","DOI":"10.1002\/9781118670712.ch8"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1080\/0144235X.2010.520454"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1021\/ct200897x"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/0166-1280(92)85024-F"}],"event":{"name":"ICPP '24: the 53rd International Conference on Parallel Processing","location":"Gotland Sweden","acronym":"ICPP '24"},"container-title":["Proceedings of the 53rd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673159","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673159","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T17:28:35Z","timestamp":1758648515000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673159"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,12]]},"references-count":37,"alternative-id":["10.1145\/3673038.3673159","10.1145\/3673038"],"URL":"https:\/\/doi.org\/10.1145\/3673038.3673159","relation":{},"subject":[],"published":{"date-parts":[[2024,8,12]]},"assertion":[{"value":"2024-08-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}