{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:55:14Z","timestamp":1776930914239,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["No.2022YFB4501400"],"award-info":[{"award-number":["No.2022YFB4501400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62372199"],"award-info":[{"award-number":["No.62372199"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759849","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:05:39Z","timestamp":1762963539000},"page":"645-660","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Bridging the Gap between Unstructured SpMM and Structured Sparse Tensor Cores"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-4635-1903","authenticated-orcid":false,"given":"Yukang","family":"Dong","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3572-0999","authenticated-orcid":false,"given":"Ziyuan","family":"Shen","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5628-8806","authenticated-orcid":false,"given":"Wenbin","family":"Jiang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2055-5476","authenticated-orcid":false,"given":"Zhenghang","family":"Liu","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3662-4226","authenticated-orcid":false,"given":"Ye","family":"Xu","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6138-1958","authenticated-orcid":false,"given":"Bingyi","family":"He","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuahn, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3058-7581","authenticated-orcid":false,"given":"Ran","family":"Zheng","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3934-7605","authenticated-orcid":false,"given":"Hai","family":"Jin","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_2_2_2","first-page":"75","volume-title":"SpringSim (HPS)","author":"Anzt Hartwig","year":"2015","unstructured":"Hartwig Anzt, Stanimire Tomov, and Jack\u00a0J Dongarra. 2015. Accelerating the LOBPCG method on GPUs using a blocked Sparse Matrix Vector Product. In SpringSim (HPS). 75\u201382."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Alkida Balliu Sebastian Brandt Juho Hirvonen Dennis Olivetti Mika\u00ebl Rabie and Jukka Suomela. 2021. Lower Bounds for Maximal Matchings and Maximal Independent Sets. Journal of the ACM (JACM) 68 5 (2021) 1\u201330.","DOI":"10.1145\/3461458"},{"key":"e_1_3_3_2_4_2","unstructured":"David\u00a0M Blei Andrew\u00a0Y Ng and Michael\u00a0I Jordan. 2003. Latent dirichlet allocation. Journal of machine Learning research 3 Jan (2003) 993\u20131022."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-349-03521-2"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607087"},{"key":"e_1_3_3_2_7_2","first-page":"300","volume-title":"Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming (PPoPP 2019)","author":"Sadayappan Changwan Hong, Aravind Sukumaran-Rajam, Israt Nisa, Kunal Singh, and P.","year":"2019","unstructured":"Changwan Hong, Aravind Sukumaran-Rajam, Israt Nisa, Kunal Singh, and P. Sadayappan. 2019. Adaptive Sparse Tiling for Sparse Matrix Multiplication. In Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming (PPoPP 2019). ACM, 300\u2013314."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Timothy\u00a0A. Davis and Yifan Hu. 2011. The university of Florida sparse matrix collection. ACM Transactions on Mathematical Software (TOMS) 38 1 (2011) 1:1\u20131:25.","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00057"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Zvi Galil. 1986. Efficient Algorithms for Finding Maximum Matching in Graphs. ACM Computing Surveys (CSUR) 18 1 (1986) 23\u201338.","DOI":"10.1145\/6462.6502"},{"key":"e_1_3_3_2_11_2","first-page":"1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2020)","author":"Yang Guyue Huang, Guohao Dai, Yu Wang, and Huazhong","year":"2020","unstructured":"Guyue Huang, Guohao Dai, Yu Wang, and Huazhong Yang. 2020. GE-SpMM: General-purpose Sparse Matrix-Matrix Multiplication on GPUs for Graph Neural Networks. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2020). IEEE\/ACM, 1\u201312."},{"key":"e_1_3_3_2_12_2","first-page":"1024","volume-title":"Proceedings of the 30th Conference on Neural Information Processing Systems (NeurIPS 2017)","author":"Hamilton William\u00a0L.","year":"2017","unstructured":"William\u00a0L. Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive Representation Learning on Large Graphs. In Proceedings of the 30th Conference on Neural Information Processing Systems (NeurIPS 2017). 1024\u20131034."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Oscar Higgott and Craig Gidney. 2025. Sparse Blossom: correcting a million errors per core second with minimum-weight matching. Quantum 9 (2025) 1600.","DOI":"10.22331\/q-2025-01-20-1600"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Xuanteng Huang Xianwei Zhang Panfei Yang and Nong Xiao. 2023. Benchmarking GPU Tensor Cores on General Matrix Multiplication Kernels through CUTLASS. Applied Sciences 13 24 (2023) 13022.","DOI":"10.3390\/app132413022"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374546"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Telikepalli Kavitha. 2024. Maximum Matchings and Popularity. SIAM Journal on Discrete Mathematics 38 2 (2024) 1202\u20131221.","DOI":"10.1137\/22M1523248"},{"key":"e_1_3_3_2_18_2","unstructured":"Andrew\u00a0S Lan Andrew\u00a0E Waters Christoph Studer and Richard\u00a0G Baraniuk. 2014. Sparse Factor Analysis for Learning and Content Analytics. The Journal of Machine Learning Research 15 1 (2014) 1959\u20132008."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW63119.2024.00199"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Junqing Lin Jingwei Sun Xiaolong Shi Honghe Zhang Xianzhi Yu Xinzhi Wang Jun Yao and Guangzhong Sun. 2024. LO-SpMM: Low-cost Search for High-performance SpMM Kernels on GPUs. ACM Transactions on Architecture and Code Optimization (TACO) 21 4 (2024) 1\u201325.","DOI":"10.1145\/3685277"},{"key":"e_1_3_3_2_21_2","unstructured":"Zhuang Liu Mingjie Sun Tinghui Zhou Gao Huang and Trevor Darrell. 2018. Rethinking the value of network pruning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.05270 (2018)."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00064"},{"key":"e_1_3_3_2_23_2","unstructured":"NVIDIA. 2020. Ampere GPU Architecture Whitepaper. https:\/\/www.nvidia.com\/content\/PDF\/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.pdf."},{"key":"e_1_3_3_2_24_2","unstructured":"NVIDIA. 2025. cuSPARSE Library. https:\/\/docs.nvidia.com."},{"key":"e_1_3_3_2_25_2","unstructured":"NVIDIA. 2025. cuSPARSELt Library. https:\/\/docs.nvidia.com\/cuda\/cusparselt\/."},{"key":"e_1_3_3_2_26_2","unstructured":"NVIDIA. 2025. PTX ISA 8.5: CUDA Toolkit Documentation. https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/."},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00060"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627535.3638470"},{"key":"e_1_3_3_2_29_2","first-page":"19","volume-title":"12th IEEE\/ACM Workshop on Irregular Applications: Architectures and Algorithms (IA3 2022)","author":"Vella Paolo Sylos Labini, Massimo Bernaschi, Werner Nutt, Francesco Silvestri, and Flavio","year":"2022","unstructured":"Paolo Sylos Labini, Massimo Bernaschi, Werner Nutt, Francesco Silvestri, and Flavio Vella. 2022. Blocking Sparse Matrices to Leverage Dense-Specific Multiplication. In 12th IEEE\/ACM Workshop on Irregular Applications: Architectures and Algorithms (IA3 2022). IEEE, 19\u201324."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Russell Reed. 1993. Pruning algorithms-a survey. IEEE Transactions on Neural Networks 4 5 (1993) 740\u2013747.","DOI":"10.1109\/72.248452"},{"key":"e_1_3_3_2_31_2","first-page":"253","volume-title":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS 2024)","author":"Chu Ruibo Fan, Wei Wang, and Xiaowen","year":"2024","unstructured":"Ruibo Fan, Wei Wang, and Xiaowen Chu. 2024. DTC-SpMM: Bridging the Gap in Accelerating General Sparse Matrix Multiplication with Tensor Cores. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS 2024). ACM, 253\u2013267."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00016"},{"key":"e_1_3_3_2_33_2","unstructured":"Amy Shoemaker and Sagar Vare. 2016. Edmonds\u2019 blossom algorithm. CME 18 (2016)."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/InfoTech52438.2021.9548600"},{"key":"e_1_3_3_2_35_2","first-page":"1","volume-title":"Proceedings of the 5th International Conference on Learning Representations (ICLR 2017)","author":"Welling Thomas N. Kipf and Max","year":"2017","unstructured":"Thomas N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In Proceedings of the 5th International Conference on Learning Representations (ICLR 2017). 1\u201314."},{"key":"e_1_3_3_2_36_2","first-page":"1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2020)","author":"Elsen Trevor Gale, Matei Zaharia, Cliff Young, and Erich","year":"2020","unstructured":"Trevor Gale, Matei Zaharia, Cliff Young, and Erich Elsen. 2020. Sparse GPU Kernels for Deep Learning. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2020). IEEE\/ACM, 1\u201314."},{"key":"e_1_3_3_2_37_2","first-page":"5998","volume-title":"Advances in Neural Information Processing Systems (NeurIPS 2017)","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems (NeurIPS 2017). 5998\u20136008."},{"key":"e_1_3_3_2_38_2","first-page":"1","volume-title":"Proceedings of the 6th International Conference on Learning Representations (ICLR 2018)","author":"Velickovic Petar","year":"2018","unstructured":"Petar Velickovic, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. 2018. Graph Attention Networks. In Proceedings of the 6th International Conference on Learning Representations (ICLR 2018). 1\u201312."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Junjie Wang Hao Gao Yu Han Chi Ding Shuning Pan Yong Wang Qiuhan Jia Hui-Tian Wang Dingyu Xing and Jian Sun. 2023. MAGUS: Machine Learning and Graph Theory Assisted Universal Structure Searcher. National Science Review 10 7 (2023) nwad128.","DOI":"10.1093\/nsr\/nwad128"},{"key":"e_1_3_3_2_40_2","volume-title":"Introduction to Graph Theory","author":"West Douglas\u00a0Brent","year":"2001","unstructured":"Douglas\u00a0Brent West et\u00a0al. 2001. Introduction to Graph Theory. Vol.\u00a02. Prentice hall Upper Saddle River."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00071"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Jianrong Yan Wenbin Jiang Dongao He Suyang Wen Yang Li Hai Jin and Zhiyuan Shao. 2025. RT-GNN: Accelerating Sparse Graph Neural Networks by Tensor-CUDA Kernel Fusion. ACM Transactions on Architecture and Code Optimization (TACO) 22 1 (2025) 1\u201325.","DOI":"10.1145\/3702001"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"e_1_3_3_2_44_2","first-page":"149","volume-title":"Proceedings of the 2023 USENIX Annual Technical Conference (ATC 2023)","author":"Ding Yuke Wang, Boyuan Feng, Zheng Wang, Guyue Huang, and Yufei","year":"2023","unstructured":"Yuke Wang, Boyuan Feng, Zheng Wang, Guyue Huang, and Yufei Ding. 2023. TC-GNN: Bridging Sparse GNN Computation and Dense Tensor Cores on GPUs. In Proceedings of the 2023 USENIX Annual Technical Conference (ATC 2023). USENIX Association, 149\u2013164."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673108"},{"key":"e_1_3_3_2_46_2","first-page":"1","volume-title":"International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2021)","author":"Xie Zhaodong Chen, Zheng Qu, Liu Liu, Yufei Ding, and Yuan","year":"2021","unstructured":"Zhaodong Chen, Zheng Qu, Liu Liu, Yufei Ding, and Yuan Xie. 2021. Efficient Tensor Core-Based GPU Kernels for Structured Sparsity under Reduced Precision. In International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2021). IEEE\/ACM, 1\u201314."},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759849","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:32:57Z","timestamp":1773253977000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759849"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":46,"alternative-id":["10.1145\/3712285.3759849","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759849","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}