{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T03:05:24Z","timestamp":1769655924722,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,1,28]]},"DOI":"10.1145\/3774934.3786422","type":"proceedings-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T15:25:57Z","timestamp":1769613957000},"page":"232-244","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ASM-SpMM: Unleashing the Potential of Arm SME for Sparse Matrix Multiplication Acceleration"],"prefix":"10.1145","author":[{"given":"Jiazhi","family":"Jiang","sequence":"first","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4105-6161","authenticated-orcid":false,"given":"Xijia","family":"Yao","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7372-8588","authenticated-orcid":false,"given":"Jiayu","family":"Chen","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9850-8384","authenticated-orcid":false,"given":"Jinhui","family":"Wei","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5582-1031","authenticated-orcid":false,"given":"Dan","family":"Huang","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5315-3375","authenticated-orcid":false,"given":"Yutong","family":"Lu","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2026,1,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651378"},{"key":"e_1_3_2_1_2_1","unstructured":"Matthias Fey and Jan Eric Lenssen. 2019. Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:1903.02428."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433723"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2023.3281714"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3295712"},{"key":"e_1_3_2_1_6_1","unstructured":"Weihua Hu Matthias Fey Marinka Zitnik Yuxiao Dong Hongyu Ren Bowen Liu Michele Catasta and Jure Leskovec. 2021. Open Graph Benchmark: Datasets for Machine Learning on Graphs. arxiv:2005.00687. arxiv:2005.00687"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3712285.3759769"},{"key":"e_1_3_2_1_8_1","unstructured":"Intel. 2021. Intel\u00ae Advanced Matrix Extensions Overview.. https:\/\/www.intel.com \/content\/www\/us\/en\/products\/docs\/accelerator-engines\/advanced-matrix extensions\/overview.html."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2023.3280805"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545022"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374546"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.01244"},{"key":"e_1_3_2_1_13_1","volume-title":"SNAP: A General Purpose Network Analysis and Graph Mining Library. arxiv:1606.07550. arxiv:1606.07550","author":"Leskovec Jure","year":"2016","unstructured":"Jure Leskovec and Rok Sosic. 2016. SNAP: A General Purpose Network Analysis and Graph Mining Library. arxiv:1606.07550. arxiv:1606.07550"},{"key":"e_1_3_2_1_14_1","unstructured":"Nvidia. 2020. NVIDIA V100 TENSOR CORE GPU.. https:\/\/www.nvidia.com\/enus \/data-center\/v100\/."},{"key":"e_1_3_2_1_15_1","volume-title":"High Performance Unstructured SpMM Computation Using Tensor Cores. In SC24: International Conference for High Performance Computing, Networking, Storage and Analysis. 1\u201314","author":"Okanovic Patrik","year":"2024","unstructured":"Patrik Okanovic, Grzegorz Kwasniewski, Paolo Sylos Labini, Maciej Besta, Flavio Vella, and Torsten Hoefler. 2024. High Performance Unstructured SpMM Computation Using Tensor Cores. In SC24: International Conference for High Performance Computing, Networking, Storage and Analysis. 1\u201314."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640426"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Stefan Remke and Alexander Breuer. 2024. Hello SME! Generating Fast Matrix Multiplication Kernels Using the Scalable Matrix Extension. In SC24-W: Workshops of the International Conference for High Performance Computing Networking Storage and Analysis. 1443\u20131454.","DOI":"10.1109\/SCW63240.2024.00185"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710858"},{"key":"e_1_3_2_1_19_1","volume-title":"Deep Graph Library: Towards Efficient and Scalable Deep Learning on Graphs. CoRR, abs\/1909.01315","author":"Wang Minjie","year":"2019","unstructured":"Minjie Wang, Lingfan Yu, Da Zheng, Quan Gan, Yu Gai, Zihao Ye, Mufei Li, Jinjing Zhou, Qi Huang, Chao Ma, Ziyue Huang, Qipeng Guo, Hao Zhang, Haibin Lin, Junbo Zhao, Jinyang Li, Alexander J. Smola, and Zheng Zhang. 2019. Deep Graph Library: Towards Efficient and Scalable Deep Learning on Graphs. CoRR, abs\/1909.01315 (2019), arXiv:1909.01315. arxiv:1909.01315"},{"key":"e_1_3_2_1_20_1","volume-title":"ICLR workshop on representation learning on graphs and manifolds.","author":"Wang Minjie Yu","year":"2019","unstructured":"Minjie Yu Wang. 2019. Deep graph library: Towards efficient and scalable deep learning on graphs. In ICLR workshop on representation learning on graphs and manifolds."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3721145.3725770"},{"key":"e_1_3_2_1_22_1","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Wang Yuke","year":"2023","unstructured":"Yuke Wang, Boyuan Feng, Zheng Wang, Guyue Huang, and Yufei Ding. 2023. $TC-GNN$: Bridging sparse $GNN$ computation and dense tensor cores on $GPUs$. In 2023 USENIX Annual Technical Conference (USENIX ATC 23). 149\u2013164."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.14778\/3626292.3626303"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673108"},{"key":"e_1_3_2_1_25_1","volume-title":"OpenFFT-SME: An Efficient Outer Product Pattern FFT Library on ARM SME CPUs. In 2024 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 938\u2013949","author":"Zhang Ruge","year":"2024","unstructured":"Ruge Zhang, Haipeng Jia, Yunquan Zhang, Baicheng Yan, Penghao Ma, Long Wang, and Wenxuan Zhao. 2024. OpenFFT-SME: An Efficient Outer Product Pattern FFT Library on ARM SME CPUs. In 2024 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 938\u2013949."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1186\/s40649-019-0069-y"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330351"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710888"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656611"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2023.103035"}],"event":{"name":"PPoPP '26: 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","location":"Sydney NSW Australia","acronym":"PPoPP '26","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGPLAN ACM Special Interest Group on Programming Languages"]},"container-title":["Proceedings of the 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774934.3786422","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T15:29:44Z","timestamp":1769614184000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774934.3786422"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,28]]},"references-count":30,"alternative-id":["10.1145\/3774934.3786422","10.1145\/3774934"],"URL":"https:\/\/doi.org\/10.1145\/3774934.3786422","relation":{},"subject":[],"published":{"date-parts":[[2026,1,28]]},"assertion":[{"value":"2026-01-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}