{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:24:28Z","timestamp":1773318268020,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"name":"National Natural Science Foun- dation of China (NSFC)","award":["62332011"],"award-info":[{"award-number":["62332011"]}]},{"name":"NSFC-RGC","award":["62461160333"],"award-info":[{"award-number":["62461160333"]}]},{"DOI":"10.13039\/501100003819","name":"Natural Science Foundation of Hubei Province","doi-asserted-by":"publisher","award":["2021CFA037"],"award-info":[{"award-number":["2021CFA037"]}],"id":[{"id":"10.13039\/501100003819","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759826","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:05:39Z","timestamp":1762963539000},"page":"631-644","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["StraGCN: GPU-Accelerated Strassen\u2019s Sparse-Dense Matrix Multiplication for Graph Convolutional Network Training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7058-3719","authenticated-orcid":false,"given":"Weidong","family":"He","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4290-1408","authenticated-orcid":false,"given":"Haikun","family":"Liu","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3950-3209","authenticated-orcid":false,"given":"Zhuohui","family":"Duan","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6302-813X","authenticated-orcid":false,"given":"Xiaofei","family":"Liao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9927-6925","authenticated-orcid":false,"given":"Shuhao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2589-0073","authenticated-orcid":false,"given":"Fubing","family":"Mao","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3934-7605","authenticated-orcid":false,"given":"Hai","family":"Jin","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"crossref","unstructured":"Sergi Abadal Akshay Jain Robert Guirado Jorge L\u00f3pez-Alonso and Eduard Alarc\u00f3n. 2021. Computing graph neural networks: A survey from algorithms to accelerators. ACM Computing Surveys 54 9 (2021) 1\u201338.","DOI":"10.1145\/3477141"},{"key":"e_1_3_3_3_3_2","first-page":"311","volume-title":"Proceedings of the 2024 34th International Conference on Field-Programmable Logic and Applications (FPL)","author":"Ahmad Afzal","year":"2024","unstructured":"Afzal Ahmad, Linfeng Du, and Wei Zhang. 2024. Fast and Practical Strassen\u2019s Matrix Multiplication using FPGAs. In Proceedings of the 2024 34th International Conference on Field-Programmable Logic and Applications (FPL). 311\u2013317."},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"publisher","DOI":"10.5555\/3437539.3437703"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"crossref","unstructured":"David\u00a0H. Bailey King Lee and Horst\u00a0D. Simon. 1991. Using Strassen\u2019s algorithm to accelerate the solution of linear systems. The Journal of Supercomputing 4 (1991) 357\u2013371.","DOI":"10.1007\/BF00129836"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/2806416.2806472"},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607037"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"crossref","unstructured":"Connor\u00a0W. Coley Wengong Jin Luke Rogers Timothy\u00a0F. Jamison Tommi\u00a0S. Jaakkola William\u00a0H. Green Regina Barzilay and Klavs\u00a0F. Jensen. 2019. A graph-convolutional neural network model for the prediction of chemical reactivity. Chemical Science 10 2 (2019) 370\u2013377.","DOI":"10.1039\/C8SC04228D"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/28395.28396"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530508"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467437"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"crossref","unstructured":"Alhussein Fawzi Matej Balog Aja Huang Thomas Hubert Bernardino Romera-Paredes Mohammadamin Barekatain Alexander Novikov Francisco\u00a0J. R.\u00a0Ruiz Julian Schrittwieser Grzegorz Swirszcz David Silver Demis Hassabis and Pushmeet Kohli. 2022. Discovering faster matrix multiplication algorithms with reinforcement learning. Nature 610 7930 (2022) 47\u201353.","DOI":"10.1038\/s41586-022-05172-4"},{"key":"e_1_3_3_3_13_2","unstructured":"Matthias Fey and Jan\u00a0Eric Lenssen. 2019. Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1903.02428 (2019)."},{"key":"e_1_3_3_3_14_2","first-page":"551","volume-title":"Proceedings of the 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":"Gandhi Swapnil","year":"2021","unstructured":"Swapnil Gandhi and Anand\u00a0Padmanabha Iyer. 2021. P3: Distributed deep graph learning at scale. In Proceedings of the 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 551\u2013568."},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480113"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00019"},{"key":"e_1_3_3_3_17_2","unstructured":"Lei He. 2019. EnGN: A high-throughput and energy-efficient accelerator for large graph neural networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.00155 (2019) 237."},{"key":"e_1_3_3_3_18_2","unstructured":"Mikael Henaff Joan Bruna and Yann LeCun. 2015. Deep convolutional networks on graph-structured data. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1506.05163 (2015)."},{"key":"e_1_3_3_3_19_2","first-page":"690","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC)","author":"Huang Jianyu","year":"2016","unstructured":"Jianyu Huang, Tyler\u00a0M. Smith, Greg\u00a0M. Henry, and Robert\u00a0A. Van De\u00a0Geijn. 2016. Strassen\u2019s algorithm reloaded. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC). 690\u2013701."},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"crossref","unstructured":"Jianyu Huang Chenhan\u00a0D. Yu and Robert A. van\u00a0de Geijn. 2020. Strassen\u2019s algorithm reloaded on GPUs. ACM Transactions on Mathematical Software (TOMS) 46 1 (2020) 1\u201322.","DOI":"10.1145\/3372419"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA53966.2022.00079"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070983"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"crossref","unstructured":"George Karypis and Vipin Kumar. 1998. A fast and high quality multilevel scheme for partitioning irregular graphs. SIAM Journal on Scientific Computing 20 1 (1998) 359\u2013392.","DOI":"10.1137\/S1064827595287997"},{"key":"e_1_3_3_3_24_2","unstructured":"Thomas\u00a0N. Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1609.02907 (2016)."},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.50"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575706"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104425"},{"key":"e_1_3_3_3_28_2","volume-title":"CUDA C Programming Guide","author":"Corporation NVIDIA","year":"2013","unstructured":"NVIDIA Corporation. 2013. CUDA C Programming Guide. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00067"},{"key":"e_1_3_3_3_30_2","unstructured":"A. Paszke. 2019. Pytorch: An imperative style high-performance deep learning library. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1912.01703 (2019)."},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"crossref","unstructured":"Kazunori Seki and Masataka Nakamura. 2017. The mechanism of collapse of the Friendster network: what can we learn from the core structure of Friendster? Social Network Analysis and Mining 7 (2017) 1\u201321.","DOI":"10.1007\/s13278-017-0429-2"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"crossref","unstructured":"Volker Strassen. 1969. Gaussian elimination is not optimal. Numer. Math. 13 4 (1969) 354\u2013356.","DOI":"10.1007\/BF02165411"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3567501"},{"key":"e_1_3_3_3_35_2","first-page":"515","volume-title":"Proceedings of 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":"Wang Yuke","year":"2021","unstructured":"Yuke Wang, Boyuan Feng, Gushu Li, Shuangchen Li, Lei Deng, Yuan Xie, and Yufei Ding. 2021. GNNAdvisor: An adaptive and efficient runtime system for GNN acceleration on GPUs. In Proceedings of 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI). 515\u2013531."},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"crossref","unstructured":"Zonghan Wu Shirui Pan Fengwen Chen Guodong Long Chengqi Zhang and Philip\u00a0S. Yu. 2020. A comprehensive survey on graph neural networks. IEEE Transactions on Neural Networks and Learning Systems 32 1 (2020) 4\u201324.","DOI":"10.1109\/TNNLS.2020.2978386"},{"key":"e_1_3_3_3_37_2","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems (NIPS)","author":"Yadati Naganand","year":"2019","unstructured":"Naganand Yadati, Madhav Nimishakavi, Prateek Yadav, Vikram Nitin, Anand Louis, and Partha Talukdar. 2019. HyperGCN: a new method of training graph convolutional networks on hypergraphs. In Proceedings of the 33rd International Conference on Neural Information Processing Systems (NIPS). Article 135, 12\u00a0pages."},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00012"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071102"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"crossref","unstructured":"Feng Zhang Chenyang Zhang Lin Yang Shuhao Zhang Bingsheng He Wei Lu and Xiaoyong Du. 2021. Fine-grained multi-query stream processing on integrated architectures. IEEE Transactions on Parallel and Distributed Systems 32 9 (2021) 2303\u20132320.","DOI":"10.1109\/TPDS.2021.3066407"},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446702"},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00030"},{"key":"e_1_3_3_3_43_2","unstructured":"Ke Zhao. 2021. Strassen Algorithm and Its Performance. International Core Journal of Engineering 7 4 (2021) 75\u201381."},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406712"},{"key":"e_1_3_3_3_45_2","unstructured":"Rong Zhu Kun Zhao Hongxia Yang Wei Lin Chang Zhou Baole Ai Yong Li and Jingren Zhou. 2019. Aligraph: A comprehensive graph neural network platform. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1902.08730 (2019)."}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759826","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:39:35Z","timestamp":1773254375000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759826"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":44,"alternative-id":["10.1145\/3712285.3759826","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759826","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}