{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,2]],"date-time":"2026-07-02T23:36:59Z","timestamp":1783035419304,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"The Research Grants Council of Hong Kong, China","award":["No.14205520"],"award-info":[{"award-number":["No.14205520"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3689031.3717460","type":"proceedings-article","created":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:25:20Z","timestamp":1742970320000},"page":"803-817","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Groot: Graph-Centric Row Reordering with Tree for Sparse Matrix Multiplications on Tensor Cores"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3392-8388","authenticated-orcid":false,"given":"YuAng","family":"Chen","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4535-8359","authenticated-orcid":false,"given":"Jiadong","family":"Xie","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1210-4672","authenticated-orcid":false,"given":"Siyi","family":"Teng","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9770-6522","authenticated-orcid":false,"given":"Wenqi","family":"Zeng","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9738-827X","authenticated-orcid":false,"given":"Jeffrey Xu","family":"Yu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Practical and optimal LSH for angular distance. Advances in neural information processing systems 28","author":"Andoni Alexandr","year":"2015","unstructured":"Alexandr Andoni, Piotr Indyk, Thijs Laarhoven, Ilya Razenshteyn, and Ludwig Schmidt. 2015. Practical and optimal LSH for angular distance. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2016.110"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2019.02.006"},{"key":"e_1_3_2_1_4_1","volume-title":"Studying the Effect of Lightweight Graph Reordering Across Applications and Input Graphs. In 2018 IEEE International Symposium on Workload Characterization (IISWC). 203--214","author":"Balaji V.","unstructured":"V. Balaji and B. Lucia. 2018. When is Graph Reordering an Optimization? Studying the Effect of Lightweight Graph Reordering Across Applications and Input Graphs. In 2018 IEEE International Symposium on Workload Characterization (IISWC). 203--214."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593725"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476182"},{"key":"e_1_3_2_1_7_1","volume-title":"Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509","author":"Child Rewon","year":"2019","unstructured":"Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. 2019. Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509 (2019)."},{"key":"e_1_3_2_1_8_1","volume-title":"The traveling salesman problem: a computational study","author":"Cook William J","unstructured":"William J Cook, David L Applegate, Robert E Bixby, and Vasek Chvatal. 2011. The traveling salesman problem: a computational study. Princeton university press."},{"key":"e_1_3_2_1_9_1","unstructured":"NVIDIA Corporation. 2014. The API Reference guide for cuBLAS the CUDA Basic Linear Algebra Subroutine library. https:\/\/docs.nvidia.com\/cuda\/cublas\/. Accessed: 2024-01-05."},{"key":"e_1_3_2_1_10_1","unstructured":"NVIDIA Corporation. 2014. cuSPARSE the CUDA sparse matrix library. https:\/\/docs.nvidia.com\/cuda\/cusparse\/. Accessed: 2024-06-05."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530508"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1963405.1963487"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC47752.2019.9041948"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651378"},{"key":"e_1_3_2_1_15_1","volume-title":"Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:1903.02428","author":"Fey Matthias","year":"2019","unstructured":"Matthias Fey and Jan Eric Lenssen. 2019. Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:1903.02428 (2019)."},{"key":"e_1_3_2_1_16_1","volume-title":"The traveling-salesman problem. Operations research 4, 1","author":"Flood Merrill M","year":"1956","unstructured":"Merrill M Flood. 1956. The traveling-salesman problem. Operations research 4, 1 (1956), 61--75."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11010177"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433723"},{"key":"e_1_3_2_1_19_1","volume-title":"Low-Power Computer Vision","author":"Gholami Amir","unstructured":"Amir Gholami, Sehoon Kim, Zhen Dong, Zhewei Yao, Michael W Mahoney, and Kurt Keutzer. 2022. A survey of quantization methods for efficient neural network inference. In Low-Power Computer Vision. Chapman and Hall\/CRC, 291--326."},{"key":"e_1_3_2_1_20_1","volume-title":"Gpu kernels for block-sparse weights. arXiv preprint arXiv:1711.09224 3, 2","author":"Gray Scott","year":"2017","unstructured":"Scott Gray, Alec Radford, and Diederik P Kingma. 2017. Gpu kernels for block-sparse weights. arXiv preprint arXiv:1711.09224 3, 2 (2017), 2."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.616"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3295712"},{"key":"e_1_3_2_1_23_1","volume-title":"SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1--12","author":"Huang Guyue","year":"2020","unstructured":"Guyue Huang, Guohao Dai, Yu Wang, and Huazhong Yang. 2020. Gespmm: General-purpose sparse matrix-matrix multiplication on gpus for graph neural networks. In SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1--12."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441585"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/276698.276876"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.57"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374546"},{"key":"e_1_3_2_1_28_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9939-1956-0078686-7"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3380600"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571934"},{"key":"e_1_3_2_1_32_1","first-page":"1475","article-title":"Approximate nearest neighbor search on high dimensional data - experiments, analyses, and improvement","volume":"32","author":"Li Wen","year":"2019","unstructured":"Wen Li, Ying Zhang, Yifang Sun, Wei Wang, Mingjie Li, Wenjie Zhang, and Xuemin Lin. 2019. Approximate nearest neighbor search on high dimensional data - experiments, analyses, and improvement. IEEE TKDE 32, 8 (2019), 1475--1488.","journal-title":"IEEE TKDE"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2013.10.006"},{"key":"e_1_3_2_1_34_1","volume-title":"Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs","author":"Malkov Yu A","year":"2018","unstructured":"Yu A Malkov and Dmitry A Yashunin. 2018. Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE transactions on pattern analysis and machine intelligence 42, 4 (2018), 824--836."},{"key":"e_1_3_2_1_35_1","volume-title":"Erwin Laure, Ivy Bo Peng, and Jeffrey S Vetter.","author":"Markidis Stefano","year":"2018","unstructured":"Stefano Markidis, Steven Wei Der Chien, Erwin Laure, Ivy Bo Peng, and Jeffrey S Vetter. 2018. Nvidia Tensor Core programmability, performance & precision. In 2018 IEEE international parallel and distributed processing symposium workshops (IPDPSW). IEEE, 522--531."},{"key":"e_1_3_2_1_36_1","volume-title":"Amsterdam, The Netherlands","volume":"153","author":"Martinez Julieta","year":"2016","unstructured":"Julieta Martinez, Joris Clement, Holger H. Hoos, and James J. Little. 2016. Revisiting Additive Quantization. In Computer Vision - ECCV 2016 - 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part II (Lecture Notes in Computer Science, Vol. 9906). Springer, 137--153."},{"key":"e_1_3_2_1_37_1","unstructured":"Pasi Fr\u00e4nti Henrik Nenonen. [n. d.]. Modifying Kruskal algorithm to solve open loop TSP. ([n. d.])."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00034"},{"key":"e_1_3_2_1_39_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1 (2014), 1929--1958."},{"key":"e_1_3_2_1_40_1","volume-title":"Attention-based graph neural network for semi-supervised learning. arXiv preprint arXiv:1803.03735","author":"Thekumparampil Kiran K","year":"2018","unstructured":"Kiran K Thekumparampil, Chong Wang, Sewoong Oh, and Li-Jia Li. 2018. Attention-based graph neural network for semi-supervised learning. arXiv preprint arXiv:1803.03735 (2018)."},{"key":"e_1_3_2_1_41_1","volume-title":"Jingkuan Song, and Jianqiu Ji.","author":"Wang Jingdong","year":"2014","unstructured":"Jingdong Wang, Heng Tao Shen, Jingkuan Song, and Jianqiu Ji. 2014. Hashing for similarity search: A survey. arXiv preprint arXiv:1408.2927 (2014)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476255"},{"key":"e_1_3_2_1_43_1","volume-title":"A comprehensive survey and experimental comparison of graph-based approximate nearest neighbor search. arXiv preprint arXiv:2101.12631","author":"Wang Mengzhao","year":"2021","unstructured":"Mengzhao Wang, Xiaoliang Xu, Qiang Yue, and Yuxiang Wang. 2021. A comprehensive survey and experimental comparison of graph-based approximate nearest neighbor search. arXiv preprint arXiv:2101.12631 (2021)."},{"key":"e_1_3_2_1_44_1","volume-title":"ICLR workshop on representation learning on graphs and manifolds.","author":"Wang Minjie Yu","year":"2019","unstructured":"Minjie Yu Wang. 2019. Deep graph library: Towards efficient and scalable deep learning on graphs. In ICLR workshop on representation learning on graphs and manifolds."},{"key":"e_1_3_2_1_45_1","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Wang Yuke","year":"2023","unstructured":"Yuke Wang, Boyuan Feng, Zheng Wang, Guyue Huang, and Yufei Ding. 2023. {TC-GNN}: Bridging Sparse {GNN} Computation and Dense Tensor Cores on {GPUs}. In 2023 USENIX Annual Technical Conference (USENIX ATC 23). 149--164."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2915220"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-96983-1_48"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466795"},{"key":"e_1_3_2_1_49_1","volume-title":"Xiyue Gao, Qianru Wang, Yanguo Peng, and Jiangtao Cui.","author":"Yang Shuo","year":"2024","unstructured":"Shuo Yang, Jiadong Xie, Yingfan Liu, Jeffrey Xu Yu, Xiyue Gao, Qianru Wang, Yanguo Peng, and Jiangtao Cui. 2024. Revisiting the Index Construction of Proximity Graph-Based Approximate Nearest Neighbor Search. arXiv preprint arXiv:2410.01231 (2024)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3210259.3210263"},{"key":"e_1_3_2_1_51_1","volume-title":"Todor Mihaylov, Myle Ott, Sam Shleifer, Kurt Shuster, Daniel Simig, Punit Singh Koura, Anjali Sridhar, Tianlu Wang, and Luke Zettlemoyer.","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona Diab, Xian Li, Xi Victoria Lin, Todor Mihaylov, Myle Ott, Sam Shleifer, Kurt Shuster, Daniel Simig, Punit Singh Koura, Anjali Sridhar, Tianlu Wang, and Luke Zettlemoyer. 2022. OPT: Open Pre-trained Transformer Language Models. arXiv:2205.01068 [cs.CL]"}],"event":{"name":"EuroSys '25: Twentieth European Conference on Computer Systems","location":"Rotterdam Netherlands","acronym":"EuroSys '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Twentieth European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689031.3717460","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3689031.3717460","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:19:59Z","timestamp":1755775199000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689031.3717460"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":51,"alternative-id":["10.1145\/3689031.3717460","10.1145\/3689031"],"URL":"https:\/\/doi.org\/10.1145\/3689031.3717460","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}