{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,16]],"date-time":"2026-07-16T06:17:55Z","timestamp":1784182675122,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651322","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"528-544","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Hector: An Efficient Programming and Compilation Framework for Implementing Relational Graph Neural Networks in GPU Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0149-1409","authenticated-orcid":false,"given":"Kun","family":"Wu","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9276-5075","authenticated-orcid":false,"given":"Mert","family":"Hidayeto\u011flu","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5030-5054","authenticated-orcid":false,"given":"Xiang","family":"Song","sequence":"additional","affiliation":[{"name":"AWS AI, Santa Clara, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7669-1467","authenticated-orcid":false,"given":"Sitao","family":"Huang","sequence":"additional","affiliation":[{"name":"University of California, Irvine, Irvine, California, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8115-5415","authenticated-orcid":false,"given":"Da","family":"Zheng","sequence":"additional","affiliation":[{"name":"AWS AI, Santa Clara, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5022-5716","authenticated-orcid":false,"given":"Israt","family":"Nisa","sequence":"additional","affiliation":[{"name":"AWS AI, Santa Clara, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2532-5349","authenticated-orcid":false,"given":"Wen-Mei","family":"Hwu","sequence":"additional","affiliation":[{"name":"Nvidia, Champaign, Illinois, United States of America"},{"name":"University of Illinois at Urbana-Champaign, Champaign, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-76298-0_5"},{"key":"e_1_3_2_1_2_1","volume-title":"Relational graph attention networks. arXiv preprint arXiv:1904.05811","author":"Busbridge Dan","year":"2019","unstructured":"Dan Busbridge, Dane Sherburn, Pietro Cavallo, and Nils Y. Hammerla. Relational graph attention networks. arXiv preprint arXiv:1904.05811, 2019. https:\/\/arxiv.org\/abs\/1904.05811."},{"key":"e_1_3_2_1_3_1","first-page":"578","volume-title":"Arvind Krishnamurthy. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pages 578--594, 2018. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-30284-8_56"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40988-2_39"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1021\/jm00106a046"},{"key":"e_1_3_2_1_7_1","volume-title":"Fast graph representation learning with pytorch geometric. arXiv preprint arXiv:1903.02428","author":"Fey Matthias","year":"2019","unstructured":"Matthias Fey and Jan Eric Lenssen. Fast graph representation learning with pytorch geometric. arXiv preprint arXiv:1903.02428, 2019. https:\/\/arxiv.org\/abs\/1903.02428."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3502181.3531467"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571980"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC43674.2020.9286206"},{"key":"e_1_3_2_1_11_1","unstructured":"Weihua Hu Matthias Fey Marinka Zitnik Yuxiao Dong Hongyu Ren Bowen Liu Michele Catasta and Jure Leskovec. Open Graph Benchmark: Datasets for Machine Learning on Graphs. http:\/\/arxiv.org\/abs\/2005.00687 February 2021."},{"key":"e_1_3_2_1_12_1","first-page":"1","volume-title":"Yida Wang. FeatGraph: A Flexible and Efficient Backend for Graph Neural Network Systems. In SC20: International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Hu Yuwei","year":"2020","unstructured":"Yuwei Hu, Zihao Ye, Minjie Wang, Jiali Yu, Da Zheng, Mu Li, Zheng Zhang, Zhiru Zhang, and Yida Wang. FeatGraph: A Flexible and Efficient Backend for Graph Neural Network Systems. In SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pages 1--13, Atlanta, GA, USA, November 2020. IEEE. https:\/\/ieeexplore.ieee.org\/document\/9355318\/."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380027"},{"key":"e_1_3_2_1_14_1","unstructured":"Guyue Huang Guohao Dai Yu Wang Yufei Ding and Yuan Xie. Efficient Sparse Matrix Kernels based on Adaptive Workload-Balancing and Parallel-Reduction. http:\/\/arxiv.org\/abs\/2106.16064 October 2021."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433796"},{"key":"e_1_3_2_1_16_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907, 2016. https:\/\/arxiv.org\/abs\/1609.02907."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661185"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2833157.2833162"},{"key":"e_1_3_2_1_20_1","volume-title":"Oleksandr Zinenko. MLIR: Scaling Compiler Infrastructure for Domain Specific Computation. In CGO 2021","author":"Lattner Chris","year":"2021","unstructured":"Chris Lattner, Mehdi Amini, Uday Bondhugula, Albert Cohen, Andy Davis, Jacques Arnaud Pienaar, River Riddle, Tatiana Shpeisman, Nicolas Vasilache, and Oleksandr Zinenko. MLIR: Scaling Compiler Infrastructure for Domain Specific Computation. In CGO 2021, 2021. https:\/\/ieeexplore.ieee.org\/document\/9370308."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"e_1_3_2_1_22_1","volume-title":"Programming Massively Parallel Processors. Morgan Kaufmann","author":"Hwu Wen","year":"2023","unstructured":"Wen mei W. Hwu, David B. Kirk, and Izzat El Hajj. Programming Massively Parallel Processors. Morgan Kaufmann, fourth edition, 2023. https:\/\/www.sciencedirect.com\/book\/9780323912310\/programming-massively-parallel-processors."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476264"},{"key":"e_1_3_2_1_24_1","volume-title":"January","author":"Nisa Israt","year":"2022","unstructured":"Israt Nisa. [Feature] Gather mm by isratnisa \u00b7 Pull Request #3641 \u00b7 dmlc\/dgl. https:\/\/github.com\/dmlc\/dgl\/pull\/3641, January 2022."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW59300.2023.00042"},{"key":"e_1_3_2_1_26_1","volume-title":"September","year":"2020","unstructured":"Nvidia. Controlling Data Movement to Boost Performance on the NVIDIA Ampere Architecture. https:\/\/developer.nvidia.com\/blog\/controlling-data-movement-to-boost-performance-on-ampere-architecture\/, September 2020."},{"key":"e_1_3_2_1_27_1","volume-title":"March","year":"2021","unstructured":"Nvidia. Accelerating Matrix Multiplication with Block Sparse Format and NVIDIA Tensor Cores | NVIDIA Technical Blog. https:\/\/developer.nvidia.com\/blog\/accelerating-matrix-multiplication-with-block-sparse-format-and-nvidia-tensor-cores\/, March 2021."},{"key":"e_1_3_2_1_28_1","volume-title":"July","year":"2023","unstructured":"Nvidia. cublas<t>gemmBatched() | cuBLAS Library User Guide v12.2. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html#cublas-t-gemmbatched#:~:text=make%20multiple%20calls%20to%20cublas%3Ct%3Egemm, July 2023."},{"key":"e_1_3_2_1_29_1","volume-title":"January","author":"TorchScript","year":"2024","unstructured":"PyTorch. TorchScript --- PyTorch 2.2 documentation. https:\/\/pytorch.org\/docs\/stable\/jit.html, January 2024."},{"key":"e_1_3_2_1_30_1","first-page":"256","volume-title":"Ariful Azad. FusedMM: A Unified SDDMM-SpMM Kernel for Graph Embedding and Graph Neural Networks. In 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","author":"Khaledur Rahman Md.","year":"2021","unstructured":"Md. Khaledur Rahman, Majedul Haque Sujon, and Ariful Azad. FusedMM: A Unified SDDMM-SpMM Kernel for Graph Embedding and Graph Neural Networks. In 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pages 256--266, May 2021. https:\/\/ieeexplore.ieee.org\/document\/9460486."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93417-4_38"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-4007"},{"key":"e_1_3_2_1_33_1","volume-title":"Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv preprint arXiv:1909.01315","author":"Wang Minjie","year":"2019","unstructured":"Minjie Wang, Da Zheng, Zihao Ye, Quan Gan, Mufei Li, Xiang Song, Jinjing Zhou, Chao Ma, Lingfan Yu, Yu Gai, et al. Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv preprint arXiv:1909.01315, 2019. https:\/\/arxiv.org\/abs\/1909.01315."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.015"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456247"},{"key":"e_1_3_2_1_36_1","first-page":"515","volume-title":"Graphiler: Optimizing graph neural networks with message passing data flow graph","author":"Xie Zhiqiang","year":"2022","unstructured":"Zhiqiang Xie, Minjie Wang, Zihao Ye, Zheng Zhang, and Rui Fan. Graphiler: Optimizing graph neural networks with message passing data flow graph. In D. Marculescu, Y. Chi, and C. Wu, editors, Proceedings of Machine Learning and Systems, volume 4, pages 515--528, 2022. https:\/\/proceedings.mlsys.org\/paper\/2022\/file\/a87ff679a2f3e71d9181a67b7542122c-Paper.pdf."},{"key":"e_1_3_2_1_37_1","volume-title":"January","author":"Xie Zhiqiang","year":"2023","unstructured":"Zhiqiang Xie and Zihao Ye. Graphiler Repository on Github. https:\/\/github.com\/xiezhq-hermann\/graphiler, January 2023."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582047"},{"key":"e_1_3_2_1_39_1","volume-title":"August","author":"Zheng Da","year":"2021","unstructured":"Da Zheng and George Karypis. The Nature of Graph Neural Network Workloads. https:\/\/hc33.hotchips.org\/assets\/program\/tutorials\/HC2021.Amazon.DaZheng.v2.pdf, August 2021."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IA351965.2020.00011"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651322","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:42Z","timestamp":1750291422000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651322"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":40,"alternative-id":["10.1145\/3620666.3651322","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651322","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}