{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T23:09:24Z","timestamp":1774307364127,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,3,28]],"date-time":"2022-03-28T00:00:00Z","timestamp":1648425600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2124039"],"award-info":[{"award-number":["2124039"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,4,2]]},"DOI":"10.1145\/3503221.3508408","type":"proceedings-article","created":{"date-parts":[[2022,3,28]],"date-time":"2022-03-28T13:58:22Z","timestamp":1648475902000},"page":"107-119","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":43,"title":["QGTC"],"prefix":"10.1145","author":[{"given":"Yuke","family":"Wang","sequence":"first","affiliation":[{"name":"University of California"}]},{"given":"Boyuan","family":"Feng","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Yufei","family":"Ding","sequence":"additional","affiliation":[{"name":"University of California"}]}],"member":"320","published-online":{"date-parts":[[2022,3,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 12th USEN1X Conference on Operating Systems Design and Implementation (OSDI'16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A System for Large-Scale Machine Learning. In Proceedings of the 12th USEN1X Conference on Operating Systems Design and Implementation (OSDI'16). Savannah, GA, USA."},{"key":"e_1_3_2_1_2_1","volume-title":"Fast Batched Matrix Multiplication for Small Sizes Using Half-Precision Arithmetic on GPUs. In 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS).","author":"Abdelfattah A.","unstructured":"A. Abdelfattah, S. Tomov, and J. Dongarra. 2019. Fast Batched Matrix Multiplication for Small Sizes Using Half-Precision Arithmetic on GPUs. In 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS)."},{"key":"e_1_3_2_1_3_1","volume-title":"Binary Graph Neural Networks. arXiv preprint arXiv:2012.15823","author":"Bahri Mehdi","year":"2020","unstructured":"Mehdi Bahri, Ga\u00e9tan Bahl, and Stefanos Zafeiriou. 2020. Binary Graph Neural Networks. arXiv preprint arXiv:2012.15823 (2020)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330925"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368826.3377912"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 1969 24th National Conference.","author":"Cuthill E.","unstructured":"E. Cuthill and J. McKee. [n.d.]. Reducing the Bandwidth of Sparse Symmetric Matrices. In Proceedings of the 1969 24th National Conference."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the ACM International Conference on Supercomputing.","author":"Dakkak Abdul","unstructured":"Abdul Dakkak, Cheng Li, Jinjun Xiong, Isaac Gelado, and Wen-mei Hwu. [n.d.]. Accelerating Reduction and Scan Using Tensor Core Units. In Proceedings of the ACM International Conference on Supercomputing."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441599"},{"key":"e_1_3_2_1_9_1","volume-title":"SGQuant: Squeezing the Last Bit on Graph Neural Networks with Specialized Quantization. In IEEE 32nd International Conference on Tools with Artificial Intelligence (ICTAI).","author":"Feng Boyuan","year":"2020","unstructured":"Boyuan Feng, Yuke Wang, Xu Li, Shu Yang, Xueqiao Peng, and Yufei Ding. 2020. SGQuant: Squeezing the Last Bit on Graph Neural Networks with Specialized Quantization. In IEEE 32nd International Conference on Tools with Artificial Intelligence (ICTAI)."},{"key":"e_1_3_2_1_10_1","volume-title":"Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds (ICLR).","author":"Fey Matthias","unstructured":"Matthias Fey and Jan E. Lenssen. 2019. Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds (ICLR)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939754"},{"key":"e_1_3_2_1_12_1","unstructured":"Will Hamilton Zhitao Ying and Jure Leskovec. 2017. Inductive representation learning on large graphs. In Advances in neural information processing systems (NeurIPS)."},{"key":"e_1_3_2_1_13_1","volume-title":"Open Graph Benchmark: Datasets for Machine Learning on Graphs. arXiv preprint arXiv:2005.00687","author":"Hu Weihua","year":"2020","unstructured":"Weihua Hu, Matthias Fey, Marinka Zitnik, Yuxiao Dong, Hongyu Ren, Bowen Liu, Michele Catasta, and Jure Leskovec. 2020. Open Graph Benchmark: Datasets for Machine Learning on Graphs. arXiv preprint arXiv:2005.00687 (2020)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467300"},{"key":"e_1_3_2_1_15_1","volume-title":"POLE: Polarized Embedding for Signed Networks. arXiv preprint arXiv:2110.09899.","author":"Huang Zexi","year":"2022","unstructured":"Zexi Huang, Arlei Silva, and Ambuj Singh. 2022. POLE: Polarized Embedding for Signed Networks. arXiv preprint arXiv:2110.09899."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157382.3157557"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.80"},{"key":"e_1_3_2_1_18_1","unstructured":"George Karypis and Vipin Kumar. 2009. MeTis: Unstructured Graph Partitioning and Sparse Matrix Ordering System Version 4.0. http:\/\/www.cs.umn.edu\/~metis."},{"key":"e_1_3_2_1_19_1","unstructured":"Kristian Kersting Nils M. Kriege Christopher Morris Petra Mutzel and Marion Neumann. 2016. Benchmark Data Sets for Graph Kernels. http:\/\/graphkernels.cs.tu-dortmund.de"},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Kipf Thomas N","year":"2017","unstructured":"Thomas N Kipf and Max Welling. 2017. Semi-supervised classification with graph convolutional networks. International Conference on Learning Representations (ICLR) (2017)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3045828"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/1241540.1241551"},{"key":"e_1_3_2_1_23_1","volume-title":"USENIX Annual Technical Conference.","author":"Ma Lingxiao","year":"2019","unstructured":"Lingxiao Ma, Zhi Yang, Youshan Miao, Jilong Xue, Ming Wu, Lidong Zhou, and Yafei Dai. 2019. Neugraph: parallel deep neural network computation on large graphs. In USENIX Annual Technical Conference."},{"key":"e_1_3_2_1_24_1","unstructured":"Nvidia. [n.d.]. CUBLAS Library. developer.nvidia.com\/cublas"},{"key":"e_1_3_2_1_25_1","unstructured":"NVIDIA. [n.d.]. CUDA Template Library for Dense Linear Algebra at All Levels and Scales (CUTLASS)."},{"key":"e_1_3_2_1_26_1","unstructured":"Nvidia. [n.d.]. Warp Matrix Multiply-Accumulate (WMMA). docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html#wmma"},{"key":"e_1_3_2_1_27_1","unstructured":"NVIDIA. 2017. Programming Tensor Cores in CUDA 9. devblogs.nvidia.com\/programming-tensor-cores-cuda-9\/"},{"key":"e_1_3_2_1_28_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems (NeurIPS) 32."},{"key":"e_1_3_2_1_29_1","volume-title":"Near linear time algorithm to detect community structures in large-scale networks. Physical review E","author":"Raghavan Usha Nandini","year":"2007","unstructured":"Usha Nandini Raghavan, R\u00e9ka Albert, and Soundar Kumara. 2007. Near linear time algorithm to detect community structures in large-scale networks. Physical review E (2007)."},{"key":"e_1_3_2_1_30_1","volume-title":"Degree-Quant: Quantization-Aware Training for Graph Neural Networks. International Conference on Learning Representations","author":"Tailor Shyam A","year":"2021","unstructured":"Shyam A Tailor, Javier Fernandez-Marques, and Nicholas D Lane. 2021. Degree-Quant: Quantization-Aware Training for Graph Neural Networks. International Conference on Learning Representations (2021)."},{"key":"e_1_3_2_1_31_1","volume-title":"Graph Attention Networks. In International Conference on Learning Representations (ICLR).","author":"Veli\u010dkovi\u0107 Petar","year":"2018","unstructured":"Petar Veli\u010dkovi\u0107, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. 2018. Graph Attention Networks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_32_1","volume-title":"Deep Graph Library: Towards Efficient and Scalable Deep Learning on Graphs. ICLR Workshop on Representation Learning on Graphs and Manifolds","author":"Wang Minjie","year":"2019","unstructured":"Minjie Wang, Lingfan Yu, Da Zheng, Quan Gan, Yu Gai, Zihao Ye, Mufei Li, Jinjing Zhou, Qi Huang, Chao Ma, Ziyue Huang, Qipeng Guo, Hao Zhang, Haibin Lin, Junbo Zhao, Jinyang Li, Alexander J Smola, and Zheng Zhang. 2019. Deep Graph Library: Towards Efficient and Scalable Deep Learning on Graphs. ICLR Workshop on Representation Learning on Graphs and Manifolds (2019)."},{"key":"e_1_3_2_1_33_1","volume-title":"GNNAdvisor: An Efficient Runtime System for GNN Acceleration on GPUs. In USENIX Symposium on Operating Systems Design and Implementation (OSDI'21)","author":"Wang Yuke","year":"2021","unstructured":"Yuke Wang, Boyuan Feng, Gushu Li, Shuangchen Li, Lei Deng, Yuan Xie, and Yufei Ding. 2021. GNNAdvisor: An Efficient Runtime System for GNN Acceleration on GPUs. In USENIX Symposium on Operating Systems Design and Implementation (OSDI'21)."},{"key":"e_1_3_2_1_34_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Xu Keyulu","year":"2019","unstructured":"Keyulu Xu, Weihua Hu, Jure Leskovec, and Stefanie Jegelka. 2019. How Powerful are Graph Neural Networks?. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327345.3327389"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373087.3375312"}],"event":{"name":"PPoPP '22: 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Seoul Republic of Korea","acronym":"PPoPP '22","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503221.3508408","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3503221.3508408","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503221.3508408","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503221.3508408","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:49Z","timestamp":1750186849000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503221.3508408"}},"subtitle":["accelerating quantized graph neural networks via GPU tensor core"],"short-title":[],"issued":{"date-parts":[[2022,3,28]]},"references-count":36,"alternative-id":["10.1145\/3503221.3508408","10.1145\/3503221"],"URL":"https:\/\/doi.org\/10.1145\/3503221.3508408","relation":{},"subject":[],"published":{"date-parts":[[2022,3,28]]},"assertion":[{"value":"2022-03-28","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}