{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T11:07:26Z","timestamp":1762254446948,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T00:00:00Z","timestamp":1677456000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62192784","62172052","U20B2045"],"award-info":[{"award-number":["62192784","62172052","U20B2045"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,27]]},"DOI":"10.1145\/3539597.3570480","type":"proceedings-article","created":{"date-parts":[[2023,2,22]],"date-time":"2023-02-22T23:27:00Z","timestamp":1677108420000},"page":"123-131","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Learning to Distill Graph Neural Networks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7821-0030","authenticated-orcid":false,"given":"Cheng","family":"Yang","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1913-014X","authenticated-orcid":false,"given":"Yuxin","family":"Guo","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4748-8133","authenticated-orcid":false,"given":"Yao","family":"Xu","sequence":"additional","affiliation":[{"name":"Researcher, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3734-0266","authenticated-orcid":false,"given":"Chuan","family":"Shi","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2437-0455","authenticated-orcid":false,"given":"Jiawei","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5778-9490","authenticated-orcid":false,"given":"Chunchen","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6888-7064","authenticated-orcid":false,"given":"Xin","family":"Li","sequence":"additional","affiliation":[{"name":"Researcher, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2292-432X","authenticated-orcid":false,"given":"Ning","family":"Guo","sequence":"additional","affiliation":[{"name":"Researcher, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1395-261X","authenticated-orcid":false,"given":"Hongzhi","family":"Yin","sequence":"additional","affiliation":[{"name":"The University of Queensland, Brisbane, QLD, Australia"}]}],"member":"320","published-online":{"date-parts":[[2023,2,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Graph convolutional encoders for syntax-aware neural machine translation. arXiv preprint arXiv:1704.04675","author":"Bastings Jasmijn","year":"2017","unstructured":"Jasmijn Bastings, Ivan Titov, Wilker Aziz, Diego Marcheggiani, and Khalil Sima'an. 2017. Graph convolutional encoders for syntax-aware neural machine translation. arXiv preprint arXiv:1704.04675 (2017)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2019.00059"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/314"},{"key":"e_1_3_2_2_4_1","volume-title":"arxiv","author":"Dong Bin","year":"2019","unstructured":"Bin Dong, Jikai Hou, Yiping Lu, and Zhihua Zhang. 2019. Distillation $approx$ Early Stopping? Harvesting Dark Knowledge Utilizing Anisotropic Information Retrieval For Overparameterized Neural Network. arxiv (2019)."},{"key":"e_1_3_2_2_5_1","volume-title":"FreeKD: Free-direction Knowledge Distillation for Graph Neural Networks. arXiv preprint arXiv:2206.06561","author":"Feng Kaituo","year":"2022","unstructured":"Kaituo Feng, Changsheng Li, Ye Yuan, and Guoren Wang. 2022. FreeKD: Free-direction Knowledge Distillation for Graph Neural Networks. arXiv preprint arXiv:2206.06561 (2022)."},{"key":"e_1_3_2_2_6_1","volume-title":"Proceedings of ICML.","author":"Furlanello Tommaso","year":"2018","unstructured":"Tommaso Furlanello, Zachary Lipton, Michael Tschannen, Laurent Itti, and Anima Anandkumar. 2018. Born Again Neural Networks. In Proceedings of ICML."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.14778\/3447689.3447704"},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of ICML. 1263--1272","author":"Gilmer Justin","year":"2017","unstructured":"Justin Gilmer, Samuel S Schoenholz, Patrick F Riley, Oriol Vinyals, and George E Dahl. 2017. Neural message passing for Quantum chemistry. In Proceedings of ICML. 1263--1272."},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of NeurIPS. 1024--1034","author":"Hamilton Will","year":"2017","unstructured":"Will Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. In Proceedings of NeurIPS. 1024--1034."},{"key":"e_1_3_2_2_10_1","volume-title":"Compressing Deep Graph Neural Networks via Adversarial Knowledge Distillation. arXiv preprint arXiv:2205.11678","author":"He Huarui","year":"2022","unstructured":"Huarui He, Jie Wang, Zhanqiu Zhang, and Feng Wu. 2022. Compressing Deep Graph Neural Networks via Adversarial Knowledge Distillation. arXiv preprint arXiv:2205.11678 (2022)."},{"key":"e_1_3_2_2_11_1","volume-title":"Proceedings of NeurIPS.","author":"Hinton Geoffrey","year":"2014","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2014. Distilling the knowledge in a neural network. Proceedings of NeurIPS."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01545"},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of ICLR.","author":"Kipf Thomas N","year":"2017","unstructured":"Thomas N Kipf and Max Welling. 2017. Semi-supervised classification with graph convolutional networks. In Proceedings of ICLR."},{"key":"e_1_3_2_2_14_1","volume-title":"Proceedings of ICLR.","author":"Klicpera Johannes","year":"2018","unstructured":"Johannes Klicpera, Aleksandar Bojchevski, and Stephan G\u00fcnnemann. 2018. Predict then Propagate: Graph Neural Networks meet Personalized PageRank. In Proceedings of ICLR."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.3025100"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.14778\/3430915.3430924"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1159"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3152585"},{"key":"e_1_3_2_2_19_1","volume-title":"Pitfalls of graph neural network evaluation. arxiv","author":"Shchur Oleksandr","year":"2018","unstructured":"Oleksandr Shchur, Maximilian Mumme, Aleksandar Bojchevski, and Stephan G\u00fcnnemann. 2018. Pitfalls of graph neural network evaluation. arxiv (2018)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.344"},{"key":"e_1_3_2_2_21_1","volume-title":"Visualizing data using t-SNE. JMLR 11","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. JMLR 11 (2008)."},{"key":"e_1_3_2_2_22_1","volume-title":"Proceedings of NeurIPS. 5998--6008","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Proceedings of NeurIPS. 5998--6008."},{"key":"e_1_3_2_2_23_1","volume-title":"Proceedings of ICLR.","author":"Petar Velivc","year":"2018","unstructured":"Petar Velivc kovi\u0107, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. 2018. Graph Attention Networks. In Proceedings of ICLR."},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of ICML. 6861--6871","author":"Wu Felix","year":"2019","unstructured":"Felix Wu, Amauri Souza, Tianyi Zhang, Christopher Fifty, Tao Yu, and Kilian Weinberger. 2019. Simplifying Graph Convolutional Networks. In Proceedings of ICML. 6861--6871."},{"key":"e_1_3_2_2_25_1","volume-title":"A comprehensive survey on graph neural networks. TNNLS","author":"Wu Zonghan","year":"2020","unstructured":"Zonghan Wu, Shirui Pan, Fengwen Chen, Guodong Long, Chengqi Zhang, and S Yu Philip. 2020. A comprehensive survey on graph neural networks. TNNLS (2020)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"volume-title":"Feature normalized knowledge distillation for image classification","author":"Xu Kunran","key":"e_1_3_2_2_27_1","unstructured":"Kunran Xu, Lai Rui, Yishi Li, and Lin Gu. 2020. Feature normalized knowledge distillation for image classification. In Proceedings of ECCV. Springer, 664--680."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403236"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450068"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00710"},{"key":"e_1_3_2_2_31_1","volume-title":"MuL-GRN: Multi-Level Graph Relation Network for Few-Shot Node Classification","author":"Zhang Lingling","year":"2022","unstructured":"Lingling Zhang, Shaowei Wang, Jun Liu, Qika Lin, Xiaojun Chang, Yaqiang Wu, and Qinghua Zheng. 2022. MuL-GRN: Multi-Level Graph Relation Network for Few-Shot Node Classification. IEEE Transactions on Knowledge and Data Engineering (2022)."},{"key":"e_1_3_2_2_32_1","volume-title":"Proceedings of ICLR.","author":"Zhang Shichang","year":"2021","unstructured":"Shichang Zhang, Yozen Liu, Yizhou Sun, and Neil Shah. 2021b. Graph-less neural networks: Teaching old mlps new tricks via distillation. In Proceedings of ICLR."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467221"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389706"},{"key":"e_1_3_2_2_35_1","volume-title":"Proceedings of NeurIPS.","author":"Zhang Zhilu","year":"2020","unstructured":"Zhilu Zhang and Mert Sabuncu. 2020. Self-Distillation as Instance-Specific Label Smoothing. Proceedings of NeurIPS."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00054"}],"event":{"name":"WSDM '23: The Sixteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Singapore Singapore","acronym":"WSDM '23"},"container-title":["Proceedings of the Sixteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570480","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539597.3570480","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:15Z","timestamp":1750186935000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570480"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,27]]},"references-count":36,"alternative-id":["10.1145\/3539597.3570480","10.1145\/3539597"],"URL":"https:\/\/doi.org\/10.1145\/3539597.3570480","relation":{},"subject":[],"published":{"date-parts":[[2023,2,27]]},"assertion":[{"value":"2023-02-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}