{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T07:30:35Z","timestamp":1772782235962,"version":"3.50.1"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T00:00:00Z","timestamp":1760486400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T00:00:00Z","timestamp":1760486400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11633-025-1541-9","type":"journal-article","created":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T03:27:18Z","timestamp":1760498838000},"page":"1138-1152","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning Efficient Linear Graph Transformer via Graph-attention Distillation"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-1258-6112","authenticated-orcid":false,"given":"Haotian","family":"Tao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9429-1635","authenticated-orcid":false,"given":"Ziyan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6238-1596","authenticated-orcid":false,"given":"Bo","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,15]]},"reference":[{"key":"1541_CR1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534136","volume-title":"Proceedings of International Joint Conference on Neural Networks","author":"Y Li","year":"2021","unstructured":"Y. Li, Y. Ji, S. Li, S. He, Y. Cao, Y. Liu, H. Liu, X. Li, J. Shi, Y. Yang. Relevance-aware anomalous users detection in social network via graph neural network. In Proceedings of International Joint Conference on Neural Networks, Shenzhen, China, 2021. DOI: https:\/\/doi.org\/10.1109\/IJCNN52387.2021.9534136."},{"key":"1541_CR2","doi-asserted-by":"publisher","first-page":"95223","DOI":"10.1109\/ACCESS.2020.2993876","volume":"8","author":"T Zhong","year":"2020","unstructured":"T. Zhong, T. Wang, J. Wang, J. Wu, F. Zhou. Multiple-aspect attentional graph neural networks for online social network user localization. IEEE Access, vol. 8, pp. 95223\u201395234, 2020. DOI: https:\/\/doi.org\/10.1109\/ACCESS.2020.2993876.","journal-title":"IEEE Access"},{"key":"1541_CR3","doi-asserted-by":"publisher","unstructured":"K. Huang, C. Xiao, L. M. Glass, M. Zitnik, J. Sun. Skip-GNN: Predicting molecular interactions with skip-graph networks. Scientific Reports, vol. 10, no. 1, Article number 21092, 2020. DOI: https:\/\/doi.org\/10.1038\/s41598-020-77766-9.","DOI":"10.1038\/s41598-020-77766-9"},{"issue":"3","key":"1541_CR4","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1038\/s42256-022-00447-x","volume":"4","author":"Y Wang","year":"2022","unstructured":"Y. Wang, J. Wang, Z. Cao, A. B. Farimani. Molecular contrastive learning of representations via graph neural networks. Nature Machine Intelligence, vol. 4, no. 3, pp. 279\u2013287, 2022. DOI: https:\/\/doi.org\/10.1038\/s42256-022-00447-x.","journal-title":"Nature Machine Intelligence"},{"key":"1541_CR5","doi-asserted-by":"publisher","first-page":"2052","DOI":"10.24963\/ijcai.2022\/285","volume-title":"Proceedings of the 31st International Joint Conference on Artificial Intelligence","author":"S Gu","year":"2022","unstructured":"S. Gu, X. Wang, C. Shi, D. Xiao. Self-supervised graph neural networks for multi-behavior recommendation. In Proceedings of the 31st International Joint Conference on Artificial Intelligence, Vienna, Austria, pp. 2052\u20132058, 2022. DOI: https:\/\/doi.org\/10.24963\/ijcai.2022\/285."},{"issue":"5","key":"1541_CR6","doi-asserted-by":"publisher","first-page":"4741","DOI":"10.1109\/TKDE.2022.3151618","volume":"35","author":"M Zhang","year":"2023","unstructured":"M. Zhang, S. Wu, X. Yu, Q. Liu, L. Wang. Dynamic graph neural networks for sequential recommendation. IEEE Transactions on Knowledge Data Engineering, vol. 35, no. 5, pp. 4741\u20134753, 2023. DOI: https:\/\/doi.org\/10.1109\/TKDE.2022.3151618.","journal-title":"IEEE Transactions on Knowledge Data Engineering"},{"issue":"3","key":"1541_CR7","doi-asserted-by":"publisher","first-page":"481","DOI":"10.1007\/s11633-023-1440-x","volume":"21","author":"C Li","year":"2024","unstructured":"C. Li, Y. Cao, Y. Zhu, D. Cheng, C. Li, Y. Morimoto. Ripple knowledge graph convolutional networks for recommendation systems. Machine Intelligence Research, vol. 21, no. 3, pp. 481\u2013494, 2024. DOI: https:\/\/doi.org\/10.1007\/s11633-023-1440-x.","journal-title":"Machine Intelligence Research"},{"key":"1541_CR8","volume-title":"Proceedings of the 5th International Conference on Learning Representations","author":"T N Kipf","year":"2017","unstructured":"T. N. Kipf, M. Welling. Semi-supervised classification with graph convolutional networks. In Proceedings of the 5th International Conference on Learning Representations, Toulon, France, 2017."},{"key":"1541_CR9","first-page":"3844","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems","author":"M Defferrard","year":"2016","unstructured":"M. Defferrard, X. Bresson, P. Vandergheynst. Convolutional neural networks on graphs with fast localized spectral filtering. In Proceedings of the 30th International Conference on Neural Information Processing Systems, Barcelona, Spain, pp. 3844\u20133852, 2016."},{"key":"1541_CR10","volume-title":"Proceedings of the 6th International Conference on Learning Representations","author":"P Veli\u010dkovi\u0107","year":"2018","unstructured":"P. Veli\u010dkovi\u0107, G. Cucurull, A. Casanova, A. Romero, P. Li\u00f2, Y. Bengio. Graph attention networks. In Proceedings of the 6th International Conference on Learning Representations, Vancouver, Canada, 2018."},{"key":"1541_CR11","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"K Xu","year":"2019","unstructured":"K. Xu, W. Hu, J. Leskovec, S. Jegelka. How powerful are graph neural networks? In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1541_CR12","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"H Wang","year":"2022","unstructured":"H. Wang, H. Yin, M. Zhang, P. Li. Equivariant and stable positional encoding for more powerful graph neural networks. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1541_CR13","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","author":"S Yun","year":"2019","unstructured":"S. Yun, M. Jeong, R. Kim, J. Kang, H. J. Kim. Graph transformer networks. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1073, 2019."},{"key":"1541_CR14","first-page":"3469","volume-title":"Proceedings of the 39th International Conference on Machine Learning","author":"D Chen","year":"2022","unstructured":"D. Chen, L. O\u2019Bray, K. Borgwardt. Structure-aware transformer for graph representation learning. In Proceedings of the 39th International Conference on Machine Learning, Baltimore, USA, pp. 3469\u20133489, 2022."},{"key":"1541_CR15","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","author":"Z Wu","year":"2021","unstructured":"Z. Wu, P. Jain, M. Wright, A. Mirhoseini, J. E. Gonzalez, I. Stoica. Representing long-range context for graph neural networks with global attention. In Proceedings of the 35th International Conference on Neural Information Processing Systems, Article number 1016, 2021."},{"key":"1541_CR16","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","author":"C Ying","year":"2021","unstructured":"C. Ying, T. Cai, S. Luo, S. Zheng, G. Ke, D. He, Y. Shen, T. Y. Liu. Do transformers really perform bad for graph representation? In Proceedings of the 35th International Conference on Neural Information Processing Systems, Article number 2212, 2021."},{"key":"1541_CR17","first-page":"533","volume-title":"Proceedings of ACM Web Conference","author":"X Ma","year":"2023","unstructured":"X. Ma, Q. Chen, Y. Wu, G. Song, L. Wang, B. Zheng. Rethinking structural encodings: Adaptive graph transformer for node classification task. In Proceedings of ACM Web Conference, Austin, USA, pp. 533\u2013544, 2023."},{"key":"1541_CR18","unstructured":"J. Zhao, C. Li, Q. Wen, Y. Wang, Y. Liu, H. Sun, X. Xie, Y. Ye. Gophormer: Ego-graph transformer for node classification, [Online], Available: https:\/\/arxiv.org\/abs\/2110.13094, 2021."},{"key":"1541_CR19","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"J Kim","year":"2022","unstructured":"J. Kim, T. D. Nguyen, S. Min, S. J. Cho, M. Lee, H. Lee, S. Hong. Pure transformers are powerful graph learners. In Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 1060, 2022."},{"key":"1541_CR20","first-page":"21171","volume-title":"Proceedings of the 36th Conference on Neural Information Processing Systems","author":"Z Zhang","year":"2022","unstructured":"Z. Zhang, Q. Liu, Q. Hu, C. K. Lee. Hierarchical graph transformer with adaptive node sampling. In Proceedings of the 36th Conference on Neural Information Processing Systems, New Orleans, USA, pp. 21171\u201321183, 2022."},{"key":"1541_CR21","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"L Ramp\u00e1\u0161ek","year":"2022","unstructured":"L. Ramp\u00e1\u0161ek, M. Galkin, V. P. Dwivedi, A. T. Luu, G. Wolf, D. Beaini. Recipe for a general, powerful, scalable graph transformer. In Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 1054, 2022."},{"key":"1541_CR22","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Q Wu","year":"2022","unstructured":"Q. Wu, W. Zhao, Z. Li, D. P. Wipf, J. Yan. Node-Former: A scalable graph structure learning transformer for node classification. In Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 1986, 2022."},{"key":"1541_CR23","volume-title":"Proceedings of the 32nd International Joint Conference on Artificial Intelligence","author":"Y Wu","year":"2023","unstructured":"Y. Wu, Y. Xu, W. Zhu, G. Song, Z. Lin, L. Wang, S. Liu. KDLGT: A linear graph transformer framework via kernel decomposition approach. In Proceedings of the 32nd International Joint Conference on Artificial Intelligence, Macao, China, Article number 263, 2023."},{"key":"1541_CR24","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"J Chen","year":"2023","unstructured":"J. Chen, K. Gao, G. Li, K. He. NAGphormer: A tokenized graph transformer for node classification in large graphs. In Proceedings of the 11th International Conference on Learning Representations, Kigali, Rwanda, 2023."},{"key":"1541_CR25","doi-asserted-by":"crossref","unstructured":"J. Chen, S. Jiang, K. He. NTFormer: A composite node tokenized graph transformer for node classification, [Online], Available: https:\/\/arxiv.org\/abs\/2406.19249, 2024.","DOI":"10.1109\/TBDATA.2025.3624955"},{"key":"1541_CR26","unstructured":"G. E. Hinton, O. Vinyals, J. Dean. Distilling the knowledge in a neural network, [Online], Available: https:\/\/arxiv.org\/abs\/1503.02531, 2015."},{"key":"1541_CR27","doi-asserted-by":"publisher","first-page":"1365","DOI":"10.1109\/ICCV.2019.00145","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"F Tung","year":"2019","unstructured":"F. Tung, G. Mori. Similarity-preserving knowledge distillation. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Seoul, Republic of Korea, pp. 1365\u20131374, 2019. DOI: https:\/\/doi.org\/10.1109\/ICCV.2019.00145."},{"key":"1541_CR28","doi-asserted-by":"publisher","first-page":"11943","DOI":"10.1109\/CVPR52688.2022.01165","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"B Zhao","year":"2022","unstructured":"B. Zhao, Q. Cui, R. Song, Y. Qiu, J. Liang. Decoupled knowledge distillation. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, pp. 11943\u201311952, 2022. DOI: https:\/\/doi.org\/10.1109\/CVPR52688.2022.01165."},{"issue":"1","key":"1541_CR29","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/TETCI.2023.3304948","volume":"8","author":"Z Xiao","year":"2024","unstructured":"Z. Xiao, H. Xing, B. Zhao, R. Qu, S. Luo, P. Dai, K. Li, Z. Zhu. Deep contrastive representation learning with self-distillation. IEEE Transactions on Emerging Topics in Computational Intelligence, vol. 8, no. 1, pp. 3\u201315, 2024. DOI: https:\/\/doi.org\/10.1109\/TETCI.2023.3304948.","journal-title":"IEEE Transactions on Emerging Topics in Computational Intelligence"},{"key":"1541_CR30","first-page":"21618","volume-title":"Proceedings of the 35th Conference on Neural Information Processing Systems","author":"D Kreuzer","year":"2021","unstructured":"D. Kreuzer, D. Beaini, W. Hamilton, V. L\u00e9tourneau, P. Tossou. Rethinking graph transformers with spectral attention. In Proceedings of the 35th Conference on Neural Information Processing Systems, pp. 21618\u201321629, 2021."},{"issue":"1","key":"1541_CR31","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1109\/TETCI.2023.3309626","volume":"8","author":"B Chen","year":"2024","unstructured":"B. Chen, Y. Liu, Z. Zhang, G. Lu, A. W. K. Kong. TransAttUnet: Multi-level attention-guided U-net with transformer for medical image segmentation. IEEE Transactions on Emerging Topics in Computational Intelligence, vol. 8, no. 1, pp. 55\u201368, 2024. DOI: https:\/\/doi.org\/10.1109\/TETCI.2023.3309626.","journal-title":"IEEE Transactions on Emerging Topics in Computational Intelligence"},{"key":"1541_CR32","doi-asserted-by":"publisher","first-page":"1508","DOI":"10.1145\/3539618.3591716","volume-title":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Y Wei","year":"2023","unstructured":"Y. Wei, W. Liu, F. Liu, X. Wang, L. Nie, T. S. Chua. LightGT: A light graph transformer for multimedia recommendation. In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, Taipei, China, pp. 1508\u20131517, 2023. DOI: https:\/\/doi.org\/10.1145\/3539618.3591716."},{"key":"1541_CR33","doi-asserted-by":"publisher","first-page":"1285","DOI":"10.1145\/3626772.3657721","volume-title":"Proceedings of the 47th International ACM SIGIR Conference on Research and DevelopmentinInformationRetrieval","author":"P Zhang","year":"2024","unstructured":"P. Zhang, Y. Yan, X. Zhang, C. Li, S. Wang, F. Huang, S. Kim. TransGNN: Harnessing the collaborative power of transformers and graph neural networks for recommender systems. In Proceedings of the 47th International ACM SIGIR Conference on Research and DevelopmentinInformationRetrieval, Washington DC, USA pp. 1285\u20131295, 2024. DOI: https:\/\/doi.org\/10.1145\/3626772.3657721."},{"key":"1541_CR34","doi-asserted-by":"publisher","first-page":"14420","DOI":"10.1109\/CVPR52729.2023.01386","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"X Liu","year":"2023","unstructured":"X. Liu, H. Peng, N. Zheng, Y. Yang, H. Hu, Y. Yuan. EfficientViT: Memory efficient vision transformer with cascaded group attention. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Vancouver, Canada, pp. 14420\u201314430, 2023. DOI: https:\/\/doi.org\/10.1109\/CVPR52729.2023.01386."},{"key":"1541_CR35","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"Q Wu","year":"2023","unstructured":"Q. Wu, W. Zhao, C. Yang, H. Zhang, F. Nie, H. Jiang, Y. Bian, J. Yan. Simplifying and empowering transformers for large-graph representations. In Proceedings of the 37th Conference on Neural Information Processing Systems, New Orleans, USA, Article number 2826, 2023."},{"key":"1541_CR36","volume-title":"Proceedings of the 38th Conference on Neural Information Processing Systems","author":"J Chen","year":"2024","unstructured":"J. Chen, H. Liu, J. E. Hopcroft, K. He. Leveraging contrastive learning for enhanced node representations in tokenized graph transformers. In Proceedings of the 38th Conference on Neural Information Processing Systems, New Orleans, USA, 2024."},{"key":"1541_CR37","doi-asserted-by":"publisher","first-page":"7072","DOI":"10.1109\/CVPR42600.2020.00710","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Y Yang","year":"2020","unstructured":"Y. Yang, J. Qiu, M. Song, D. Tao, X. Wang. Distilling knowledge from graph convolutional networks. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 7072\u20137081, 2020. DOI: https:\/\/doi.org\/10.1109\/CVPR42600.2020.00710."},{"key":"1541_CR38","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"W Xu","year":"2022","unstructured":"W. Xu, B. Jiang, J. Desai, B. Han, F. Yan, F. Iannacci. KDSTM: Neural semi-supervised topic modeling with knowledge distillation. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1541_CR39","unstructured":"Y. Wang, P. Zhang, L. Bai, J. Xue. Enhancing mapless trajectory prediction through knowledge distillation, [Online], Available: https:\/\/arxiv.org\/abs\/2306.14177, 2023."},{"key":"1541_CR40","doi-asserted-by":"publisher","first-page":"12674","DOI":"10.1109\/CVPR52729.2023.01219","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"J Hyung","year":"2023","unstructured":"J. Hyung, S. Hwang, D. Kim, H. Lee, J. Choo. Local 3D editing via 3D distillation of clip knowledge. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Vancouver, Canada, pp. 12674\u201312684, 2023. DOI: https:\/\/doi.org\/10.1109\/CVPR52729.2023.01219."},{"issue":"6","key":"1541_CR41","doi-asserted-by":"publisher","first-page":"909","DOI":"10.1007\/s11633-022-1385-5","volume":"20","author":"M Li","year":"2023","unstructured":"M. Li, K. Huang, X. Ma, Y. Wang, W. Fan, Q. Chen. Mask distillation network for conjunctival hyperemia severity classification. Machine Intelligence Research, vol. 20, no. 6, pp. 909\u2013922, 2023. DOI: https:\/\/doi.org\/10.1007\/s11633-022-1385-5.","journal-title":"Machine Intelligence Research"},{"key":"1541_CR42","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"Y Tian","year":"2023","unstructured":"Y. Tian, C. Zhang, Z. Guo, X. Zhang, N. Chawla. Learning MLPs on graphs: A unified view of effectiveness, robustness, and efficiency. In Proceedings of the 11th International Conference on Learning Representations, Kigali, Rwanda, 2023."},{"key":"1541_CR43","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"L Wu","year":"2023","unstructured":"L. Wu, H. Lin, Y. Huang, S. Z. Li. Quantifying the knowledge in GNNs for reliable distillation into MLPs. In Proceedings of the 40th International Conference on Machine Learning, Honolulu, USA, Article number 1564, 2023."},{"key":"1541_CR44","first-page":"2441","volume-title":"Proceedings of the 31st International Joint Conference on Artificial Intelligence","author":"Y Zhuang","year":"2022","unstructured":"Y. Zhuang, L. Lyu, C. Shi, C. Yang, L. Sun. Data-free adversarial knowledge distillation for graph neural networks. In Proceedings of the 31st International Joint Conference on Artificial Intelligence, Vienna, Austria, pp. 2441\u20132447, 2022."},{"key":"1541_CR45","first-page":"4339","volume-title":"Proceedings of the 37th AAAI Conference on Artificial Intelligence","author":"C Huo","year":"2023","unstructured":"C. Huo, D. Jin, Y. Li, D. He, Y. B. Yang, L. Wu. T2-GNN: Graph neural networks for graphs with incomplete features and structure via teacher-student distillation. In Proceedings of the 37th AAAI Conference on Artificial Intelligence, Washington DC, USA, pp. 4339\u20134346, 2023."},{"key":"1541_CR46","first-page":"1177","volume-title":"Proceedings of the 21st International Conference on Neural Information Processing Systems","author":"A Rahimi","year":"2007","unstructured":"A. Rahimi, B. Recht. Random features for large-scale kernel machines. In Proceedings of the 21st International Conference on Neural Information Processing Systems, Voncouver, Canada, pp. 1177\u20131184, 2007."},{"key":"1541_CR47","doi-asserted-by":"publisher","first-page":"720","DOI":"10.1007\/978-3-642-04898-2_327","volume-title":"International Encyclopedia of Statistical Science","author":"J M Joyce","year":"2011","unstructured":"J. M. Joyce. Kullback-Leibler divergence. International Encyclopedia of Statistical Science, M. Lovric, Ed., Berlin, Heidelberg: Springer, pp. 720\u2013722, 2011. DOI: https:\/\/doi.org\/10.1007\/978-3-642-04898-2_327."},{"key":"1541_CR48","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"W Wang","year":"2020","unstructured":"W. Wang, F. Wei, L. Dong, H. Bao, N. Yang, M. Zhou. MINILM: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 485, 2020."},{"key":"1541_CR49","first-page":"40","volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning","author":"Z Yang","year":"2016","unstructured":"Z. Yang, W. W. Cohen, R. Salakhudinov. Revisiting semi-supervised learning with graph embeddings. In Proceedings of the 33rd International Conference on International Conference on Machine Learning, New York, USA, pp. 40\u201348, 2016."},{"key":"1541_CR50","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1145\/3340531.3411866","volume-title":"Proceedings of the 29th ACM International Conference on Information & Knowledge Management","author":"B Rozemberczki","year":"2020","unstructured":"B. Rozemberczki, R. Sarkar. Characteristic functions on graphs: Birds of a feather, from statistical descriptors to parametric models. In Proceedings of the 29th ACM International Conference on Information & Knowledge Management, pp. 1325\u20131334, 2020. DOI: https:\/\/doi.org\/10.1145\/3340531.3411866."},{"key":"1541_CR51","volume-title":"Proceedings of the 8th International Conference on Learning Representations","author":"H Pei","year":"2020","unstructured":"H. Pei, B. Wei, K. C. C. Chang, Y. Lei, B. Yang. Geom-GCN: Geometric graph convolutional networks. In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2020."},{"key":"1541_CR52","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"W Hu","year":"2020","unstructured":"W. Hu, M. Fey, M. Zitnik, Y. Dong, H. Ren, B. Liu, M. Catasta, J. Leskovec. Open graph benchmark: Datasets for machine learning on graphs. In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1855, 2020."},{"key":"1541_CR53","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"F. Pedregosa, G. Varoquaux, A. Gramfort, V. Michel, B. Thirion, O. Grisel, M. Blondel, P. Prettenhofer, R. Weiss, V. Dubourg, J. Vanderplas, A. Passos, D. Cournapeau, M. Brucher, M. Perrot, \u00c9. Duchesnay. Scikit-learn: Machine learning in python. Journal of Machine Learning Research, vol. 12, pp. 2825\u20132830, 2011.","journal-title":"Journal of Machine Learning Research"},{"key":"1541_CR54","first-page":"3637","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems","author":"O Vinyals","year":"2016","unstructured":"O. Vinyals, C. Blundell, T. Lillicrap, K. Kavukcuoglu, D. Wierstra. Matching networks for one shot learning. In Proceedings of the 30th International Conference on Neural Information Processing Systems, Barcelona, Spain, pp. 3637\u20133645, 2016."},{"key":"1541_CR55","volume-title":"Proceedings of the 31st Conference on Neural Information Processing Systems","author":"A Paszke","year":"2017","unstructured":"A. Paszke, S. Gross, S. Chintala, G. Chanan, E. Yang, Z. DeVito, Z. Lin, A. Desmaison, L. Antiga, A. Lerer. Automatic differentiation in PyTorch. In Proceedings of the 31st Conference on Neural Information Processing Systems, Long Beach, USA, 2017."},{"key":"1541_CR56","unstructured":"D. P Kingma, J. Ba. Adam: A method for stochastic optimization, [Online], Available: https:\/\/arxiv.org\/abs\/1412.6980, 2014."},{"key":"1541_CR57","first-page":"5453","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"K Xu","year":"2018","unstructured":"K. Xu, C. Li, Y. Tian, T. Sonobe, K. I. Kawarabayashi, S. Jegelka. Representation learning on graphs with jumping knowledge networks. In Proceedings of the 35th International Conference on Machine Learning, Stockholm, Sweden, pp. 5453\u20135462, 2018."},{"key":"1541_CR58","volume-title":"Proceedings of the 8th International Conference on Learning Representations","author":"Y Rong","year":"2019","unstructured":"Y. Rong, W. Huang, T. Xu, J. Huang. DropEdge: Towards deep graph convolutional networks on node classification. In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2019."},{"key":"1541_CR59","first-page":"21","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"S Abu-El-Haija","year":"2019","unstructured":"S. Abu-El-Haija, B. Perozzi, A. Kapoor, N. Alipourfard, K. Lerman, H. Harutyunyan, G. V. Steeg, A. Galstyan. MixHop: Higher-order graph convolutional architectures via sparsified neighborhood mixing. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 21\u201329, 2019."},{"key":"1541_CR60","first-page":"1972","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"L Franceschi","year":"2019","unstructured":"L. Franceschi, M. Niepert, M. Pontil, X. He. Learning discrete structures for graph neural networks. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 1972\u20131982, 2019."},{"key":"1541_CR61","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Y Chen","year":"2020","unstructured":"Y. Chen, L. Wu, M. J. Zaki. Iterative deep graph learning for graph neural networks: Better and robust node embeddings. In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1620, 2020."},{"key":"1541_CR62","first-page":"6861","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"F Wu","year":"2019","unstructured":"F. Wu, A. Souza, T. Zhang, C. Fifty, T. Yu, K. Weinberger. Simplifying graph convolutional networks. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 6861\u20136871, 2019."},{"key":"1541_CR63","volume-title":"Proceedings of the 8th International Conference on Learning Representations","author":"H Zeng","year":"2019","unstructured":"H. Zeng, H. Zhou, A. Srivastava, R. Kannan, V. Prasanna. GraphSAINT: Graph sampling based inductive learning method. In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2019."},{"key":"1541_CR64","first-page":"6000","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"A Vaswani","year":"2017","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A. N. Gomez, \u0141. Kaiser, I. Polosukhin. Attention is all you need. In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp. 6000\u20136010, 2017."},{"key":"1541_CR65","doi-asserted-by":"crossref","unstructured":"A. M. Mansourian, A. Jalali, R. Ahmadi, S. Kasaei. Attention-guided feature distillation for semantic segmentation, [Online], Available: https:\/\/arxiv.org\/abs\/2403.05451, 2024.","DOI":"10.2139\/ssrn.4992647"},{"issue":"86","key":"1541_CR66","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"L. van der Maaten, G. Hinton. Visualizing data using t-SNE. Journal of Machine Learning Research, vol. 9, no. 86, pp. 2579\u20132605, 2008","journal-title":"Journal of Machine Learning Research"}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-025-1541-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-025-1541-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-025-1541-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T10:03:29Z","timestamp":1764151409000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-025-1541-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,15]]},"references-count":66,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["1541"],"URL":"https:\/\/doi.org\/10.1007\/s11633-025-1541-9","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,15]]},"assertion":[{"value":"22 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declared that they have no conflicts of interest to this work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of interest"}}]}}