{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T21:50:17Z","timestamp":1777499417503,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Beijing Outstanding Young Scientist Program","award":["No.BJJWZYJH012019100020098"],"award-info":[{"award-number":["No.BJJWZYJH012019100020098"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0114800"],"award-info":[{"award-number":["2022ZD0114800"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. U2241212, No. 61932001"],"award-info":[{"award-number":["No. U2241212, No. 61932001"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Natural Science Foundation","award":["No. 4222028"],"award-info":[{"award-number":["No. 4222028"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679776","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"1878-1887","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Beyond Over-smoothing: Uncovering the Trainability Challenges in Deep Graph Neural Networks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4021-3668","authenticated-orcid":false,"given":"Jie","family":"Peng","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2700-3840","authenticated-orcid":false,"given":"Runlin","family":"Lei","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3620-5086","authenticated-orcid":false,"given":"Zhewei","family":"Wei","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSP.2017.8286426"},{"key":"e_1_3_2_1_2_1","volume-title":"A convergence analysis of gradient descent for deep linear neural networks. arXiv preprint arXiv:1810.02281","author":"Arora Sanjeev","year":"2018","unstructured":"Sanjeev Arora, Nadav Cohen, Noah Golowich, and Wei Hu. 2018. A convergence analysis of gradient descent for deep linear neural networks. arXiv preprint arXiv:1810.02281 (2018)."},{"key":"e_1_3_2_1_3_1","volume-title":"A Note on Over-Smoothing for Graph Neural Networks. arxiv","author":"Cai Chen","year":"2006","unstructured":"Chen Cai and Yusu Wang. 2020. A Note on Over-Smoothing for Graph Neural Networks. arxiv: 2006.13318 [cs.LG]"},{"key":"e_1_3_2_1_4_1","volume-title":"International conference on machine learning. PMLR, 1725--1735","author":"Chen Ming","year":"2020","unstructured":"Ming Chen, Zhewei Wei, Zengfeng Huang, Bolin Ding, and Yaliang Li. 2020. Simple and deep graph convolutional networks. In International conference on machine learning. PMLR, 1725--1735."},{"key":"e_1_3_2_1_5_1","first-page":"9936","article-title":"On provable benefits of depth in training graph convolutional networks","volume":"34","author":"Cong Weilin","year":"2021","unstructured":"Weilin Cong, Morteza Ramezani, and Mehrdad Mahdavi. 2021. On provable benefits of depth in training graph convolutional networks. Advances in Neural Information Processing Systems, Vol. 34 (2021), 9936--9949.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_6_1","volume-title":"Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems","author":"Defferrard Micha\u00ebl","year":"2016","unstructured":"Micha\u00ebl Defferrard, Xavier Bresson, and Pierre Vandergheynst. 2016. Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Machine Learning. PMLR, 9224--9245","author":"Eliasof Moshe","year":"2023","unstructured":"Moshe Eliasof, Lars Ruthotto, and Eran Treister. 2023. Improving graph neural networks with learnable propagation operators. In International Conference on Machine Learning. PMLR, 9224--9245."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Wenqi Fan Yao Ma Qing Li Yuan He Eric Zhao Jiliang Tang and Dawei Yin. 2019. Graph neural networks for social recommendation. In The world wide web conference. 417--426.","DOI":"10.1145\/3308558.3313488"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25545"},{"key":"e_1_3_2_1_11_1","volume-title":"Graph random neural networks for semi-supervised learning on graphs. Advances in neural information processing systems","author":"Feng Wenzheng","year":"2020","unstructured":"Wenzheng Feng, Jie Zhang, Yuxiao Dong, Yu Han, Huanbo Luan, Qian Xu, Qiang Yang, Evgeny Kharlamov, and Jie Tang. 2020. Graph random neural networks for semi-supervised learning on graphs. Advances in neural information processing systems, Vol. 33 (2020), 22092--22103."},{"key":"e_1_3_2_1_12_1","volume-title":"International conference on machine learning. PMLR, 1263--1272","author":"Gilmer Justin","year":"2017","unstructured":"Justin Gilmer, Samuel S Schoenholz, Patrick F Riley, Oriol Vinyals, and George E Dahl. 2017. Neural message passing for quantum chemistry. In International conference on machine learning. PMLR, 1263--1272."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614997"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, 249--256","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, 249--256."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01820"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_18_1","volume-title":"International conference on machine learning. pmlr, 448--456","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In International conference on machine learning. pmlr, 448--456."},{"key":"e_1_3_2_1_19_1","volume-title":"Language modeling with deep transformers. arXiv preprint arXiv:1905.04226","author":"Irie Kazuki","year":"2019","unstructured":"Kazuki Irie, Albert Zeyer, Ralf Schl\u00fcter, and Hermann Ney. 2019. Language modeling with deep transformers. arXiv preprint arXiv:1905.04226 (2019)."},{"key":"e_1_3_2_1_20_1","first-page":"7561","article-title":"Old can be gold: Better gradient flow can make vanilla-gcns great again","volume":"35","author":"Jaiswal Ajay","year":"2022","unstructured":"Ajay Jaiswal, Peihao Wang, Tianlong Chen, Justin Rousseau, Ying Ding, and Zhangyang Wang. 2022. Old can be gold: Better gradient flow can make vanilla-gcns great again. Advances in Neural Information Processing Systems, Vol. 35 (2022), 7561--7574.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"A survey of the recent architectures of deep convolutional neural networks. Artificial intelligence review","author":"Khan Asifullah","year":"2020","unstructured":"Asifullah Khan, Anabia Sohail, Umme Zahoora, and Aqsa Saeed Qureshi. 2020. A survey of the recent architectures of deep convolutional neural networks. Artificial intelligence review, Vol. 53 (2020), 5455--5516."},{"key":"e_1_3_2_1_22_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_23_1","volume-title":"Markov chains and mixing times","author":"Levin David A","unstructured":"David A Levin and Yuval Peres. 2017. Markov chains and mixing times. Vol. 107. American Mathematical Soc."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00936"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11604"},{"key":"e_1_3_2_1_26_1","volume-title":"International Conference on Complex Networks and Their Applications. Springer, 49--60","author":"Luan Sitao","year":"2023","unstructured":"Sitao Luan, Mingde Zhao, Xiao-Wen Chang, and Doina Precup. 2023. Training matters: Unlocking potentials of deeper graph convolutional neural networks. In International Conference on Complex Networks and Their Applications. Springer, 49--60."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1879141.1879191"},{"key":"e_1_3_2_1_28_1","volume-title":"Graph neural networks exponentially lose expressive power for node classification. arXiv preprint arXiv:1905.10947","author":"Oono Kenta","year":"2019","unstructured":"Kenta Oono and Taiji Suzuki. 2019. Graph neural networks exponentially lose expressive power for node classification. arXiv preprint arXiv:1905.10947 (2019)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICESC51422.2021.9532631"},{"key":"e_1_3_2_1_30_1","volume-title":"Dropedge: Towards deep graph convolutional networks on node classification. arXiv preprint arXiv:1907.10903","author":"Rong Yu","year":"2019","unstructured":"Yu Rong, Wenbing Huang, Tingyang Xu, and Junzhou Huang. 2019. Dropedge: Towards deep graph convolutional networks on node classification. arXiv preprint arXiv:1907.10903 (2019)."},{"key":"e_1_3_2_1_31_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Rusch T Konstantin","year":"2022","unstructured":"T Konstantin Rusch, Ben Chamberlain, James Rowbottom, Siddhartha Mishra, and Michael Bronstein. 2022. Graph-coupled oscillator networks. In International Conference on Machine Learning. PMLR, 18888--18909."},{"key":"e_1_3_2_1_32_1","volume-title":"International conference on machine learning. PMLR, 9323--9332","author":"Satorras Victor Garcia","year":"2021","unstructured":"Victor Garcia Satorras, Emiel Hoogeboom, and Max Welling. 2021. E (n) equivariant graph neural networks. In International conference on machine learning. PMLR, 9323--9332."},{"key":"e_1_3_2_1_33_1","volume-title":"Collective classification in network data. AI magazine","author":"Sen Prithviraj","year":"2008","unstructured":"Prithviraj Sen, Galileo Namata, Mustafa Bilgic, Lise Getoor, Brian Galligher, and Tina Eliassi-Rad. 2008. Collective classification in network data. AI magazine, Vol. 29, 3 (2008), 93--93."},{"key":"e_1_3_2_1_34_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research, Vol. 15, 1 (2014), 1929--1958."},{"key":"e_1_3_2_1_35_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_36_1","volume-title":"Improving graph attention networks with large margin-based constraints. arXiv preprint arXiv:1910.11945","author":"Wang Guangtao","year":"2019","unstructured":"Guangtao Wang, Rex Ying, Jing Huang, and Jure Leskovec. 2019. Improving graph attention networks with large margin-based constraints. arXiv preprint arXiv:1910.11945 (2019)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00447-x"},{"key":"e_1_3_2_1_38_1","volume-title":"International conference on machine learning. PMLR, 6861--6871","author":"Wu Felix","year":"2019","unstructured":"Felix Wu, Amauri Souza, Tianyi Zhang, Christopher Fifty, Tao Yu, and Kilian Weinberger. 2019. Simplifying graph convolutional networks. In International conference on machine learning. PMLR, 6861--6871."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3376000"},{"key":"e_1_3_2_1_40_1","volume-title":"International conference on machine learning. PMLR, 40--48","author":"Yang Zhilin","year":"2016","unstructured":"Zhilin Yang, William Cohen, and Ruslan Salakhudinov. 2016. Revisiting semi-supervised learning with graph embeddings. In International conference on machine learning. PMLR, 40--48."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539374"},{"key":"e_1_3_2_1_42_1","volume-title":"Pairnorm: Tackling oversmoothing in gnns. arXiv preprint arXiv:1909.12223","author":"Zhao Lingxiao","year":"2019","unstructured":"Lingxiao Zhao and Leman Akoglu. 2019. Pairnorm: Tackling oversmoothing in gnns. arXiv preprint arXiv:1909.12223 (2019)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00354"},{"key":"e_1_3_2_1_44_1","volume-title":"A survey of dynamic graph neural networks. arXiv preprint arXiv:2404.18211","author":"Zheng Yanping","year":"2024","unstructured":"Yanping Zheng, Lu Yi, and Zhewei Wei. 2024. A survey of dynamic graph neural networks. arXiv preprint arXiv:2404.18211 (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Bryan Hooi, Huan Xu, and Jiashi Feng.","author":"Zhou Kuangqi","year":"2020","unstructured":"Kuangqi Zhou, Yanfei Dong, Wee Sun Lee, Bryan Hooi, Huan Xu, and Jiashi Feng. 2020. Effective training strategies for deep graph neural networks. arXiv preprint arXiv:2006.07107 (2020)."},{"key":"e_1_3_2_1_46_1","volume-title":"Towards deeper graph neural networks with differentiable group normalization. Advances in neural information processing systems","author":"Zhou Kaixiong","year":"2020","unstructured":"Kaixiong Zhou, Xiao Huang, Yuening Li, Daochen Zha, Rui Chen, and Xia Hu. 2020. Towards deeper graph neural networks with differentiable group normalization. Advances in neural information processing systems, Vol. 33 (2020), 4917--4928."},{"key":"e_1_3_2_1_47_1","first-page":"21834","article-title":"Dirichlet energy constrained learning for deep graph neural networks","volume":"34","author":"Zhou Kaixiong","year":"2021","unstructured":"Kaixiong Zhou, Xiao Huang, Daochen Zha, Rui Chen, Li Li, Soo-Hyun Choi, and Xia Hu. 2021. Dirichlet energy constrained learning for deep graph neural networks. Advances in Neural Information Processing Systems, Vol. 34 (2021), 21834--21846.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_48_1","volume-title":"Parallelized stochastic gradient descent. Advances in neural information processing systems","author":"Zinkevich Martin","year":"2010","unstructured":"Martin Zinkevich, Markus Weimer, Lihong Li, and Alex Smola. 2010. Parallelized stochastic gradient descent. Advances in neural information processing systems, Vol. 23 (2010)."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679776","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679776","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:28Z","timestamp":1750294708000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679776"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":48,"alternative-id":["10.1145\/3627673.3679776","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679776","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}