{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:48:13Z","timestamp":1755794893834,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":95,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3736833","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T20:54:17Z","timestamp":1754254457000},"page":"1635-1646","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Advancing Graph Foundation Models: A Data-Centric Perspective"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1324-5819","authenticated-orcid":false,"given":"Yuhan","family":"Li","sequence":"first","affiliation":[{"name":"HKUST (GZ), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8430-4363","authenticated-orcid":false,"given":"Yuyao","family":"Wang","sequence":"additional","affiliation":[{"name":"HKUST (GZ), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9341-7312","authenticated-orcid":false,"given":"Jianheng","family":"Tang","sequence":"additional","affiliation":[{"name":"HKUST (GZ), Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4978-8041","authenticated-orcid":false,"given":"Heng","family":"Chang","sequence":"additional","affiliation":[{"name":"Huawei Technologies Co., Ltd., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8829-3984","authenticated-orcid":false,"given":"Yuxiang","family":"Ren","sequence":"additional","affiliation":[{"name":"Nanjing University, Suzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6362-4385","authenticated-orcid":false,"given":"Jia","family":"Li","sequence":"additional","affiliation":[{"name":"HKUST (GZ), Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_9"},{"key":"e_1_3_2_1_2_1","volume-title":"Coresets via bilevel optimization for continual learning and streaming. Advances in neural information processing systems","author":"Borsos Zal\u00e1n","year":"2020","unstructured":"Zal\u00e1n Borsos, Mojmir Mutny, and Andreas Krause. 2020. Coresets via bilevel optimization for continual learning and streaming. Advances in neural information processing systems, Vol. 33 (2020), 14879-14890."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599548"},{"key":"e_1_3_2_1_4_1","volume-title":"Jeffrey Xu Yu, and Liang Wang","author":"Chen Dingshuo","year":"2024","unstructured":"Dingshuo Chen, Zhixun Li, Yuyan Ni, Guibin Zhang, Ding Wang, Qiang Liu, Shu Wu, Jeffrey Xu Yu, and Liang Wang. 2024a. Beyond efficiency: Molecular data pruning for enhanced generalization. arXiv preprint arXiv:2409.01081(2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3672010"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3087905"},{"key":"e_1_3_2_1_7_1","volume-title":"Learning on attribute-missing graphs","author":"Chen Xu","year":"2020","unstructured":"Xu Chen, Siheng Chen, Jiangchao Yao, Huangjie Zheng, Ya Zhang, and Ivor W Tsang. 2020. Learning on attribute-missing graphs. IEEE transactions on pattern analysis and machine intelligence, Vol. 44, 2 (2020), 740-757."},{"key":"e_1_3_2_1_8_1","unstructured":"Cody Coleman Christopher Yeh Stephen Mussmann Baharan Mirzasoleiman Peter Bailis Percy Liang Jure Leskovec and Matei Zaharia. 2019. Selection via proxy: Efficient data selection for deep learning. arXiv preprint arXiv:1906.11829(2019)."},{"key":"e_1_3_2_1_9_1","unstructured":"Melanie Ducoffe and Frederic Precioso. 2018. Adversarial active learning for deep networks: a margin based approach. arXiv preprint arXiv:1802.09841(2018)."},{"key":"e_1_3_2_1_10_1","volume-title":"International conference on machine learning. PMLR","author":"Franceschi Luca","year":"2019","unstructured":"Luca Franceschi, Mathias Niepert, Massimiliano Pontil, and Xiao He. 2019. Learning discrete structures for graph neural networks. In International conference on machine learning. PMLR, 1972-1982."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-36736-1"},{"key":"e_1_3_2_1_12_1","first-page":"73079","article-title":"Towards stable representations for protein interface prediction","volume":"37","author":"Gao Ziqi","year":"2024","unstructured":"Ziqi Gao, Zijing Liu, Yu Li, and Jia Li. 2024a. Towards stable representations for protein interface prediction. Advances in Neural Information Processing Systems, Vol. 37 (2024), 73079-73097.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","unstructured":"Ziqi Gao Xiangguo Sun Zijing Liu Yu Li Hong Cheng and Jia Li. 2024b. Protein multimer structure prediction via prompt learning. arXiv preprint arXiv:2402.18813(2024)."},{"key":"e_1_3_2_1_14_1","unstructured":"Johannes Gasteiger Aleksandar Bojchevski and Stephan G\u00fcnnemann. 2018. Predict then propagate: Graph neural networks meet personalized pagerank. arXiv preprint arXiv:1810.05997(2018)."},{"key":"e_1_3_2_1_15_1","volume-title":"NeurIPS","volume":"32","author":"Gasteiger Johannes","year":"2019","unstructured":"Johannes Gasteiger, Stefan Wei\u00dfenberger, and Stephan G\u00fcnnemann. 2019. Diffusion improves graph learning. NeurIPS, Vol. 32 (2019)."},{"key":"e_1_3_2_1_16_1","volume-title":"Shaun McGlinchey, David Michalovich, Bissan Al-Lazikani, et al.","author":"Gaulton Anna","year":"2012","unstructured":"Anna Gaulton, Louisa J Bellis, A Patricia Bento, Jon Chambers, Mark Davies, Anne Hersey, Yvonne Light, Shaun McGlinchey, David Michalovich, Bissan Al-Lazikani, et al., 2012. ChEMBL: a large-scale bioactivity database for drug discovery. Nucleic acids research, Vol. 40, D1 (2012), D1100-D1107."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/276675.276685"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-12423-5_14"},{"key":"e_1_3_2_1_19_1","volume-title":"Data-centric graph learning: A survey","author":"Guo Yuxin","year":"2024","unstructured":"Yuxin Guo, Deyu Bo, Cheng Yang, Zhiyuan Lu, Zhongjian Zhang, Jixi Liu, Yufei Peng, and Chuan Shi. 2024. Data-centric graph learning: A survey. IEEE Transactions on Big Data(2024)."},{"key":"e_1_3_2_1_20_1","first-page":"638","article-title":"A Data-centric Framework to Endow Graph Neural Networks with Out-Of-Distribution Detection Ability","author":"Guo Yuxin","year":"2023","unstructured":"Yuxin Guo, Cheng Yang, Yuluo Chen, Jixi Liu, Chuan Shi, and Junping Du. 2023. A Data-centric Framework to Endow Graph Neural Networks with Out-Of-Distribution Detection Ability. In SIGKDD. 638-648.","journal-title":"SIGKDD."},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Machine Learning. PMLR, 8230-8248","author":"Han Xiaotian","year":"2022","unstructured":"Xiaotian Han, Zhimeng Jiang, Ninghao Liu, and Xia Hu. 2022. G-mixup: Graph data augmentation for graph classification. In International Conference on Machine Learning. PMLR, 8230-8248."},{"key":"e_1_3_2_1_22_1","unstructured":"Xiaoxin He Xavier Bresson Thomas Laurent Adam Perold Yann LeCun and Bryan Hooi. 2023. Harnessing explanations: Llm-to-lm interpreter for enhanced text-attributed graph representation learning. arXiv preprint arXiv:2305.19523(2023)."},{"key":"e_1_3_2_1_23_1","unstructured":"Yufei He and Bryan Hooi. 2024. UniGraph: Learning a Cross-Domain Graph Foundation Model From Natural Language. arXiv preprint arXiv:2402.13630(2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"Open graph benchmark: Datasets for machine learning on graphs. Advances in neural information processing systems","author":"Hu Weihua","year":"2020","unstructured":"Weihua Hu, Matthias Fey, Marinka Zitnik, Yuxiao Dong, Hongyu Ren, Bowen Liu, Michele Catasta, and Jure Leskovec. 2020. Open graph benchmark: Datasets for machine learning on graphs. Advances in neural information processing systems, Vol. 33 (2020), 22118-22133."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645627"},{"key":"e_1_3_2_1_26_1","unstructured":"Johannes Jakubik Michael V\u00f6ssing Niklas K\u00fchl Jannis Walk and Gerhard Satzger. 2024. Data-centric artificial intelligence. Business & Information Systems Engineering(2024) 1-9."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449914"},{"key":"e_1_3_2_1_28_1","first-page":"66","article-title":"Graph structure learning for robust graph neural networks","author":"Jin Wei","year":"2020","unstructured":"Wei Jin, Yao Ma, Xiaorui Liu, Xianfeng Tang, Suhang Wang, and Jiliang Tang. 2020. Graph structure learning for robust graph neural networks. In SIGKDD. 66-74.","journal-title":"SIGKDD."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02332"},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Machine Learning. PMLR, 5464-5474","author":"Killamsetty Krishnateja","year":"2021","unstructured":"Krishnateja Killamsetty, Sivasubramanian Durga, Ganesh Ramakrishnan, Abir De, and Rishabh Iyer. 2021a. Grad-match: Gradient matching based data subset selection for efficient deep model training. In International Conference on Machine Learning. PMLR, 5464-5474."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16988"},{"key":"e_1_3_2_1_32_1","volume-title":"Gofa: A generative one-for-all model for joint graph language modeling. arXiv preprint arXiv:2407.09709(2024).","author":"Kong Lecheng","year":"2024","unstructured":"Lecheng Kong, Jiarui Feng, Hao Liu, Chengsong Huang, Jiaxin Huang, Yixin Chen, and Muhan Zhang. 2024. Gofa: A generative one-for-all model for joint graph language modeling. arXiv preprint arXiv:2407.09709(2024)."},{"key":"e_1_3_2_1_33_1","first-page":"18685","article-title":"Similar: Submodular information measures based active learning in realistic scenarios","volume":"34","author":"Kothawade Suraj","year":"2021","unstructured":"Suraj Kothawade, Nathan Beck, Krishnateja Killamsetty, and Rishabh Iyer. 2021. Similar: Submodular information measures based active learning in realistic scenarios. NeurIPS, Vol. 34 (2021), 18685-18697.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/898"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671982"},{"key":"e_1_3_2_1_36_1","first-page":"42349","article-title":"Glbench: A comprehensive benchmark for graph with large language models","volume":"37","author":"Li Yuhan","year":"2024","unstructured":"Yuhan Li, Peisong Wang, Xiao Zhu, Aochuan Chen, Haiyun Jiang, Deng Cai, Victor W Chan, and Jia Li. 2024d. Glbench: A comprehensive benchmark for graph with large language models. Advances in Neural Information Processing Systems, Vol. 37 (2024), 42349-42368.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","unstructured":"Yuhan Li Jian Wu Zhiwei Yu B\u00f6rje F Karlsson Wei Shen Manabu Okumura and Chin-Yew Lin. 2023. All data on the table: Novel dataset and benchmark for cross-modality scientific information extraction. arXiv preprint arXiv:2311.08189(2023)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3696410.3714727"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671826"},{"key":"e_1_3_2_1_40_1","volume-title":"One For All: Towards Training One Graph Model For All Classification Tasks. In The Twelfth International Conference on Learning Representations.","author":"Liu Hao","year":"2024","unstructured":"Hao Liu, Jiarui Feng, Lecheng Kong, Ningyue Liang, Dacheng Tao, Yixin Chen, and Muhan Zhang. 2024. One For All: Towards Training One Graph Model For All Classification Tasks. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_41_1","first-page":"4385","article-title":"Modelling high-order social relations for item recommendation","volume":"34","author":"Liu Yang","year":"2020","unstructured":"Yang Liu, Liang Chen, Xiangnan He, Jiaying Peng, Zibin Zheng, and Jie Tang. 2020. Modelling high-order social relations for item recommendation. TKDE, Vol. 34, 9 (2020), 4385-4397.","journal-title":"TKDE"},{"key":"e_1_3_2_1_42_1","volume-title":"International conference on machine learning. PMLR, 7192-7203","author":"Luo Youzhi","year":"2021","unstructured":"Youzhi Luo, Keqiang Yan, and Shuiwang Ji. 2021. Graphdf: A discrete flow model for molecular graph generation. In International conference on machine learning. PMLR, 7192-7203."},{"key":"e_1_3_2_1_43_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Ma Jiaqi","year":"2019","unstructured":"Jiaqi Ma, Weijing Tang, Ji Zhu, and Qiaozhu Mei. 2019. A flexible generative framework for graph-based semi-supervised learning. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_44_1","first-page":"139","article-title":"One-class SVMs for document classification","volume":"2","author":"Manevitz Larry M","year":"2001","unstructured":"Larry M Manevitz and Malik Yousef. 2001. One-class SVMs for document classification. Journal of machine Learning research, Vol. 2, Dec (2001), 139-154.","journal-title":"Journal of machine Learning research"},{"key":"e_1_3_2_1_45_1","volume-title":"Position: Graph Foundation Models Are Already Here. In Forty-first International Conference on Machine Learning.","author":"Mao Haitao","year":"2024","unstructured":"Haitao Mao, Zhikai Chen, Wenzhuo Tang, Jianan Zhao, Yao Ma, Tong Zhao, Neil Shah, Mikhail Galkin, and Jiliang Tang. 2024. Position: Graph Foundation Models Are Already Here. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Katerina Margatina Giorgos Vernikos Lo\u00efc Barrault and Nikolaos Aletras. 2021. Active learning by acquiring contrastive examples. arXiv preprint arXiv:2109.03764(2021).","DOI":"10.18653\/v1\/2021.emnlp-main.51"},{"key":"e_1_3_2_1_47_1","volume-title":"Wiki-cs: A wikipedia-based benchmark for graph neural networks. arXiv preprint arXiv:2007.02901(2020).","author":"Mernyei P\u00e9ter","year":"2020","unstructured":"P\u00e9ter Mernyei and C\u0103t\u0103lina Cangea. 2020. Wiki-cs: A wikipedia-based benchmark for graph neural networks. arXiv preprint arXiv:2007.02901(2020)."},{"key":"e_1_3_2_1_48_1","first-page":"6950","article-title":"Coresets for data-efficient training of machine learning models","author":"Mirzasoleiman Baharan","year":"2020","unstructured":"Baharan Mirzasoleiman, Jeff Bilmes, and Jure Leskovec. 2020. Coresets for data-efficient training of machine learning models. In ICML. PMLR, 6950-6960.","journal-title":"ICML. PMLR"},{"volume-title":"Networks","author":"Newman Mark","key":"e_1_3_2_1_49_1","unstructured":"Mark Newman. 2018. Networks. Oxford university press."},{"key":"e_1_3_2_1_50_1","unstructured":"Jianmo Ni Jiacheng Li and Julian McAuley. 2019. Justifying recommendations using distantly-labeled reviews and fined-grained aspects. In EMNLP."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20095"},{"key":"e_1_3_2_1_52_1","volume-title":"InversionGNN: A Dual Path Network for Multi-Property Molecular Optimization. In The Thirteenth International Conference on Learning Representations.","author":"Niu Yifan","year":"2025","unstructured":"Yifan Niu, Ziqi Gao, Tingyang Xu, Yang Liu, Yatao Bian, Yu Rong, Junzhou Huang, and Jia Li. 2025. InversionGNN: A Dual Path Network for Multi-Property Molecular Optimization. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_53_1","volume-title":"Deep learning on a data diet: Finding important examples early in training. Advances in neural information processing systems","author":"Paul Mansheej","year":"2021","unstructured":"Mansheej Paul, Surya Ganguli, and Gintare Karolina Dziugaite. 2021. Deep learning on a data diet: Finding important examples early in training. Advances in neural information processing systems, Vol. 34 (2021), 20596-20607."},{"key":"e_1_3_2_1_54_1","first-page":"2062","article-title":"GHashing: Semantic graph hashing for approximate similarity search in graph databases","author":"Qin Zongyue","year":"2020","unstructured":"Zongyue Qin, Yunsheng Bai, and Yizhou Sun. 2020. GHashing: Semantic graph hashing for approximate similarity search in graph databases. In SIGKDD. 2062-2072.","journal-title":"SIGKDD."},{"key":"e_1_3_2_1_55_1","volume-title":"Infobatch: Lossless training speed up by unbiased dynamic data pruning. arXiv preprint arXiv:2303.04947(2023).","author":"Qin Ziheng","year":"2023","unstructured":"Ziheng Qin, Kai Wang, Zangwei Zheng, Jianyang Gu, Xiangyu Peng, Zhaopan Xu, Daquan Zhou, Lei Shang, Baigui Sun, Xuansong Xie, et al., 2023. Infobatch: Lossless training speed up by unbiased dynamic data pruning. arXiv preprint arXiv:2303.04947(2023)."},{"key":"e_1_3_2_1_56_1","unstructured":"Ravi S Raju Kyle Daruwalla and Mikko Lipasti. 2021. Accelerating deep learning with dynamic data pruning. arXiv preprint arXiv:2111.12621(2021)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"N Reimers. 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. arXiv preprint arXiv:1908.10084(2019).","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_58_1","first-page":"4393","article-title":"Deep one-class classification","author":"Ruff Lukas","year":"2018","unstructured":"Lukas Ruff, Robert Vandermeulen, Nico Goernitz, Lucas Deecke, Shoaib Ahmed Siddiqui, Alexander Binder, Emmanuel M\u00fcller, and Marius Kloft. 2018. Deep one-class classification. In ICML. PMLR, 4393-4402.","journal-title":"ICML. PMLR"},{"key":"e_1_3_2_1_59_1","first-page":"1702","article-title":"Graphon neural networks and the transferability of graph neural networks","volume":"33","author":"Ruiz Luana","year":"2020","unstructured":"Luana Ruiz, Luiz Chamon, and Alejandro Ribeiro. 2020. Graphon neural networks and the transferability of graph neural networks. Advances in Neural Information Processing Systems, Vol. 33 (2020), 1702-1712.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989399"},{"key":"e_1_3_2_1_61_1","volume-title":"Markus Hagenbuchner, and Gabriele Monfardini.","author":"Scarselli Franco","year":"2008","unstructured":"Franco Scarselli, Marco Gori, Ah Chung Tsoi, Markus Hagenbuchner, and Gabriele Monfardini. 2008. The graph neural network model. IEEE transactions on neural networks, Vol. 20, 1 (2008), 61-80."},{"key":"e_1_3_2_1_62_1","volume-title":"Collective classification in network data. AI magazine","author":"Sen Prithviraj","year":"2008","unstructured":"Prithviraj Sen, Galileo Namata, Mustafa Bilgic, Lise Getoor, Brian Galligher, and Tina Eliassi-Rad. 2008. Collective classification in network data. AI magazine, Vol. 29, 3 (2008), 93-93."},{"key":"e_1_3_2_1_63_1","volume-title":"Active Learning for Convolutional Neural Networks: A Core-Set Approach. In International Conference on Learning Representations.","author":"Sener Ozan","year":"2018","unstructured":"Ozan Sener and Silvio Savarese. 2018. Active Learning for Convolutional Neural Networks: A Core-Set Approach. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_64_1","article-title":"Weisfeiler-lehman graph kernels","volume":"12","author":"Shervashidze Nino","year":"2011","unstructured":"Nino Shervashidze, Pascal Schweitzer, Erik Jan Van Leeuwen, Kurt Mehlhorn, and Karsten M Borgwardt. 2011. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research, Vol. 12, 9 (2011).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_65_1","volume-title":"International Conference on Machine Learning. PMLR, 9005-9015","author":"Sinha Samarth","year":"2020","unstructured":"Samarth Sinha, Han Zhang, Anirudh Goyal, Yoshua Bengio, Hugo Larochelle, and Augustus Odena. 2020. Small-gan: Speeding up gan training using core-sets. In International Conference on Machine Learning. PMLR, 9005-9015."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599256"},{"key":"e_1_3_2_1_67_1","volume-title":"The Thirteenth International Conference on Learning Representations.","author":"Tang Jianheng","year":"2025","unstructured":"Jianheng Tang, Qifan Zhang, Yuhan Li, Nuo Chen, and Jia Li. 2025. Grapharena: Evaluating and exploring large language models on graph computation. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_68_1","volume-title":"Adam Trischler, Yoshua Bengio, and Geoffrey J Gordon.","author":"Toneva Mariya","year":"2018","unstructured":"Mariya Toneva, Alessandro Sordoni, Remi Tachet des Combes, Adam Trischler, Yoshua Bengio, and Geoffrey J Gordon. 2018. An empirical study of example forgetting during deep neural network learning. arXiv preprint arXiv:1812.05159(2018)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449796"},{"key":"e_1_3_2_1_70_1","volume-title":"International conference on machine learning. PMLR","author":"Wei Kai","year":"2015","unstructured":"Kai Wei, Rishabh Iyer, and Jeff Bilmes. 2015. Submodularity in data subset selection and active learning. In International conference on machine learning. PMLR, 1954-1963."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM54844.2022.00158"},{"key":"e_1_3_2_1_72_1","volume-title":"MoleculeNet: a benchmark for molecular machine learning. Chemical science","author":"Wu Zhenqin","year":"2018","unstructured":"Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh S Pappu, Karl Leswing, and Vijay Pande. 2018. MoleculeNet: a benchmark for molecular machine learning. Chemical science, Vol. 9, 2 (2018), 513-530."},{"key":"e_1_3_2_1_73_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Xia Xiaobo","year":"2022","unstructured":"Xiaobo Xia, Jiale Liu, Jun Yu, Xu Shen, Bo Han, and Tongliang Liu. 2022. Moderate coreset: A universal method of data selection for real-world data-efficient deep learning. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_74_1","first-page":"56946","article-title":"Better with less: A data-active perspective on pre-training graph neural networks","volume":"36","author":"Xu Jiarong","year":"2023","unstructured":"Jiarong Xu, Renhong Huang, Xin Jiang, Yuxuan Cao, Carl Yang, Chunping Wang, and Yang Yang. 2023. Better with less: A data-active perspective on pre-training graph neural networks. Advances in Neural Information Processing Systems, Vol. 36 (2023), 56946-56978.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_75_1","unstructured":"Keyulu Xu Weihua Hu Jure Leskovec and Stefanie Jegelka. 2018. How powerful are graph neural networks? arXiv preprint arXiv:1810.00826(2018)."},{"key":"e_1_3_2_1_76_1","first-page":"17238","article-title":"A comprehensive study on text-attributed graphs: Benchmarking and rethinking","volume":"36","author":"Yan Hao","year":"2023","unstructured":"Hao Yan, Chaozhuo Li, Ruosong Long, Chao Yan, Jianan Zhao, Wenwen Zhuang, Jun Yin, Peiyan Zhang, Weihao Han, Hao Sun, et al., 2023. A comprehensive study on text-attributed graphs: Benchmarking and rethinking. Advances in Neural Information Processing Systems, Vol. 36 (2023), 17238-17264.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_77_1","unstructured":"Shuo Yang Zeke Xie Hanyu Peng Min Xu Mingming Sun and Ping Li. 2022. Dataset pruning: Reducing training data by examining generalization influence. arXiv preprint arXiv:2205.09329(2022)."},{"key":"e_1_3_2_1_78_1","volume-title":"International conference on machine learning. PMLR, 40-48","author":"Yang Zhilin","year":"2016","unstructured":"Zhilin Yang, William Cohen, and Ruslan Salakhudinov. 2016. Revisiting semi-supervised learning with graph embeddings. In International conference on machine learning. PMLR, 40-48."},{"key":"e_1_3_2_1_79_1","volume-title":"International conference on machine learning. PMLR, 5708-5717","author":"You Jiaxuan","year":"2018","unstructured":"Jiaxuan You, Rex Ying, Xiang Ren, William Hamilton, and Jure Leskovec. 2018. Graphrnn: Generating realistic graphs with deep auto-regressive models. In International conference on machine learning. PMLR, 5708-5717."},{"key":"e_1_3_2_1_80_1","unstructured":"Dianzhi Yu Xinni Zhang Yankai Chen Aiwei Liu Yifei Zhang Philip S Yu and Irwin King. 2024. Recent Advances of Multimodal Continual Learning: A Comprehensive Survey. arXiv preprint arXiv:2410.05352(2024)."},{"key":"e_1_3_2_1_81_1","volume-title":"Graph Information Bottleneck for Subgraph Recognition. In International Conference on Learning Representations.","author":"Yu Junchi","year":"2020","unstructured":"Junchi Yu, Tingyang Xu, Yu Rong, Yatao Bian, Junzhou Huang, and Ran He. 2020. Graph Information Bottleneck for Subgraph Recognition. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_82_1","volume-title":"Kwei-Herng Lai, Fan Yang, Zhimeng Jiang, Shaochen Zhong, and Xia Hu.","author":"Zha Daochen","year":"2023","unstructured":"Daochen Zha, Zaid Pervaiz Bhat, Kwei-Herng Lai, Fan Yang, Zhimeng Jiang, Shaochen Zhong, and Xia Hu. 2023. Data-centric artificial intelligence: A survey. Comput. Surveys(2023)."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645682"},{"key":"e_1_3_2_1_84_1","volume-title":"Gcoder: Improving large language model for generalized graph problem solving. arXiv preprint arXiv:2410.19084(2024).","author":"Zhang Qifan","year":"2024","unstructured":"Qifan Zhang, Xiaobin Hong, Jianheng Tang, Nuo Chen, Yuhan Li, Wenzhong Li, Jing Tang, and Jia Li. 2024b. Gcoder: Improving large language model for generalized graph problem solving. arXiv preprint arXiv:2410.19084(2024)."},{"key":"e_1_3_2_1_85_1","unstructured":"Xinni Zhang Yankai Chen Cuiyun Gao Qing Liao Shenglin Zhao and Irwin King. 2022. Knowledge-aware neural networks with personalized feature referencing for cold-start recommendation. arXiv preprint arXiv:2209.13973(2022)."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28790"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583263"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2023.3324912"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671913"},{"key":"e_1_3_2_1_90_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Zhao Haiteng","year":"2024","unstructured":"Haiteng Zhao, Shengchao Liu, Ma Chang, Hannan Xu, Jie Fu, Zhihong Deng, Lingpeng Kong, and Qi Liu. 2024b. Gimlet: A unified graph-text model for instruction-based molecule zero-shot learning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_91_1","unstructured":"Haihong Zhao Chenyi Zi Aochuan Chen and Jia Li. 2025. A Survey of Cross-domain Graph Learning: Progress and Future Directions. arXiv preprint arXiv:2503.11086(2025)."},{"key":"e_1_3_2_1_92_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023c. A survey of large language models. arXiv preprint arXiv:2303.18223(2023)."},{"key":"e_1_3_2_1_93_1","volume-title":"Alan Wee-Chung Liew, and Shirui Pan","author":"Zheng Xin","year":"2023","unstructured":"Xin Zheng, Yixin Liu, Zhifeng Bao, Meng Fang, Xia Hu, Alan Wee-Chung Liew, and Shirui Pan. 2023. Towards data-centric graph machine learning: Review and outlook. arXiv preprint arXiv:2309.10979(2023)."},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583399"},{"key":"e_1_3_2_1_95_1","volume-title":"Graphclip: Enhancing transferability in graph foundation models for text-attributed graphs. arXiv preprint arXiv:2410.10329(2024).","author":"Zhu Yun","year":"2024","unstructured":"Yun Zhu, Haizhou Shi, Xiaotang Wang, Yongchao Liu, Yaoke Wang, Boci Peng, Chuntao Hong, and Siliang Tang. 2024. Graphclip: Enhancing transferability in graph foundation models for text-attributed graphs. arXiv preprint arXiv:2410.10329(2024)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3736833","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T14:32:05Z","timestamp":1755354725000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3736833"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":95,"alternative-id":["10.1145\/3711896.3736833","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3736833","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}