{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,20]],"date-time":"2026-07-20T15:06:25Z","timestamp":1784559985718,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"New Cornerstone Science Foundation through the XPLORER PRIZE.","award":["None"],"award-info":[{"award-number":["None"]}]},{"name":"Natural Science Foundation of China (NSFC)","award":["62425601,62276148"],"award-info":[{"award-number":["62425601,62276148"]}]},{"name":"Technology and Innovation Major Project of the Ministry of Science and Technology of China","award":["2020AAA0108400"],"award-info":[{"award-number":["2020AAA0108400"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3672354","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:54:55Z","timestamp":1724561695000},"page":"6214-6225","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["OAG-Bench: A Human-Curated Benchmark for Academic Graph Mining"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8551-1966","authenticated-orcid":false,"given":"Fanjin","family":"Zhang","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0597-7337","authenticated-orcid":false,"given":"Shijie","family":"Shi","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7695-1633","authenticated-orcid":false,"given":"Yifan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9629-5493","authenticated-orcid":false,"given":"Bo","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5682-2810","authenticated-orcid":false,"given":"Yukuo","family":"Cen","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3430-4048","authenticated-orcid":false,"given":"Jifan","family":"Yu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9461-2889","authenticated-orcid":false,"given":"Yelin","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5584-2908","authenticated-orcid":false,"given":"Lulu","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5962-2997","authenticated-orcid":false,"given":"Qingfei","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3634-1202","authenticated-orcid":false,"given":"Yuqing","family":"Cheng","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5829-6030","authenticated-orcid":false,"given":"Tianyi","family":"Han","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6866-9589","authenticated-orcid":false,"given":"Yuwei","family":"An","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1115-3945","authenticated-orcid":false,"given":"Dan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4517-3593","authenticated-orcid":false,"given":"Weng Lam","family":"Tam","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2004-4006","authenticated-orcid":false,"given":"Kun","family":"Cao","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2425-8836","authenticated-orcid":false,"given":"Yunhe","family":"Pang","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6901-7925","authenticated-orcid":false,"given":"Xinyu","family":"Guan","sequence":"additional","affiliation":[{"name":"Biendata, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1936-6323","authenticated-orcid":false,"given":"Huihui","family":"Yuan","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9024-4954","authenticated-orcid":false,"given":"Jian","family":"Song","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6124-4204","authenticated-orcid":false,"given":"Xiaoyan","family":"Li","sequence":"additional","affiliation":[{"name":"Zhipu AI, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6092-2002","authenticated-orcid":false,"given":"Yuxiao","family":"Dong","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3487-4593","authenticated-orcid":false,"given":"Jie","family":"Tang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_2_2_1","unstructured":"Anthropic. 2023. Introducing Claude. https:\/\/www.anthropic.com\/news\/introducing-claude."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1371"},{"key":"e_1_3_2_2_4_1","volume-title":"Random forests. Machine learning","author":"Breiman Leo","year":"2001","unstructured":"Leo Breiman. 2001. Random forests. Machine learning, Vol. 45, 1 (2001), 5--32."},{"key":"e_1_3_2_2_5_1","unstructured":"Laurent Charlin and Richard Zemel. 2013. The Toronto paper matching system: an automated paper-reviewer assignment system. (2013)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599930"},{"key":"e_1_3_2_2_7_1","first-page":"1","article-title":"GCCAD: Graph Contrastive Learning for Anomaly Detection","volume":"01","author":"Chen Bo","year":"2023","unstructured":"Bo Chen, Jing Zhang, Xiaokang Zhang, Yuxiao Dong, Jian Song, Peng Zhang, Kaibo Xu, Evgeny Kharlamov, and Jie Tang. 2023. GCCAD: Graph Contrastive Learning for Anomaly Detection. IEEE Transactions on Knowledge & Data Engineering 01 (2023), 1--14.","journal-title":"IEEE Transactions on Knowledge & Data Engineering"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.207"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9"},{"key":"e_1_3_2_2_10_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171--4186","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171--4186."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.62"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1013203451"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458754"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems. 1025--1035","author":"Hamilton Will","year":"2017","unstructured":"Will Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. In Proceedings of the 31st International Conference on Neural Information Processing Systems. 1025--1035."},{"key":"e_1_3_2_2_17_1","volume-title":"Proceedings of the 11th International Conference on Learning Representations.","author":"He Pengcheng","year":"2022","unstructured":"Pengcheng He, Jianfeng Gao, and Weizhu Chen. 2022. DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing. In Proceedings of the 11th International Conference on Learning Representations."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401063"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467342"},{"key":"e_1_3_2_2_20_1","unstructured":"Zhiheng Huang Wei Xu and Kai Yu. 2015. Bidirectional LSTM-CRF models for sequence tagging. (2015). arxiv: 1508.01991"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3511935"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1259"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_2_25_1","volume-title":"Proceedings of the 4th International Conference on Learning Representations.","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-Supervised Classification with Graph Convolutional Networks. In Proceedings of the 4th International Conference on Learning Representations."},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the 8th International Conference on Learning Representations.","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In Proceedings of the 8th International Conference on Learning Representations."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.14778\/3421424.3421431"},{"key":"e_1_3_2_2_28_1","volume-title":"Towards General Text Embeddings with Multi-stage Contrastive Learning. arXiv preprint arXiv:2308.03281","author":"Li Zehan","year":"2023","unstructured":"Zehan Li, Xin Zhang, Yanzhao Zhang, Dingkun Long, Pengjun Xie, and Meishan Zhang. 2023. Towards General Text Embeddings with Multi-stage Contrastive Learning. arXiv preprint arXiv:2308.03281 (2023)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186150"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"e_1_3_2_2_31_1","volume-title":"Proceedings of the 9th International Conference on Learning Representations.","author":"Liu Yinhan","year":"2020","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2020. RoBERTa: A Robustly Optimized BERT Pretraining Approach. In Proceedings of the 9th International Conference on Learning Representations."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.447"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.395"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.146"},{"key":"e_1_3_2_2_35_1","unstructured":"OpenAI. 2022. Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330855"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313446"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380132"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482264"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2740908.2742839"},{"key":"e_1_3_2_2_43_1","volume-title":"Global Pointer: Novel Efficient Span-based Approach for Named Entity Recognition.","author":"Su Jianlin","year":"2022","unstructured":"Jianlin Su, Ahmed Murtadha, Shengfeng Pan, Jing Hou, Jun Sun, Wanwei Huang, Bo Wen, and Yunfeng Liu. 2022. Global Pointer: Novel Efficient Span-based Approach for Named Entity Recognition. (2022). arxiv: 2208.03054"},{"key":"e_1_3_2_2_44_1","volume-title":"Proceedings of the 22nd International Conference on Neural Information Processing Systems. 1821--1828","author":"Sutskever Ilya","year":"2009","unstructured":"Ilya Sutskever, Ruslan Salakhutdinov, and Joshua B Tenenbaum. 2009. Modelling relational data using Bayesian Clustered Tensor Factorization. In Proceedings of the 22nd International Conference on Neural Information Processing Systems. 1821--1828."},{"key":"e_1_3_2_2_45_1","volume-title":"Parameter-efficient prompt tuning makes generalized and calibrated neural text retrievers. arXiv preprint arXiv:2207.07087","author":"Tam Weng Lam","year":"2022","unstructured":"Weng Lam Tam, Xiao Liu, Kaixuan Ji, Lilong Xue, Xingjian Zhang, Yuxiao Dong, Jiahua Liu, Maodi Hu, and Jie Tang. 2022. Parameter-efficient prompt tuning makes generalized and calibrated neural text retrievers. arXiv preprint arXiv:2207.07087 (2022)."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2736277.2741093"},{"key":"e_1_3_2_2_47_1","volume-title":"Galactica: A large language model for science. arXiv preprint arXiv:2211.09085","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. 2022. Galactica: A large language model for science. arXiv preprint arXiv:2211.09085 (2022)."},{"key":"e_1_3_2_2_48_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_2_49_1","volume-title":"AAAI workshop: Scholarly big data","volume":"15","author":"Valenzuela Marco","year":"2015","unstructured":"Marco Valenzuela, Vu Ha, and Oren Etzioni. 2015. Identifying Meaningful Citations.. In AAAI workshop: Scholarly big data, Vol. 15. 13."},{"key":"e_1_3_2_2_50_1","volume-title":"The science of science","author":"Wang Dashun","unstructured":"Dashun Wang and Albert-L\u00e1szl\u00f3 Barab\u00e1si. 2021. The science of science. Cambridge University Press."},{"key":"e_1_3_2_2_51_1","volume-title":"Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533","author":"Wang Liang","year":"2022","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, and Furu Wei. 2022. Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533 (2022)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3269252"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331267"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.123"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645533"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583530"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3222168"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330785"},{"key":"e_1_3_2_2_59_1","volume-title":"Retrieve anything to augment large language models. arXiv preprint arXiv:2310.07554","author":"Zhang Peitian","year":"2023","unstructured":"Peitian Zhang, Shitao Xiao, Zheng Liu, Zhicheng Dou, and Jian-Yun Nie. 2023. Retrieve anything to augment large language models. arXiv preprint arXiv:2310.07554 (2023)."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3672354","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3672354","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:23Z","timestamp":1750291463000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3672354"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":58,"alternative-id":["10.1145\/3637528.3672354","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3672354","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}