{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T02:13:32Z","timestamp":1777342412668,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715302","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:10:32Z","timestamp":1750687832000},"page":"757-760","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Multi-Scale Heterogeneous Text-Attributed Graph Datasets From Diverse Domains"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3337-0886","authenticated-orcid":false,"given":"Yunhui","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5595-6132","authenticated-orcid":false,"given":"Qizhuo","family":"Xie","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4029-7615","authenticated-orcid":false,"given":"Jinwei","family":"Shi","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0894-2733","authenticated-orcid":false,"given":"Jiaxu","family":"Shen","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9649-1796","authenticated-orcid":false,"given":"Tieke","family":"He","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Twhin: Embedding the twitter heterogeneous information network for personalized recommendation. In KDD. 2842--2850.","author":"El-Kishky Ahmed","year":"2022","unstructured":"Ahmed El-Kishky, Thomas Markovich, Serim Park, Chetan Verma, Baekjin Kim, Ramy Eskander, Yury Malkov, Frank Portman, Sof\u00eda Samaniego, Ying Xiao, et al. 2022. Twhin: Embedding the twitter heterogeneous information network for personalized recommendation. In KDD. 2842--2850."},{"key":"e_1_3_2_2_2_1","volume-title":"Magnn: Metapath aggregated graph neural network for heterogeneous graph embedding. InWWW. 2331--2341.","author":"Fu Xinyu","year":"2020","unstructured":"Xinyu Fu, Jiani Zhang, Ziqiao Meng, and Irwin King. 2020. Magnn: Metapath aggregated graph neural network for heterogeneous graph embedding. InWWW. 2331--2341."},{"key":"e_1_3_2_2_3_1","volume-title":"Inductive representation learning on large graphs. NeurIPS 30","author":"Hamilton Will","year":"2017","unstructured":"Will Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. NeurIPS 30 (2017)."},{"key":"e_1_3_2_2_4_1","unstructured":"Weihua Hu Matthias Fey Hongyu Ren Maho Nakata Yuxiao Dong and Jure Leskovec. 2021. OGB-LSC: A Large-Scale Challenge for Machine Learning on Graphs. In NeurIPS."},{"key":"e_1_3_2_2_5_1","first-page":"22118","article-title":"Open graph benchmark: Datasets for machine learning on graphs","volume":"33","author":"Hu Weihua","year":"2020","unstructured":"Weihua Hu, Matthias Fey, Marinka Zitnik, Yuxiao Dong, Hongyu Ren, Bowen Liu, Michele Catasta, and Jure Leskovec. 2020. Open graph benchmark: Datasets for machine learning on graphs. NeurIPS 33 (2020), 22118--22133.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_6_1","volume-title":"Large language models on graphs: A comprehensive survey. TKDE","author":"Jin Bowen","year":"2024","unstructured":"Bowen Jin, Gang Liu, Chi Han, Meng Jiang, Heng Ji, and Jiawei Han. 2024. Large language models on graphs: A comprehensive survey. TKDE (2024)."},{"key":"e_1_3_2_2_7_1","volume-title":"Bhagyashree Taleka, Tengfei Ma, Xiang Song, and Wen-mei Hwu.","author":"Khatua Arpandeep","year":"2023","unstructured":"Arpandeep Khatua, Vikram Sharma Mailthody, Bhagyashree Taleka, Tengfei Ma, Xiang Song, and Wen-mei Hwu. 2023. Igb: Addressing the gaps in labeling, features, heterogeneity, and size of public graph datasets for deep learning research. In KDD. 4284--4295."},{"key":"e_1_3_2_2_8_1","volume-title":"Kipf and Max Welling","author":"Thomas","year":"2017","unstructured":"Thomas N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In ICLR."},{"key":"e_1_3_2_2_9_1","volume-title":"TEG-DB: A Comprehensive Dataset and Benchmark of Textual-Edge Graphs. NeurIPS","author":"Li Zhuofeng","year":"2024","unstructured":"Zhuofeng Li, Zixing Gou, Xiangnan Zhang, Zhongyuan Liu, Sirui Li, Yuntong Hu, Chen Ling, Zheng Zhang, and Liang Zhao. 2024. TEG-DB: A Comprehensive Dataset and Benchmark of Textual-Edge Graphs. NeurIPS (2024)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"crossref","unstructured":"Zizheng Lin Haowen Ke Ngo-Yin Wong Jiaxin Bai Yangqiu Song Huan Zhao and Junpeng Ye. 2021. Multi-relational graph based heterogeneous multi-task learning in community question answering. In CIKM. 1038--1047.","DOI":"10.1145\/3459637.3482279"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Qingsong Lv Ming Ding Qiang Liu Yuxiang Chen Wenzheng Feng Siming He Chang Zhou Jianguo Jiang Yuxiao Dong and Jie Tang. 2021. Are we really making much progress? revisiting benchmarking and refining heterogeneous graph neural networks. In KDD. 1150--1160.","DOI":"10.1145\/3447548.3467350"},{"key":"e_1_3_2_2_12_1","volume-title":"Position: Graph Foundation Models Are Already Here. In ICML. 34670--34692.","author":"Mao Haitao","year":"2024","unstructured":"Haitao Mao, Zhikai Chen, Wenzhuo Tang, Jianan Zhao, Yao Ma, Tong Zhao, Neil Shah, Mikhail Galkin, and Jiliang Tang. 2024. Position: Graph Foundation Models Are Already Here. In ICML. 34670--34692."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"crossref","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. In EMNLP-IJCNLP. 3982--3992.","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_2_14_1","volume-title":"The Semantic Web","author":"Schlichtkrull Michael","unstructured":"Michael Schlichtkrull, Thomas N. Kipf, Peter Bloem, Rianne van den Berg, Ivan Titov, and Max Welling. 2018. Modeling Relational Data with Graph Convolutional Networks. In The Semantic Web. Springer, 593--607."},{"key":"e_1_3_2_2_15_1","unstructured":"Mirac Suzgun Luke Melas-Kyriazi Suproteem Sarkar Scott D Kominers and Stuart Shieber. 2023. The harvard uspto patent dataset: A large-scale wellstructured and multi-purpose corpus of patent applications. In NeurIPS."},{"key":"e_1_3_2_2_16_1","unstructured":"Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Li\u00f2 and Yoshua Bengio. 2018. Graph Attention Networks. In ICLR."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Mengting Wan and Julian McAuley. 2018. Item recommendation on monotonic behavior chains. In RecSys. 86--94.","DOI":"10.1145\/3240323.3240369"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1162\/qss_a_00021"},{"key":"e_1_3_2_2_19_1","unstructured":"MinjieWang Da Zheng Zihao Ye Quan Gan Mufei Li Xiang Song Jinjing Zhou Chao Ma Lingfan Yu Yu Gai et al. 2019. Deep graph library: A graph-centric highly-performant package for graph neural networks. arXiv (2019)."},{"key":"e_1_3_2_2_20_1","volume-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. NeurIPS","author":"Wang Wenhui","year":"2020","unstructured":"Wenhui Wang, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. 2020. Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. NeurIPS (2020)."},{"key":"e_1_3_2_2_21_1","unstructured":"Hao Yan Chaozhuo Li Ruosong Long Chao Yan Jianan Zhao Wenwen Zhuang Jun Yin Peiyan Zhang Weihao Han Hao Sun et al. 2023. A comprehensive study on text-attributed graphs: Benchmarking and rethinking. NeurIPS (2023)."},{"key":"e_1_3_2_2_22_1","first-page":"1637","article-title":"Interpretable and efficient heterogeneous graph convolutional network","volume":"35","author":"Yang Yaming","year":"2021","unstructured":"Yaming Yang, Ziyu Guan, Jianxin Li, Wei Zhao, Jiangtao Cui, and Quan Wang. 2021. Interpretable and efficient heterogeneous graph convolutional network. TKDE 35, 2 (2021), 1637--1650.","journal-title":"TKDE"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Fanjin Zhang Shijie Shi Yifan Zhu Bo Chen Yukuo Cen Jifan Yu Yelin Chen LuluWang Qingfei Zhao Yuqing Cheng et al. 2024. Oag-bench: a human-curated benchmark for academic graph mining. In KDD. 6214--6225.","DOI":"10.1145\/3637528.3672354"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715302","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T18:25:46Z","timestamp":1759861546000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715302"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":23,"alternative-id":["10.1145\/3701716.3715302","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715302","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}