{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:58Z","timestamp":1766219998074,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["62025208"],"award-info":[{"award-number":["62025208"]}]},{"name":"National Natural Science Foundation of China","award":["62421002"],"award-info":[{"award-number":["62421002"]}]},{"name":"National Natural Science Foundation of China","award":["62302512"],"award-info":[{"award-number":["62302512"]}]},{"name":"National Natural Science Foundation of China","award":["62402503"],"award-info":[{"award-number":["62402503"]}]},{"name":"Youth Independent Innovation Science Foundation of National University of Defense Technology","award":["ZK24-02"],"award-info":[{"award-number":["ZK24-02"]}]},{"name":"Open Fund of PDL","award":["2023-KJWPDL-04"],"award-info":[{"award-number":["2023-KJWPDL-04"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754630","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"374-384","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["HMGraph: Boosting GNN Training on Hierarchical Memory via Coordinated Cache"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6727-1962","authenticated-orcid":false,"given":"Lizhi","family":"Zhang","sequence":"first","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5639-7882","authenticated-orcid":false,"given":"Menghan","family":"Jia","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3458-4732","authenticated-orcid":false,"given":"Zhiquan","family":"Lai","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4579-4268","authenticated-orcid":false,"given":"Qiao","family":"Li","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6450-8485","authenticated-orcid":false,"given":"Yiming","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9743-2034","authenticated-orcid":false,"given":"Dongsheng","family":"Li","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577528"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651353"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330925"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Aaron Clauset Cosma\u00a0Rohilla Shalizi and M.\u00a0E.\u00a0J. Newman. 2009. Power-Law Distributions in Empirical Data. SIAM Rev. 51 4 (2009) 661\u2013703.","DOI":"10.1137\/070710111"},{"key":"e_1_3_3_1_6_2","volume-title":"ICLR Workshop on Representation Learning on Graphs and Manifolds","author":"Fey Matthias","year":"2019","unstructured":"Matthias Fey and Jan\u00a0E. Lenssen. 2019. Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295399"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Keshi Ge Zhejiang Ran Zhiquan Lai Lizhi Zhang and Dongsheng Li. 2022. BRGraph: An efficient graph neural network training system by reusing batch data on GPU. Concurrency and Computation: Practice and Experience 34 15 (2022) e6961.","DOI":"10.1002\/cpe.6961"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/250"},{"key":"e_1_3_3_1_10_2","unstructured":"Will Hamilton Zhitao Ying and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_1_11_2","unstructured":"Mark Harriso. 2017. Unified Memory for CUDA Beginners. https:\/\/developer.nvidia.com\/blog\/unified-memory-cuda-beginners\/."},{"key":"e_1_3_3_1_12_2","unstructured":"Weihua Hu Matthias Fey Marinka Zitnik Yuxiao Dong Hongyu Ren Bowen Liu Michele Catasta and Jure Leskovec. 2020. Open graph benchmark: Datasets for machine learning on graphs. Advances in neural information processing systems 33 (2020) 22118\u201322133."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673063"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599843"},{"key":"e_1_3_3_1_15_2","unstructured":"Thomas\u00a0N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. arxiv:https:\/\/arXiv.org\/abs\/1609.02907\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1609.02907"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421281"},{"key":"e_1_3_3_1_17_2","first-page":"103","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Liu Tianfeng","year":"2023","unstructured":"Tianfeng Liu, Yangrui Chen, Dan Li, Chuan Wu, Yibo Zhu, Jun He, Yanghua Peng, Hongzheng Chen, Hongzhi Chen, and Chuanxiong Guo. 2023. BGL: GPU-Efficient GNN Training by Optimizing Graph Data I\/O and Preprocessing. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). USENIX Association, Boston, MA, 103\u2013118."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Seung\u00a0Won Min Kun Wu Sitao Huang Mert Hidayeto\u011flu Jinjun Xiong Eiman Ebrahimi Deming Chen and Wen-mei Hwu. 2021. Large graph convolutional network training with GPU-oriented data communication architecture. Proc. VLDB Endow. 14 11 (jul 2021) 2087\u20132100.","DOI":"10.14778\/3476249.3476264"},{"key":"e_1_3_3_1_19_2","first-page":"533","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)","author":"Mohoney Jason","year":"2021","unstructured":"Jason Mohoney, Roger Waleffe, Henry Xu, Theodoros Rekatsinas, and Shivaram Venkataraman. 2021. Marius: Learning Massive Graph Embeddings on a Single Machine. In 15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21). 533\u2013549."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Jeongmin\u00a0Brian Park Vikram\u00a0Sharma Mailthody Zaid Qureshi and Wen-mei Hwu. 2024. Accelerating Sampling and Aggregation Operations in GNN Frameworks with GPU Initiated Direct Storage Accesses. Proc. VLDB Endow. 17 6 (may 2024) 1227\u20131240.","DOI":"10.14778\/3648160.3648166"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Yeonhong Park Sunhong Min and Jae\u00a0W. Lee. 2022. Ginex: SSD-enabled billion-scale graph neural network training on a single machine via provably optimal in-memory caching. Proc. VLDB Endow. 15 11 (jul 2022) 2626\u20132639.","DOI":"10.14778\/3551793.3551819"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575748"},{"key":"e_1_3_3_1_23_2","first-page":"165","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Sun Jie","year":"2023","unstructured":"Jie Sun, Li Su, Zuocheng Shi, Wenting Shen, Zeke Wang, Lei Wang, Jie Zhang, Yong Li, Wenyuan Yu, Jingren Zhou, and Fei Wu. 2023. Legion: Automatically Pushing the Envelope of Multi-GPU System for Billion-Scale GNN Training. In 2023 USENIX Annual Technical Conference (USENIX ATC 23). USENIX Association, Boston, MA, 165\u2013179."},{"key":"e_1_3_3_1_24_2","unstructured":"Zeyuan Tan Xiulong Yuan Congjie He Man-Kit Sit Guo Li Xiaoze Liu Baole Ai Kai Zeng Peter\u00a0R. Pietzuch and Luo Mai. 2023. Quiver: Supporting GPUs for Low-Latency High-Throughput GNN Serving with Workload Awareness. CoRR abs\/2305.10863 (2023). arXiv:https:\/\/arXiv.org\/abs\/2305.10863"},{"key":"e_1_3_3_1_25_2","unstructured":"Petar Velickovic Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Lio Yoshua Bengio et\u00a0al. 2017. Graph attention networks. stat 1050 20 (2017) 10\u201348550."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3567501"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Kuansan Wang Zhihong Shen Chiyuan Huang Chieh-Han Wu Yuxiao Dong and Anshul Kanakia. 2020. Microsoft academic graph: When experts are not enough. Quantitative Science Studies 1 1 (2020) 396\u2013413.","DOI":"10.1162\/qss_a_00021"},{"key":"e_1_3_3_1_28_2","unstructured":"Minjie Wang Lingfan Yu Da Zheng Quan Gan Yu Gai Zihao Ye Mufei Li Jinjing Zhou Qi Huang Chao Ma Ziyue Huang Qipeng Guo Hao Zhang Haibin Lin Junbo Zhao Jinyang Li Alexander\u00a0J. Smola and Zheng Zhang. 2019. Deep Graph Library: Towards Efficient and Scalable Deep Learning on Graphs. CoRR abs\/1909.01315 (2019). arXiv:https:\/\/arXiv.org\/abs\/1909.01315"},{"key":"e_1_3_3_1_29_2","unstructured":"Minjie Wang and Da Zheng. 2023. Stochastic Training of GNNs with GraphBolt. https:\/\/docs.dgl.ai\/en\/latest\/stochastic_training\/index.html."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Zonghan Wu Shirui Pan Fengwen Chen Guodong Long Chengqi Zhang and S\u00a0Yu Philip. 2020. A comprehensive survey on graph neural networks. IEEE transactions on neural networks and learning systems 32 1 (2020) 4\u201324.","DOI":"10.1109\/TNNLS.2020.2978386"},{"key":"e_1_3_3_1_31_2","unstructured":"C. Xie L. Yan W.\u00a0J. Li and Z. Zhang. 2014. Distributed power-law graph computing: Theoretical and empirical analysis. Advances in Neural Information Processing Systems 2 (2014) 1673\u20131681."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3492321.3519557"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219890"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00036"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Lizhi Zhang Kai Lu Zhiquan Lai Yongquan Fu Yu Tang and Dongsheng Li. 2023. Accelerating GNN Training by Adapting Large Graphs to Distributed Heterogeneous Architectures. IEEE Trans. Comput. 72 12 (2023) 3473\u20133488.","DOI":"10.1109\/TC.2023.3305077"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.5555\/3327345.3327423"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Xin Zhang Yanyan Shen Yingxia Shao and Lei Chen. 2023. DUCATI: A Dual-Cache Training System for Graph Neural Networks on Giant Graphs with the GPU. Proceedings of the ACM on Management of Data 1 2 (2023) 1\u201324.","DOI":"10.1145\/3589311"}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:38:01Z","timestamp":1766219881000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754630"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":36,"alternative-id":["10.1145\/3754598.3754630","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754630","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}