{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:07:11Z","timestamp":1755907631393,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2022A1515110073"],"award-info":[{"award-number":["2022A1515110073"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3730429","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"88-103","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Cherry: Breaking the GPU Memory Wall for Large-Scale GNN Training via Micro-Batching"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3350-5108","authenticated-orcid":false,"given":"Yan","family":"Wang","sequence":"first","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5883-832X","authenticated-orcid":false,"given":"Qinghua","family":"Guo","sequence":"additional","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8265-5698","authenticated-orcid":false,"given":"Haoran","family":"Kong","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6602-7212","authenticated-orcid":false,"given":"Kai","family":"Sheng","sequence":"additional","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3516-2192","authenticated-orcid":false,"given":"Zhen","family":"Xie","sequence":"additional","affiliation":[{"name":"Binghamton University, Binghamton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9857-6283","authenticated-orcid":false,"given":"Hao","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8539-8326","authenticated-orcid":false,"given":"Weile","family":"Jia","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5422-4497","authenticated-orcid":false,"given":"Dingwen","family":"Tao","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1481-3179","authenticated-orcid":false,"given":"Xin","family":"He","sequence":"additional","affiliation":[{"name":"Guangzhou Institute of Technology, Xidian University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456233"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645517"},{"key":"e_1_3_3_2_4_2","unstructured":"Jie Chen Tengfei Ma and Cao Xiao. 2018. Fastgcn: fast learning with graph convolutional networks via importance sampling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1801.10247 (2018)."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330925"},{"key":"e_1_3_3_2_6_2","unstructured":"Matthias Fey and Jan\u00a0Eric Lenssen. 2019. Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1903.02428 (2019)."},{"key":"e_1_3_3_2_7_2","first-page":"3294","volume-title":"International conference on machine learning","author":"Fey Matthias","year":"2021","unstructured":"Matthias Fey, Jan\u00a0E Lenssen, Frank Weichert, and Jure Leskovec. 2021. Gnnautoscale: Scalable and expressive graph neural networks via historical embeddings. In International conference on machine learning. 3294\u20133304."},{"key":"e_1_3_3_2_8_2","first-page":"551","volume-title":"15th USENIX Symposium on Operating Systems Design and Implementation","author":"Gandhi Swapnil","year":"2021","unstructured":"Swapnil Gandhi and Anand\u00a0Padmanabha Iyer. 2021. P3: Distributed deep graph learning at scale. In 15th USENIX Symposium on Operating Systems Design and Implementation. 551\u2013568."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3539765"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613168"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Daniele Grattarola and Cesare Alippi. 2021. Graph neural networks in tensorflow and keras with spektral application notes. IEEE Computational Intelligence Magazine 16 1 (2021) 99\u2013106.","DOI":"10.1109\/MCI.2020.3039072"},{"key":"e_1_3_3_2_12_2","unstructured":"Will Hamilton Zhitao Ying and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_13_2","unstructured":"Yaochen Hu Amit Levi Ishaan Kumar Yingxue Zhang and Mark Coates. 2021. On Batch-size Selection for Stochastic Training for Graph Neural Networks. (2021)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627703.3650063"},{"key":"e_1_3_3_2_15_2","unstructured":"Yanping Huang Youlong Cheng Ankur Bapna Orhan Firat Dehao Chen Mia Chen HyoukJoong Lee Jiquan Ngiam Quoc\u00a0V Le Yonghui Wu et\u00a0al. 2019. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_2_16_2","first-page":"187","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Jia Zhihao","year":"2020","unstructured":"Zhihao Jia, Sina Lin, Mingyu Gao, Matei Zaharia, and Alex Aiken. 2020. Improving the Accuracy, Scalability, and Performance of Graph Neural Networks with Roc. In Proceedings of Machine Learning and Systems , Vol.\u00a02. 187\u2013198."},{"key":"e_1_3_3_2_17_2","unstructured":"George Karypis. 1997. METIS: Unstructured graph partitioning and sparse matrix ordering system. Technical report (1997)."},{"key":"e_1_3_3_2_18_2","unstructured":"Thomas\u00a0N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1609.02907 (2016)."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219980"},{"key":"e_1_3_3_2_20_2","first-page":"120","volume-title":"Proceedings of Machine Learning and Systems","volume":"1","author":"Lerer Adam","year":"2019","unstructured":"Adam Lerer, Ledell Wu, Jiajun Shen, Timothee Lacroix, Luca Wehrstedt, Abhijit Bose, and Alex Peysakhovich. 2019. Pytorch-BigGraph: A Large Scale Graph Embedding System. In Proceedings of Machine Learning and Systems , Vol.\u00a01. 120\u2013131."},{"key":"e_1_3_3_2_21_2","unstructured":"Guohao Li Chenxin Xiong Ali Thabet and Bernard Ghanem. 2020. Deepergcn: All you need to train deeper gcns. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2006.07739 (2020)."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3649391"},{"key":"e_1_3_3_2_23_2","first-page":"161","volume-title":"2021 USENIX Annual Technical Conference","author":"Lim Gangmuk","year":"2021","unstructured":"Gangmuk Lim, Jeongseob Ahn, Wencong Xiao, Youngjin Kwon, and Myeongjae Jeon. 2021. Zico: Efficient GPU memory sharing for concurrent DNN training. In 2021 USENIX Annual Technical Conference. 161\u2013175."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3419111.3421281"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/SmartWorld-UIC-ATC-SCALCOM-IOP-SCI.2019.00056"},{"key":"e_1_3_3_2_26_2","first-page":"443","volume-title":"2019 USENIX Annual Technical Conference","author":"Ma Lingxiao","year":"2019","unstructured":"Lingxiao Ma, Zhi Yang, Youshan Miao, Jilong Xue, Ming Wu, Lidong Zhou, and Yafei Dai. 2019. NeuGraph: Parallel deep neural network computation on large graphs. In 2019 USENIX Annual Technical Conference. 443\u2013458."},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480856"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2018.00058"},{"key":"e_1_3_3_2_29_2","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et\u00a0al. 2019. Pytorch: An imperative style high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378505"},{"key":"e_1_3_3_2_31_2","unstructured":"XinYu Piao DoangJoo Synn JooYoung Park and Jong-Kook Kim. 2021. Micro Batch Streaming: Allowing the Training of DNN Models to Use a large Batch Size in Memory Constrained Environments. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.12484 (2021)."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"XinYu Piao DoangJoo Synn JooYoung Park and Jong-Kook Kim. 2023. Enabling Large Batch Size Training for DNN Models Beyond the Memory Limit While Maintaining Performance. IEEE Access (2023).","DOI":"10.1109\/ACCESS.2023.3312572"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_3_2_34_2","first-page":"551","volume-title":"2021 USENIX Annual Technical Conference","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza\u00a0Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. Zero-offload: Democratizing billion-scale model training. In 2021 USENIX Annual Technical Conference. 551\u2013564."},{"key":"e_1_3_3_2_35_2","unstructured":"Yu Rong Wenbing Huang Tingyang Xu and Junzhou Huang. 2019. Dropedge: Towards deep graph convolutional networks on node classification. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1907.10903 (2019)."},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Andrea Rossi Denilson Barbosa Donatella Firmani Antonio Matinata and Paolo Merialdo. 2021. Knowledge graph embedding for link prediction: A comparative analysis. ACM Transactions on Knowledge Discovery from Data 15 2 (2021) 1\u201349.","DOI":"10.1145\/3424672"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00132"},{"key":"e_1_3_3_2_38_2","unstructured":"Zhanhong Tan Zijian Zhu and Kaisheng Ma. 2024. Cocco: Hardware-Mapping Co-Exploration towards Memory Capacity-Communication Optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.00629 (2024)."},{"key":"e_1_3_3_2_39_2","unstructured":"Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Lio and Yoshua Bengio. 2017. Graph attention networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1710.10903 (2017)."},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3552326.3567501"},{"key":"e_1_3_3_2_41_2","unstructured":"Minjie Wang Da Zheng Zihao Ye Quan Gan Mufei Li Xiang Song Jinjing Zhou Chao Ma Lingfan Yu Yu Gai et\u00a0al. 2019. Deep graph library: A graph-centric highly-performant package for graph neural networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.01315 (2019)."},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Qiange Wang Yao Chen Weng-Fai Wong and Bingsheng He. 2023. HongTu: Scalable Full-Graph GNN Training on Multiple GPUs. Proc. of the ACM on Management of Data 1 4 (2023) 1\u201327.","DOI":"10.1145\/3626733"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538928"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00033"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"crossref","unstructured":"Shunxin Xiao Shiping Wang Yuanfei Dai and Wenzhong Guo. 2022. Graph neural networks in node classification: survey and evaluation. Machine Vision and Applications 33 1 (2022) 4.","DOI":"10.1007\/s00138-021-01251-0"},{"key":"e_1_3_3_2_46_2","unstructured":"Cong Xie Ling Yan Wu-Jun Li and Zhihua Zhang. 2014. Distributed power-law graph computing: Theoretical and empirical analysis. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575725"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00083"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/574"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219890"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM51629.2021.00095"},{"key":"e_1_3_3_2_52_2","unstructured":"Hanqing Zeng Hongkuan Zhou Ajitesh Srivastava Rajgopal Kannan and Viktor Prasanna. 2019. Graphsaint: Graph sampling based inductive learning method. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1907.04931 (2019)."},{"key":"e_1_3_3_2_53_2","unstructured":"Muhan Zhang and Yixin Chen. 2018. Link prediction based on graph neural networks. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11782"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/IA351965.2020.00011"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539177"},{"key":"e_1_3_3_2_57_2","unstructured":"Rong Zhu Kun Zhao Hongxia Yang Wei Lin Chang Zhou Baole Ai Yong Li and Jingren Zhou. 2019. Aligraph: A comprehensive graph neural network platform. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1902.08730 (2019)."}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3730429","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:39Z","timestamp":1755867459000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3730429"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":56,"alternative-id":["10.1145\/3721145.3730429","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3730429","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}