{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T09:53:45Z","timestamp":1773482025115,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T00:00:00Z","timestamp":1691366400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,7]]},"DOI":"10.1145\/3588195.3592990","type":"proceedings-article","created":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T20:47:00Z","timestamp":1691441220000},"page":"17-30","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Redundancy-Free High-Performance Dynamic GNN Training with Hierarchical Pipeline Parallelism"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8101-785X","authenticated-orcid":false,"given":"Yaqi","family":"Xia","sequence":"first","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6599-9976","authenticated-orcid":false,"given":"Zheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8329-0145","authenticated-orcid":false,"given":"Hulin","family":"Wang","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3913-3623","authenticated-orcid":false,"given":"Donglin","family":"Yang","sequence":"additional","affiliation":[{"name":"Nvidia Corporation, Santa Clara, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9500-3390","authenticated-orcid":false,"given":"Xiaobo","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2869-7623","authenticated-orcid":false,"given":"Dazhao","family":"Cheng","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2023,8,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Danilo Jimenez Rezende, and Koray kavukcuoglu","author":"Battaglia Peter","year":"2016","unstructured":"Peter Battaglia, Razvan Pascanu, Matthew Lai, Danilo Jimenez Rezende, and Koray kavukcuoglu. 2016. Interaction networks for learning about objects, relations and physics. In NeurIPS 2016. 4509--4517."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/181014.181756"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456233"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480858"},{"key":"e_1_3_2_1_5_1","volume-title":"EMNLP","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho, Bart van Merri\u00ebnboer, cC aug lar Gulcc ehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning Phrase Representations using RNN Encoder--Decoder for Statistical Machine Translation. In EMNLP 2014. 1724--1734."},{"key":"e_1_3_2_1_6_1","volume-title":"NeurIPS","author":"Defferrard Micha\u00ebl","year":"2016","unstructured":"Micha\u00ebl Defferrard, Xavier Bresson, and Pierre Vandergheynst. 2016. Convolutional neural networks on graphs with fast localized spectral filtering. In NeurIPS 2016. 3844--3852."},{"key":"e_1_3_2_1_7_1","volume-title":"Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:1903.02428","author":"Fey Matthias","year":"2019","unstructured":"Matthias Fey and Jan Eric Lenssen. 2019. Fast graph representation learning with PyTorch Geometric. arXiv preprint arXiv:1903.02428 (2019)."},{"key":"e_1_3_2_1_8_1","first-page":"122","article-title":"TLPGNN","volume":"2022","author":"Fu Qiang","year":"2022","unstructured":"Qiang Fu, Yuede Ji, and H Howie Huang. 2022. TLPGNN: A Lightweight Two-Level Parallelism Paradigm for Graph Neural Network Computation on GPU. In ACM HPDC 2022. 122--134.","journal-title":"In ACM HPDC"},{"key":"e_1_3_2_1_9_1","volume-title":"USENIX OSDI","author":"Gandhi Swapnil","year":"2021","unstructured":"Swapnil Gandhi, Anand Padmanabha Iyer, Henry Xu, Theodoros Rekatsinas, Shivaram Venkataraman, Yuan Xie, Yufei Ding, Keval Vora, Ravi Netravali, Miryung Kim, et al. 2021. P3: Distributed deep graph learning at scale. In USENIX OSDI 2021. 551--568."},{"key":"e_1_3_2_1_10_1","volume-title":"ICML","author":"Gilmer Justin","year":"2017","unstructured":"Justin Gilmer, Samuel S Schoenholz, Patrick F Riley, Oriol Vinyals, and George E Dahl. 2017. Neural message passing for quantum chemistry. In ICML 2017. PMLR, 1263--1272."},{"key":"e_1_3_2_1_11_1","first-page":"17","article-title":"PowerGraph","volume":"2012","author":"Gonzalez Joseph E","year":"2012","unstructured":"Joseph E Gonzalez, Yucheng Low, Haijie Gu, Danny Bickson, and Carlos Guestrin. 2012. PowerGraph: Distributed Graph-Parallel Computation on Natural Graphs. In USENIX OSDI 2012. 17--30.","journal-title":"Distributed Graph-Parallel Computation on Natural Graphs. In USENIX OSDI"},{"key":"e_1_3_2_1_12_1","volume-title":"NeurIPS","author":"Hamilton William L","year":"2017","unstructured":"William L Hamilton, Rex Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. In NeurIPS 2017. 1025--1035."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441585"},{"key":"e_1_3_2_1_14_1","volume-title":"Dehao Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, et al.","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Mia Xu Chen, Dehao Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, et al. 2019. GPipe: efficient training of giant neural networks using pipeline parallelism. In NeurIPS 2019. 103--112."},{"key":"e_1_3_2_1_15_1","first-page":"187","article-title":"Improving the accuracy, scalability, and performance of graph neural networks with roc","volume":"2020","author":"Jia Zhihao","year":"2020","unstructured":"Zhihao Jia, Sina Lin, Mingyu Gao, Matei Zaharia, and Alex Aiken. 2020. Improving the accuracy, scalability, and performance of graph neural networks with roc. MLSys 2020, 187--198.","journal-title":"MLSys"},{"key":"e_1_3_2_1_16_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_17_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330895"},{"key":"e_1_3_2_1_19_1","volume-title":"Gdelt: Global data on events, location, and tone","author":"Leetaru Kalev","year":"2013","unstructured":"Kalev Leetaru and Philip A Schrodt. 2013. Gdelt: Global data on events, location, and tone, 1979--2012. In ISA annual convention 2013, Vol. 2. Citeseer, 1--49."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415530"},{"key":"e_1_3_2_1_21_1","volume-title":"Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493","author":"Li Yujia","year":"2015","unstructured":"Yujia Li, Daniel Tarlow, Marc Brockschmidt, and Richard Zemel. 2015. Gated graph sequence neural networks. arXiv preprint arXiv:1511.05493 (2015)."},{"key":"e_1_3_2_1_22_1","volume-title":"Nam Sung Kim, and Alexander Schwing","author":"Li Youjie","year":"2018","unstructured":"Youjie Li, Mingchao Yu, Songze Li, Salman Avestimehr, Nam Sung Kim, and Alexander Schwing. 2018. Pipe-SGD: a decentralized pipelined SGD framework for distributed deep net training. In NeurIPS 2018. 8056--8067."},{"key":"e_1_3_2_1_23_1","volume-title":"NeurIPS","author":"Liu Qi","year":"2019","unstructured":"Qi Liu, Maximilian Nickel, and Douwe Kiela. 2019. Hyperbolic graph neural networks. In NeurIPS 2019. 8230--8241."},{"key":"e_1_3_2_1_24_1","volume-title":"USENIX ATC","author":"Ma Lingxiao","year":"2019","unstructured":"Lingxiao Ma, Zhi Yang, Youshan Miao, Jilong Xue, Ming Wu, Lidong Zhou, and Yafei Dai. 2019. Neugraph: parallel deep neural network computation on large graphs. In USENIX ATC 2019. 443--457."},{"key":"e_1_3_2_1_25_1","volume-title":"Encoding Sentences with Graph Convolutional Networks for Semantic Role Labeling. In EMNLP","author":"Marcheggiani Diego","year":"2017","unstructured":"Diego Marcheggiani and Ivan Titov. 2017. Encoding Sentences with Graph Convolutional Networks for Semantic Role Labeling. In EMNLP 2017. Association for Computational Linguistics, 1506--1515."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534540.3534692"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480856"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.576"},{"key":"e_1_3_2_1_29_1","unstructured":"NVIDIA. [n.d.]. Optimized primitives for collective multi-GPU communication. https:\/\/github.com\/NVIDIA\/nccl."},{"key":"e_1_3_2_1_30_1","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et al. 2019. PyTorch: an imperative style high-performance deep learning library. In NeurIPS 2019. 8026--8037."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.14778\/3538598.3538614"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172867"},{"key":"e_1_3_2_1_35_1","volume-title":"Temporal graph networks for deep learning on dynamic graphs. arXiv preprint arXiv:2006.10637","author":"Rossi Emanuele","year":"2020","unstructured":"Emanuele Rossi, Ben Chamberlain, Fabrizio Frasca, Davide Eynard, Federico Monti, and Michael Bronstein. 2020. Temporal graph networks for deep learning on dynamic graphs. arXiv preprint arXiv:2006.10637 (2020)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482014"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"J\u00fcrgen Schmidhuber Sepp Hochreiter et al. 1997. Long short-term memory. In Neural Comput 1997. 1735--1780.","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_38_1","volume-title":"USENIX OSDI","author":"Thorpe John","year":"2021","unstructured":"John Thorpe, Yifan Qiao, Jonathan Eyolfson, Shen Teng, Guanzhou Hu, Zhihao Jia, Keval Vora, Ravi Netravali, Miryung Kim, and Guoqing Harry Xu. 2021. Dorylus: Affordable, Scalable, and Accurate GNN Training with Distributed CPU Servers and Serverless Threads. In USENIX OSDI 2021."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00074"},{"key":"e_1_3_2_1_40_1","volume-title":"ICLR","author":"Petar Velivc","year":"2018","unstructured":"Petar Velivc kovi\u0107 , Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Lio, and Yoshua Bengio. 2018. Graph attention networks. In ICLR 2018. 4."},{"key":"e_1_3_2_1_41_1","volume-title":"USENIX ATC","author":"Vora Keval","year":"2019","unstructured":"Keval Vora. 2019. LUMOS: dependency-driven disk-based graph processing. In USENIX ATC 2019. 429--442."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037747"},{"key":"e_1_3_2_1_43_1","volume-title":"MariusGNN: Resource-Efficient Out-of-Core Training of Graph Neural Networks. In Eighteenth European Conference on Computer Systems (EuroSys' 23)","author":"Waleffe Roger","year":"2023","unstructured":"Roger Waleffe, Jason Mohoney, Theodoros Rekatsinas, and Shivaram Venkataraman. 2023. MariusGNN: Resource-Efficient Out-of-Core Training of Graph Neural Networks. In Eighteenth European Conference on Computer Systems (EuroSys' 23)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577487"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456229"},{"key":"e_1_3_2_1_46_1","volume-title":"ICLR workshop on representation learning on graphs and manifolds","author":"Wang Minjie Yu","year":"2019","unstructured":"Minjie Yu Wang. 2019. Deep graph library: Towards efficient and scalable deep learning on graphs. In ICLR workshop on representation learning on graphs and manifolds 2019."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3526134"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00094"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457564"},{"key":"e_1_3_2_1_50_1","first-page":"515","article-title":"GNNAdvisor","volume":"2021","author":"Wang Yuke","year":"2021","unstructured":"Yuke Wang, Boyuan Feng, Gushu Li, Shuangchen Li, Lei Deng, Yuan Xie, and Yufei Ding. 2021a. GNNAdvisor: An Adaptive and Efficient Runtime System for GNN Acceleration on GPUs. In USENIX OSDI 2021. 515--531.","journal-title":"In USENIX OSDI"},{"key":"e_1_3_2_1_51_1","volume-title":"Recurrent neural network regularization. arXiv preprint arXiv:1409.2329","author":"Zaremba Wojciech","year":"2014","unstructured":"Wojciech Zaremba, Ilya Sutskever, and Oriol Vinyals. 2014. Recurrent neural network regularization. arXiv preprint arXiv:1409.2329 (2014)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415539"},{"key":"e_1_3_2_1_53_1","volume-title":"NeurIPS","author":"Zhang Muhan","year":"2018","unstructured":"Muhan Zhang and Yixin Chen. 2018. Link prediction based on graph neural networks. In NeurIPS 2018. 5171--5181."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/IA351965.2020.00011"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539177"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.14778\/3529337.3529342"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.14778\/3352063.3352127"}],"event":{"name":"HPDC '23: The 32nd International Symposium on High-Performance Parallel and Distributed Computing","location":"Orlando FL USA","acronym":"HPDC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3588195.3592990","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3588195.3592990","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:25Z","timestamp":1750178845000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3588195.3592990"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,7]]},"references-count":57,"alternative-id":["10.1145\/3588195.3592990","10.1145\/3588195"],"URL":"https:\/\/doi.org\/10.1145\/3588195.3592990","relation":{},"subject":[],"published":{"date-parts":[[2023,8,7]]},"assertion":[{"value":"2023-08-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}