{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:25:41Z","timestamp":1773318341833,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759890","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"200-216","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Plexus: Taming Billion-edge Graphs with 3D Parallel Full-graph GNN Training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5390-7800","authenticated-orcid":false,"given":"Aditya K.","family":"Ranjan","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Maryland, College Park, Maryland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2756-4290","authenticated-orcid":false,"given":"Siddharth","family":"Singh","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Maryland, College Park, Maryland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8910-4951","authenticated-orcid":false,"given":"Cunyang","family":"Wei","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Maryland, College Park, Maryland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3069-3701","authenticated-orcid":false,"given":"Abhinav","family":"Bhatele","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Maryland, College Park, Maryland, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"2021. OSLO: Open Source for Large-scale Optimization. https:\/\/github.com\/EleutherAI\/oslo."},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","unstructured":"Seher Acer Oguz Selvitopi and Cevdet Aykanat. 2016. Improving performance of sparse matrix dense matrix multiplication on large-scale parallel systems. Parallel Comput. 59 C (Nov. 2016) 71\u201396. 10.1016\/j.parco.2016.10.001","DOI":"10.1016\/j.parco.2016.10.001"},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"publisher","unstructured":"R.\u00a0C. Agarwal S.\u00a0M. Balle F.\u00a0G. Gustavson M. Joshi and P. Palkar. 1995. A three-dimensional approach to parallel matrix multiplication. IBM Journal of Research and Development 39 5 (1995) 575\u2013582. 10.1147\/rd.395.0575","DOI":"10.1147\/rd.395.0575"},{"key":"e_1_3_3_3_5_2","unstructured":"Xin Ai Hao Yuan Zeyu Ling Qiange Wang Yanfeng Zhang Zhenbo Fu Chaoyi Chen Yu Gu and Ge Yu. 2024. NeutronTP: Load-Balanced Distributed Full-Graph GNN Training with Tensor Parallelism. arxiv:https:\/\/arXiv.org\/abs\/2412.20379\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2412.20379"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","unstructured":"Ariful Azad Georgios\u00a0A Pavlopoulos Christos\u00a0A Ouzounis Nikos\u00a0C Kyrpides and Aydin Bulu\u00e7. 2018. HipMCL: a high-performance parallel implementation of the Markov clustering algorithm for large-scale networks. Nucleic Acids Research 46 6 (01 2018) e33\u2013e33. arXiv:https:\/\/academic.oup.com\/nar\/article-pdf\/46\/6\/e33\/24525991\/gkx1313.pdf10.1093\/nar\/gkx1313","DOI":"10.1093\/nar\/gkx1313"},{"key":"e_1_3_3_3_7_2","unstructured":"Muhammed\u00a0Fatih Bal\u0131n Kaan Sancak and \u00dcmit V.\u00a0\u00c7ataly\u00fcrek. 2021. MG-GCN: Scalable Multi-GPU GCN Training Framework. arxiv:https:\/\/arXiv.org\/abs\/2110.08688\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2110.08688"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456233"},{"key":"e_1_3_3_3_9_2","unstructured":"Jie Chen Tengfei Ma and Cao Xiao. 2018. FastGCN: Fast Learning with Graph Convolutional Networks via Importance Sampling. arxiv:https:\/\/arXiv.org\/abs\/1801.10247\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1801.10247"},{"key":"e_1_3_3_3_10_2","unstructured":"Jianfei Chen Jun Zhu and Le Song. 2018. Stochastic Training of Graph Convolutional Networks with Variance Reduction. arxiv:https:\/\/arXiv.org\/abs\/1710.10568\u00a0[stat.ML] https:\/\/arxiv.org\/abs\/1710.10568"},{"key":"e_1_3_3_3_11_2","unstructured":"Shenggan Cheng Ziming Liu Jiangsu Du and Yang You. 2023. ATP: Adaptive Tensor Parallelism for Foundation Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.08658 (2023)."},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330925"},{"key":"e_1_3_3_3_13_2","unstructured":"Siddhartha\u00a0Shankar Das S\u00a0M Ferdous Mahantesh\u00a0M Halappanavar Edoardo Serra and Alex Pothen. 2024. AGS-GNN: Attribute-guided Sampling for Graph Neural Networks. arxiv:https:\/\/arXiv.org\/abs\/2405.15218\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2405.15218"},{"key":"e_1_3_3_3_14_2","unstructured":"Matthias Fey and Jan\u00a0Eric Lenssen. 2019. Fast Graph Representation Learning with PyTorch Geometric. arxiv:https:\/\/arXiv.org\/abs\/1903.02428\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1903.02428"},{"key":"e_1_3_3_3_15_2","unstructured":"Geofabrik GmbH. 2010. DIMACS10\/europe_osm. SuiteSparse Matrix Collection. https:\/\/sparse.tamu.edu\/DIMACS10\/europe_osm"},{"key":"e_1_3_3_3_16_2","unstructured":"William\u00a0L. Hamilton Rex Ying and Jure Leskovec. 2018. Inductive Representation Learning on Large Graphs. arxiv:https:\/\/arXiv.org\/abs\/1706.02216\u00a0[cs.SI] https:\/\/arxiv.org\/abs\/1706.02216"},{"key":"e_1_3_3_3_17_2","unstructured":"Weihua Hu Matthias Fey Marinka Zitnik Yuxiao Dong Hongyu Ren Bowen Liu Michele Catasta and Jure Leskovec. 2021. Open Graph Benchmark: Datasets for Machine Learning on Graphs. arxiv:https:\/\/arXiv.org\/abs\/2005.00687\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2005.00687"},{"key":"e_1_3_3_3_18_2","first-page":"187","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Jia Zhihao","year":"2020","unstructured":"Zhihao Jia, Sina Lin, Mingyu Gao, Matei Zaharia, and Alex Aiken. 2020. Improving the Accuracy, Scalability, and Performance of Graph Neural Networks with Roc. In Proceedings of Machine Learning and Systems , I.\u00a0Dhillon, D.\u00a0Papailiopoulos, and V.\u00a0Sze (Eds.), Vol.\u00a02. 187\u2013198. https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2020\/file\/91fc23ceccb664ebb0cf4257e1ba9c51-Paper.pdf"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","unstructured":"George Karypis and Vipin Kumar. 1999. Kumar V.: A Fast and High Quality Multilevel Scheme for Partitioning Irregular Graphs. SIAM Journal on Scientific Computing 20(1) 359-392. Siam Journal on Scientific Computing 20 (01 1999). 10.1137\/S1064827595287997","DOI":"10.1137\/S1064827595287997"},{"key":"e_1_3_3_3_20_2","unstructured":"Thomas\u00a0N. Kipf and Max Welling. 2016. Semi-Supervised Classification with Graph Convolutional Networks. CoRR abs\/1609.02907 (2016). arXiv:https:\/\/arXiv.org\/abs\/1609.02907http:\/\/arxiv.org\/abs\/1609.02907"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00058"},{"key":"e_1_3_3_3_22_2","first-page":"396","volume-title":"Advances in Neural Information Processing Systems","author":"LeCun Yann","year":"1990","unstructured":"Yann LeCun, Bernhard Boser, John Denker, Donnie Henderson, R. Howard, Wayne Hubbard, and Lawrence Jackel. 1990. Handwritten Digit Recognition with a Back-Propagation Network. In Advances in Neural Information Processing Systems , D.\u00a0Touretzky (Ed.), Vol.\u00a02. Morgan-Kaufmann, 396\u2013404. https:\/\/proceedings.neurips.cc\/paper\/1989\/file\/53c3bce66e43be4f209556518c2fcb54-Paper.pdf"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11604"},{"key":"e_1_3_3_3_24_2","unstructured":"Shengwei Li Zhiquan Lai Yanqi Hao Weijie Liu Keshi Ge Xiaoge Deng Dongsheng Li and Kai Lu. 2023. Automated Tensor Model Parallelism with Overlapped Communication for Efficient Foundation Model Training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.16121 (2023)."},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605613"},{"key":"e_1_3_3_3_26_2","unstructured":"Xin Liu Mingyu Yan Lei Deng Guoqi Li Xiaochun Ye and Dongrui Fan. 2021. Sampling methods for efficient training of graph convolutional networks: A survey. arxiv:https:\/\/arXiv.org\/abs\/2103.05872\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2103.05872"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673152"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1018"},{"key":"e_1_3_3_3_29_2","unstructured":"NVIDIA. [n. d.]. NVIDIA Nsight Compute. https:\/\/developer.nvidia.com\/nsight-compute."},{"key":"e_1_3_3_3_30_2","unstructured":"F. Pedregosa G. Varoquaux A. Gramfort V. Michel B. Thirion O. Grisel M. Blondel P. Prettenhofer R. Weiss V. Dubourg J. Vanderplas A. Passos D. Cournapeau M. Brucher M. Perrot and E. Duchesnay. 2011. Scikit-learn: Machine Learning in Python. Journal of Machine Learning Research 12 (2011) 2825\u20132830."},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","unstructured":"Jingshu Peng Zhao Chen Yingxia Shao Yanyan Shen Lei Chen and Jiannong Cao. 2022. Sancus: staleness-aware communication-avoiding full-graph decentralized training in large-scale graph neural networks. Proc. VLDB Endow. 15 9 (May 2022) 1937\u20131950. 10.14778\/3538598.3538614","DOI":"10.14778\/3538598.3538614"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24685-5_1"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3461472"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2017.00053"},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00065"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00010"},{"key":"e_1_3_3_3_37_2","unstructured":"Jaeyong Song Hongsun Jang Jaewon Jung Youngsok Kim and Jinho Lee. 2024. GraNNDis: Efficient Unified Distributed Training Framework for Deep GNNs on Large Clusters. arxiv:https:\/\/arXiv.org\/abs\/2311.06837\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2311.06837"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-39924-7_38"},{"key":"e_1_3_3_3_39_2","series-title":"(SC \u201920)","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Tripathy Alok","year":"2020","unstructured":"Alok Tripathy, Katherine Yelick, and Ayd\u0131n Bulu\u00e7. 2020. Reducing communication in graph neural network training. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (Atlanta, Georgia) (SC \u201920). IEEE Press, Article 70, 17\u00a0pages."},{"key":"e_1_3_3_3_40_2","unstructured":"Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Li\u00f2 and Yoshua Bengio. 2018. Graph Attention Networks. arxiv:https:\/\/arXiv.org\/abs\/1710.10903\u00a0[stat.ML] https:\/\/arxiv.org\/abs\/1710.10903"},{"key":"e_1_3_3_3_41_2","unstructured":"Borui Wan Juntao Zhao and Chuan Wu. 2023. Adaptive Message Quantization and Parallelization for Distributed Full-graph GNN Training. arxiv:https:\/\/arXiv.org\/abs\/2306.01381\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2306.01381"},{"key":"e_1_3_3_3_42_2","unstructured":"Cheng Wan Youjie Li Ang Li Nam\u00a0Sung Kim and Yingyan Lin. 2022. BNS-GCN: Efficient Full-Graph Training of Graph Convolutional Networks with Partition-Parallelism and Random Boundary Node Sampling. arxiv:https:\/\/arXiv.org\/abs\/2203.10983\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2203.10983"},{"key":"e_1_3_3_3_43_2","unstructured":"Cheng Wan Youjie Li Cameron\u00a0R. Wolfe Anastasios Kyrillidis Nam\u00a0Sung Kim and Yingyan Lin. 2022. PipeGCN: Efficient Full-Graph Training of Graph Convolutional Networks with Pipelined Feature Communication. arxiv:https:\/\/arXiv.org\/abs\/2203.10428\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2203.10428"},{"key":"e_1_3_3_3_44_2","unstructured":"Minjie Wang Da Zheng Zihao Ye Quan Gan Mufei Li Xiang Song Jinjing Zhou Chao Ma Lingfan Yu Yu Gai Tianjun Xiao Tong He George Karypis Jinyang Li and Zheng Zhang. 2020. Deep Graph Library: A Graph-Centric Highly-Performant Package for Graph Neural Networks. arxiv:https:\/\/arXiv.org\/abs\/1909.01315\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1909.01315"},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3526134"},{"key":"e_1_3_3_3_46_2","first-page":"779","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Wang Yuke","year":"2023","unstructured":"Yuke Wang, Boyuan Feng, Zheng Wang, Tong Geng, Kevin Barker, Ang Li, and Yufei Ding. 2023. MGG: Accelerating Graph Neural Networks with Fine-Grained Intra-Kernel Communication-Computation Pipelining on Multi-GPU Platforms. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). USENIX Association, Boston, MA, 779\u2013795. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/wang-yuke"},{"key":"e_1_3_3_3_47_2","unstructured":"Keyulu Xu Weihua Hu Jure Leskovec and Stefanie Jegelka. 2019. How Powerful are Graph Neural Networks? arxiv:https:\/\/arXiv.org\/abs\/1810.00826\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1810.00826"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","unstructured":"Carl Yang Aydin Buluc and John\u00a0D. Owens. 2018. Design Principles for Sparse Matrix Multiplication on the GPU. 10.48550\/ARXIV.1803.08601","DOI":"10.48550\/ARXIV.1803.08601"},{"key":"e_1_3_3_3_49_2","unstructured":"Taraneh Younesian Daniel Daza Emile van Krieken Thiviyan Thanapalasingam and Peter Bloem. 2024. GRAPES: Learning to Sample Graphs for Scalable Graph Neural Networks. arxiv:https:\/\/arXiv.org\/abs\/2310.03399\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2310.03399"},{"key":"e_1_3_3_3_50_2","unstructured":"Hao Yuan Yajiong Liu Yanfeng Zhang Xin Ai Qiange Wang Chaoyi Chen Yu Gu and Ge Yu. 2024. Comprehensive Evaluation of GNN Training Systems: A Data Management Perspective. arxiv:https:\/\/arXiv.org\/abs\/2311.13279\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2311.13279"},{"key":"e_1_3_3_3_51_2","unstructured":"Shuai Zhang Zite Jiang and Haihang You. 2024. CDFGNN: a Systematic Design of Cache-based Distributed Full-Batch Graph Neural Network Training with Communication Reduction. arxiv:https:\/\/arXiv.org\/abs\/2408.00232\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2408.00232"},{"key":"e_1_3_3_3_52_2","unstructured":"Lianmin Zheng Zhuohan Li Hao Zhang Yonghao Zhuang Zhifeng Chen Yanping Huang Yida Wang Yuanzhong Xu Danyang Zhuo Joseph\u00a0E. Gonzalez and Ion Stoica. 2022. Alpa: Automating Inter- and Intra-Operator Parallelism for Distributed Deep Learning. CoRR abs\/2201.12023 (2022). arXiv:https:\/\/arXiv.org\/abs\/2201.12023"},{"key":"e_1_3_3_3_53_2","unstructured":"Difan Zou Ziniu Hu Yewen Wang Song Jiang Yizhou Sun and Quanquan Gu. 2019. Layer-Dependent Importance Sampling for Training Deep and Large Graph Convolutional Networks. arxiv:https:\/\/arXiv.org\/abs\/1911.07323\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1911.07323"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759890","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:41:44Z","timestamp":1773254504000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759890"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":52,"alternative-id":["10.1145\/3712285.3759890","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759890","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}