{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:23:52Z","timestamp":1774121032084,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000028","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000028","id-type":"DOI","asserted-by":"publisher"}]},{"name":"IBM-Illinois Discovery Accelerator Institute"},{"DOI":"10.13039\/100000015","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-SC0022098"],"award-info":[{"award-number":["DE-SC0022098"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["PPoSS CCF 2316233"],"award-info":[{"award-number":["PPoSS CCF 2316233"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS 1956007"],"award-info":[{"award-number":["CNS 1956007"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF 2107470"],"award-info":[{"award-number":["CCF 2107470"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["DGE 21-46756"],"award-info":[{"award-number":["DGE 21-46756"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2138259"],"award-info":[{"award-number":["2138259"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2138286"],"award-info":[{"award-number":["2138286"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2138307"],"award-info":[{"award-number":["2138307"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2137603"],"award-info":[{"award-number":["2137603"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2138296"],"award-info":[{"award-number":["2138296"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620665.3640427","type":"proceedings-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T14:18:06Z","timestamp":1713795486000},"page":"1200-1217","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Two-Face: Combining Collective and One-Sided Communication for Efficient Distributed SpMM"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7770-003X","authenticated-orcid":false,"given":"Charles","family":"Block","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, Illinois, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7946-2683","authenticated-orcid":false,"given":"Gerasimos","family":"Gerogiannis","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8140-2321","authenticated-orcid":false,"given":"Charith","family":"Mendis","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1332-8630","authenticated-orcid":false,"given":"Ariful","family":"Azad","sequence":"additional","affiliation":[{"name":"Indiana University, Bloomington, Indiana, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2595-5228","authenticated-orcid":false,"given":"Josep","family":"Torrellas","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2023.3295848"},{"key":"e_1_3_2_1_2_1","unstructured":"Bruno Abreu Galen Arnold Gregory Bauer Brett Bode Craig Steffan et al. 2024. Delta User Documentation. National Center for supercomputing Applications. Retrieved Jan 2024 from https:\/\/docs.ncsa.illinois.edu\/systems\/delta\/en\/latest\/"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2016.10.001"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS57527.2023.00025"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750386"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3094091"},{"key":"e_1_3_2_1_7_1","volume-title":"Lois Curfman McInnes","author":"Balay Satish","year":"2023","unstructured":"Satish Balay, Shrirang Abhyankar, Mark F. Adams, Steven Benson, Jed Brown, Peter Brune, Kris Buschelman, Emil M. Constantinescu, Lisandro Dalcin, Alp Dener, Victor Eijkhout, Jacob Faibussowitsch, William D. Gropp, V\u00e1clav Hapla, Tobin Isaac, Pierre Jolivet, Dmitry Karpeev, Dinesh Kaushik, Matthew G. Knepley, Fande Kong, Scott Kruger, Dave A. May, Lois Curfman McInnes, Richard Tran Mills, Lawrence Mitchell, Todd Munson, Jose E. Roman, Karl Rupp, Patrick Sanan, Jason Sarich, Barry F. Smith, Stefano Zampini, Hong Zhang, Hong Zhang, and Junchao Zhang. 2023. PETSc Web page. https:\/\/petsc.org\/. https:\/\/petsc.org\/"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00014"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Ronald Boisvert Roldan Pozo and K Remington. 1996. The Matrix Market Exchange Formats: Initial Design.","DOI":"10.6028\/NIST.IR.5935"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447786.3456233"},{"key":"e_1_3_2_1_11_1","volume-title":"NIPS Workshop.","author":"Canny John","year":"2013","unstructured":"John Canny and Huasha Zhao. 2013. Bidmach: Large-scale learning with zero memory allocation. In BigLearning, NIPS Workshop."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1122971.1122975"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605611"},{"key":"e_1_3_2_1_14_1","volume-title":"Intel\u00ae oneAPI Math Kernel Library","author":"Intel Corporation","year":"2023","unstructured":"Intel Corporation. 2023. Intel\u00ae oneAPI Math Kernel Library. Intel Corporation. Retrieved 2023 from https:\/\/intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/onemkl.html"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_1_16_1","volume-title":"HPE Slingshot interconnect. Hewlett Packard Enterprise. Retrieved","author":"Enterprise Hewlett Packard","year":"2024","unstructured":"Hewlett Packard Enterprise. 2024. HPE Slingshot interconnect. Hewlett Packard Enterprise. Retrieved Jan 2024 from www.hpe.com\/us\/en\/compute\/hpc\/slingshot-interconnect.html"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00057"},{"key":"e_1_3_2_1_18_1","volume-title":"Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds.","author":"Fey Matthias","unstructured":"Matthias Fey and Jan E. Lenssen. 2019. Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds."},{"key":"e_1_3_2_1_19_1","volume-title":"Woodall","author":"Gabriel Edgar","year":"2004","unstructured":"Edgar Gabriel, Graham E. Fagg, George Bosilca, Thara Angskun, Jack J. Dongarra, Jeffrey M. Squyres, Vishal Sahay, Prabhanjan Kambadur, Brian Barrett, Andrew Lumsdaine, Ralph H. Castain, David J. Daniel, Richard L. Graham, and Timothy S. Woodall. 2004. Open MPI: Goals, Concept, and Design of a Next Generation MPI Implementation. In Proceedings, 11th European PVM\/MPI Users' Group Meeting. 97--104."},{"key":"e_1_3_2_1_20_1","volume-title":"HotTiles: Accelerating SpMM with Heterogeneous Accelerator Architectures. In 2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE.","author":"Gerogiannis Gerasimos","year":"2024","unstructured":"Gerasimos Gerogiannis, Sriram Aananthakrishnan, Josep Torrellas, and Ibrahim Hur. 2024. HotTiles: Accelerating SpMM with Heterogeneous Accelerator Architectures. In 2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589054"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508041"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527403"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3350755.3400216"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/3294771.3294869"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358275"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3295712"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582051"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00075"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","unstructured":"Guyue Huang Guohao Dai Yu Wang and Huazhong Yang. 2020. GE-SpMM: General-Purpose Sparse Matrix-Matrix Multiplication on GPUs for Graph Neural Networks. In SC20: International Conference for High Performance Computing Networking Storage and Analysis. 1--12. 10.1109\/SC41405.2020.00076","DOI":"10.1109\/SC41405.2020.00076"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00018"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.14778\/3157794.3157799"},{"key":"e_1_3_2_1_33_1","first-page":"187","article-title":"Improving the accuracy, scalability, and performance of graph neural networks with Roc","volume":"2","author":"Jia Zhihao","year":"2020","unstructured":"Zhihao Jia, Sina Lin, Mingyu Gao, Matei Zaharia, and Alex Aiken. 2020. Improving the accuracy, scalability, and performance of graph neural networks with Roc. Proceedings of Machine Learning and Systems 2 (2020), 187--198.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358286"},{"key":"e_1_3_2_1_35_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772751"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00042"},{"key":"e_1_3_2_1_38_1","volume-title":"HASpMV: Heterogeneity-Aware Sparse Matrix-Vector Multiplication on Modern Asymmetric Multicore Processors. In 2023 IEEE International Conference on Cluster Computing (CLUSTER)","author":"Li Wenxuan","unstructured":"Wenxuan Li, Helin Cheng, Zhengyang Lu, Yuechen Lu, and Weifeng Liu. 2023. HASpMV: Heterogeneity-Aware Sparse Matrix-Vector Multiplication on Modern Asymmetric Multicore Processors. In 2023 IEEE International Conference on Cluster Computing (CLUSTER). IEEE Computer Society, 209--220."},{"key":"e_1_3_2_1_39_1","volume-title":"MPI: A Message-Passing Interface Standard Version 4.0. https:\/\/www.mpi-forum.org\/docs\/mpi-4.0\/mpi40-report.pdf https:\/\/www.mpi-forum.org\/docs\/mpi-4.0\/mpi40-report.pdf.","author":"Interface Forum Message Passing","year":"2021","unstructured":"Message Passing Interface Forum. 2021. MPI: A Message-Passing Interface Standard Version 4.0. https:\/\/www.mpi-forum.org\/docs\/mpi-4.0\/mpi40-report.pdf https:\/\/www.mpi-forum.org\/docs\/mpi-4.0\/mpi40-report.pdf."},{"key":"e_1_3_2_1_40_1","volume-title":"Introduction to linear regression analysis","author":"Montgomery Douglas C","unstructured":"Douglas C Montgomery, Elizabeth A Peck, and G Geoffrey Vining. 2021. Introduction to linear regression analysis. John Wiley & Sons."},{"key":"e_1_3_2_1_41_1","volume-title":"cuSPARSE. Retrieved","author":"NVIDIA.","year":"2024","unstructured":"NVIDIA. 2024. cuSPARSE. Retrieved Jan 2024 from https:\/\/developer.nvidia.com\/cusparse"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582064"},{"key":"e_1_3_2_1_43_1","unstructured":"OpenMP Architecture Review Board. 2015. OpenMP Application Program Interface Version 4.5. https:\/\/openmp.org\/wp-content\/uploads\/openmp-4.5.pdf."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527400"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071089"},{"key":"e_1_3_2_1_46_1","volume-title":"Eigen v3.4. Retrieved","author":"Project Eigen","year":"2024","unstructured":"Eigen Project. 2023. Eigen v3.4. Retrieved Jan 2024 from https:\/\/eigen.tuxfamily.org"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527382"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3461472"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI.2015.13"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00062"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00011"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00074"},{"key":"e_1_3_2_1_54_1","volume-title":"Graph attention networks. arXiv preprint arXiv:1710.10903","author":"Veli\u010dkovi\u0107 Petar","year":"2017","unstructured":"Petar Veli\u010dkovi\u0107, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Lio, and Yoshua Bengio. 2017. Graph attention networks. arXiv preprint arXiv:1710.10903 (2017)."},{"key":"e_1_3_2_1_55_1","volume-title":"Highly-Performant Package for Graph Neural Networks. arXiv preprint arXiv:1909.01315","author":"Wang Minjie","year":"2019","unstructured":"Minjie Wang, Da Zheng, Zihao Ye, Quan Gan, Mufei Li, Xiang Song, Jinjing Zhou, Chao Ma, Lingfan Yu, Yu Gai, Tianjun Xiao, Tong He, George Karypis, Jinyang Li, and Zheng Zhang. 2019. Deep Graph Library: A Graph-Centric, Highly-Performant Package for Graph Neural Networks. arXiv preprint arXiv:1909.01315 (2019)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575742"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577506"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524059.3532369"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00053"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358256"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640427","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3620665.3640427","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640427","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640427","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:42Z","timestamp":1750291422000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640427"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":60,"alternative-id":["10.1145\/3620665.3640427","10.1145\/3620665"],"URL":"https:\/\/doi.org\/10.1145\/3620665.3640427","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}