{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T05:02:50Z","timestamp":1764997370703,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,21]],"date-time":"2021-06-21T00:00:00Z","timestamp":1624233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1533768"],"award-info":[{"award-number":["1533768"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"U.S. Department of Energy, Office of Science, Office of Advanced Scientific Computing Research","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["1710371"],"award-info":[{"award-number":["1710371"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,21]]},"DOI":"10.1145\/3431379.3460651","type":"proceedings-article","created":{"date-parts":[[2021,6,17]],"date-time":"2021-06-17T04:09:26Z","timestamp":1623902966000},"page":"121-131","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Scalable All-pairs Shortest Paths for Huge Graphs on Multi-GPU Clusters"],"prefix":"10.1145","author":[{"given":"piyush","family":"sao","sequence":"first","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, TN, USA"}]},{"given":"Hao","family":"lu","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, TN, USA"}]},{"given":"Ramakrishnan","family":"Kannan","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, TN, USA"}]},{"given":"Vijay","family":"Thakkar","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Richard","family":"Vuduc","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Thomas","family":"Potok","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, TN, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,6,21]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Nvidia\/cutlass: Cuda templates for linear algebra subroutines. https:\/\/github.com\/NVIDIA\/cutlass. (Accessed on 01\/24\/2021).  Nvidia\/cutlass: Cuda templates for linear algebra subroutines. https:\/\/github.com\/NVIDIA\/cutlass. (Accessed on 01\/24\/2021)."},{"key":"e_1_3_2_1_2_1","unstructured":"rapidsai\/cugraph: cugraph - rapids graph analytics library. https:\/\/github.com\/rapidsai\/cugraph. (Accessed on 01\/24\/2021).  rapidsai\/cugraph: cugraph - rapids graph analytics library. https:\/\/github.com\/rapidsai\/cugraph. (Accessed on 01\/24\/2021)."},{"issue":"3","key":"e_1_3_2_1_3_1","first-page":"153","article-title":"Optimized HPL for AMD GPU and multi-core CPU usage","volume":"26","author":"Bach Matthias","year":"2011","unstructured":"Matthias Bach , Matthias Kretz , Volker Lindenstruth , and David Rohr . Optimized HPL for AMD GPU and multi-core CPU usage . Computer Science-R&D , 26 ( 3-4 ): 153 - 164 , 2011 . Matthias Bach, Matthias Kretz, Volker Lindenstruth, and David Rohr. Optimized HPL for AMD GPU and multi-core CPU usage. Computer Science-R&D, 26(3-4):153-164, 2011.","journal-title":"Computer Science-R&D"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1093\/imamat\/15.2.161"},{"key":"e_1_3_2_1_5_1","volume-title":"On a routing problem. Quarterly of applied mathematics, 16(1):87--90","author":"Bellman Richard","year":"1958","unstructured":"Richard Bellman . On a routing problem. Quarterly of applied mathematics, 16(1):87--90 , 1958 . Richard Bellman. On a routing problem. Quarterly of applied mathematics, 16(1):87--90, 1958."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342011403516"},{"key":"e_1_3_2_1_7_1","first-page":"1","volume-title":"International Workshop on Applied Parallel Computing","author":"Buttari Alfredo","year":"2006","unstructured":"Alfredo Buttari , Jack Dongarra , Jakub Kurzak , Julien Langou , Piotr Luszczek , and Stanimire Tomov . The impact of multicore on math software . In International Workshop on Applied Parallel Computing , pages 1 -- 10 . Springer , 2006 . Alfredo Buttari, Jack Dongarra, Jakub Kurzak, Julien Langou, Piotr Luszczek, and Stanimire Tomov. The impact of multicore on math software. In International Workshop on Applied Parallel Computing, pages 1--10. Springer, 2006."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1093\/imamat\/7.3.273"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1109557.1109614"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2012.02.007"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01386390"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2015.06.008"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1513895.1513901"},{"key":"e_1_3_2_1_14_1","first-page":"45","volume-title":"Graph Algorithms in the Language of Linear Algebra","author":"Jeremy","year":"2011","unstructured":"Jeremy T. Fineman and Eric Robinson. Fundamental graph algorithms . In Jeremy Kepner and John Gilbert, editors, Graph Algorithms in the Language of Linear Algebra , chapter 5, pages 45 -- 58 . Society of Industrial and Applied Mathematics , Philadelphia, PA, USA , 2011 . Jeremy T. Fineman and Eric Robinson. Fundamental graph algorithms. In Jeremy Kepner and John Gilbert, editors, Graph Algorithms in the Language of Linear Algebra, chapter 5, pages 45--58. Society of Industrial and Applied Mathematics, Philadelphia, PA, USA, 2011."},{"key":"e_1_3_2_1_15_1","volume-title":"Rand Corp Santa Monica Ca","author":"Ford Lester R","year":"1956","unstructured":"Lester R Ford Jr . Network flow theory. Technical report , Rand Corp Santa Monica Ca , 1956 . Lester R Ford Jr. Network flow theory. Technical report, Rand Corp Santa Monica Ca, 1956."},{"key":"e_1_3_2_1_16_1","volume-title":"Fibonacci heaps and their uses in improved network optimization algorithms. Journal of the ACM (JACM), 34(3):596--615","author":"Fredman Michael L","year":"1987","unstructured":"Michael L Fredman and Robert Endre Tarjan . Fibonacci heaps and their uses in improved network optimization algorithms. Journal of the ACM (JACM), 34(3):596--615 , 1987 . Michael L Fredman and Robert Endre Tarjan. Fibonacci heaps and their uses in improved network optimization algorithms. Journal of the ACM (JACM), 34(3):596--615, 1987."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.14778\/3384345.3384358"},{"key":"e_1_3_2_1_18_1","volume-title":"dioids and semirings: new models and algorithms","author":"Gondran Michel","year":"2008","unstructured":"Michel Gondran and Michel Minoux . Graphs , dioids and semirings: new models and algorithms , volume 41 . Springer Science & Business Media , 2008 . Michel Gondran and Michel Minoux. Graphs, dioids and semirings: new models and algorithms, volume 41. Springer Science & Business Media, 2008."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.113"},{"key":"e_1_3_2_1_20_1","first-page":"713","volume-title":"Proc Int Conf Parallel Process 1987","author":"Jenq Jing Fu","year":"1987","unstructured":"Jing Fu Jenq and Sartaj Sahni . All pairs shortest paths on a hypercube multiprocessor . In Proc Int Conf Parallel Process 1987 , pages 713 -- 716 . Pennsylvania State Univ Press , 1987 . Jing Fu Jenq and Sartaj Sahni. All pairs shortest paths on a hypercube multiprocessor. In Proc Int Conf Parallel Process 1987, pages 713--716. Pennsylvania State Univ Press, 1987."},{"key":"e_1_3_2_1_21_1","volume-title":"Efficient algorithms for shortest paths in sparse networks. Journal of the ACM (JACM), 24(1):1--13","author":"Johnson Donald B","year":"1977","unstructured":"Donald B Johnson . Efficient algorithms for shortest paths in sparse networks. Journal of the ACM (JACM), 24(1):1--13 , 1977 . Donald B Johnson. Efficient algorithms for shortest paths in sparse networks. Journal of the ACM (JACM), 24(1):1--13, 1977."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00010"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2016.7761646"},{"key":"e_1_3_2_1_24_1","volume-title":"November","author":"Kerr Andrew","year":"2019","unstructured":"Andrew Kerr . Cutlass: Cuda templates for linear algebra subroutines , November 2019 . Andrew Kerr. Cutlass: Cuda templates for linear algebra subroutines, November 2019."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/0743-7315(91)90083-L"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-36433-4_4"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2019.00053"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0196-6774(03)00076-2"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2567634.2567648"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2909428.2909435"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374533"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.03.004"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.104"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-09873-9_41"},{"key":"e_1_3_2_1_35_1","volume-title":"On the all-pairs-shortest-path problem in unweighted undirected graphs. Journal of computer and system sciences, 51(3):400--403","author":"Seidel Raimund","year":"1995","unstructured":"Raimund Seidel . On the all-pairs-shortest-path problem in unweighted undirected graphs. Journal of computer and system sciences, 51(3):400--403 , 1995 . Raimund Seidel. On the all-pairs-shortest-path problem in unweighted undirected graphs. Journal of computer and system sciences, 51(3):400--403, 1995."},{"key":"e_1_3_2_1_36_1","volume-title":"The boost graph library: user guide and reference manual","author":"Siek Jeremy","year":"2002","unstructured":"Jeremy Siek , Andrew Lumsdaine , and Lie-Quan Lee . The boost graph library: user guide and reference manual . Addison-Wesley , 2002 . Jeremy Siek, Andrew Lumsdaine, and Lie-Quan Lee. The boost graph library: user guide and reference manual. Addison-Wesley, 2002."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.21236\/ADA580350"},{"key":"e_1_3_2_1_38_1","volume-title":"A comparison of lookahead and algorithmic blocking techniques for parallel matrix factorization","author":"Peter Strazdins","year":"1998","unstructured":"Peter Strazdins et al. A comparison of lookahead and algorithmic blocking techniques for parallel matrix factorization . 1998 . Peter Strazdins et al. A comparison of lookahead and algorithmic blocking techniques for parallel matrix factorization. 1998."},{"key":"e_1_3_2_1_39_1","volume-title":"Md Mostofa Ali Patwary, Subramanya R Dulloor, Satya Gautam Vadlamudi, Dipankar Das, and Pradeep Dubey. Graphmat: High performance graph analytics made productive. arXiv preprint arXiv:1503.07241","author":"Sundaram Narayanan","year":"2015","unstructured":"Narayanan Sundaram , Nadathur Rajagopalan Satish , Md Mostofa Ali Patwary, Subramanya R Dulloor, Satya Gautam Vadlamudi, Dipankar Das, and Pradeep Dubey. Graphmat: High performance graph analytics made productive. arXiv preprint arXiv:1503.07241 , 2015 . Narayanan Sundaram, Nadathur Rajagopalan Satish, Md Mostofa Ali Patwary, Subramanya R Dulloor, Satya Gautam Vadlamudi, Dipankar Das, and Pradeep Dubey. Graphmat: High performance graph analytics made productive. arXiv preprint arXiv:1503.07241, 2015."},{"key":"e_1_3_2_1_40_1","series-title":"SIAM Computational Sciences and Engineering","volume-title":"Dense semiring linear algebra on modern cuda hardware","author":"Thakkar V.","year":"2021","unstructured":"V. Thakkar , R. Kannan , P. Sao , H. Lu , D. Herrmannova , R. Patton , R. Vuduc , and T. Potok . Dense semiring linear algebra on modern cuda hardware . In SIAM Computational Sciences and Engineering . SIAM , 2021 . V. Thakkar, R. Kannan, P. Sao, H. Lu, D. Herrmannova, R. Patton, R. Vuduc, and T. Potok. Dense semiring linear algebra on modern cuda hardware. In SIAM Computational Sciences and Engineering. SIAM, 2021."},{"key":"e_1_3_2_1_41_1","volume-title":"Undirected single-source shortest paths with positive integer weights in linear time. Journal of the ACM (JACM), 46(3):362--394","author":"Thorup Mikkel","year":"1999","unstructured":"Mikkel Thorup . Undirected single-source shortest paths with positive integer weights in linear time. Journal of the ACM (JACM), 46(3):362--394 , 1999 . Mikkel Thorup. Undirected single-source shortest paths with positive integer weights in linear time. Journal of the ACM (JACM), 46(3):362--394, 1999."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-48224-5_15"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2009.12.005"},{"key":"e_1_3_2_1_44_1","volume-title":"et al. Gunrock: Gpu graph analytics. ACM Transactions on Parallel Computing (TOPC), 4(1):1--49","author":"Wang Yangzihao","year":"2017","unstructured":"Yangzihao Wang , Yuechao Pan , Andrew Davidson , Yuduo Wu , Carl Yang , Leyuan Wang , Muhammad Osama , Chenshan Yuan , Weitang Liu , Andy T Riffel , et al. Gunrock: Gpu graph analytics. ACM Transactions on Parallel Computing (TOPC), 4(1):1--49 , 2017 . Yangzihao Wang, Yuechao Pan, Andrew Davidson, Yuduo Wu, Carl Yang, Leyuan Wang, Muhammad Osama, Chenshan Yuan, Weitang Liu, Andy T Riffel, et al. Gunrock: Gpu graph analytics. ACM Transactions on Parallel Computing (TOPC), 4(1):1--49, 2017."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1137\/15M1024524"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS.2010.67"}],"event":{"name":"HPDC '21: The 30th International Symposium on High-Performance Parallel and Distributed Computing","sponsor":["University of Arizona University of Arizona","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Virtual Event Sweden","acronym":"HPDC '21"},"container-title":["Proceedings of the 30th International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3431379.3460651","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3431379.3460651","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3431379.3460651","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3431379.3460651","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:24:46Z","timestamp":1750195486000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3431379.3460651"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,21]]},"references-count":46,"alternative-id":["10.1145\/3431379.3460651","10.1145\/3431379"],"URL":"https:\/\/doi.org\/10.1145\/3431379.3460651","relation":{},"subject":[],"published":{"date-parts":[[2021,6,21]]},"assertion":[{"value":"2021-06-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}