{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T14:43:05Z","timestamp":1775054585808,"version":"3.50.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,6,15]],"date-time":"2024-06-15T00:00:00Z","timestamp":1718409600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,15]],"date-time":"2024-06-15T00:00:00Z","timestamp":1718409600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000161","name":"National Institute of Standards and Technology","doi-asserted-by":"publisher","award":["70NANB20H018"],"award-info":[{"award-number":["70NANB20H018"]}],"id":[{"id":"10.13039\/100000161","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-024-02917-y","type":"journal-article","created":{"date-parts":[[2024,6,15]],"date-time":"2024-06-15T08:02:37Z","timestamp":1718438557000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An Illustration of Extending Hedgehog to Multi-Node GPU Architectures Using GEMM"],"prefix":"10.1007","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2616-7971","authenticated-orcid":false,"given":"Nitish","family":"Shingde","sequence":"first","affiliation":[]},{"given":"Timothy","family":"Blattner","sequence":"additional","affiliation":[]},{"given":"Alexandre","family":"Bardakoff","sequence":"additional","affiliation":[]},{"given":"Walid","family":"Keyrouz","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Berzins","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,15]]},"reference":[{"key":"2917_CR1","doi-asserted-by":"publisher","unstructured":"Shingde N, Berzins M, Blattner T, Keyrouz W, Bardakoff A. Extending Hedgehog\u2019s dataflow graphs to multi-node GPU architectures. In Lecture Notes in Computer Science 2023;(pp. 1-12). https:\/\/doi.org\/10.1007\/978-3-031-32316-4_1","DOI":"10.1007\/978-3-031-32316-4_1"},{"key":"2917_CR2","doi-asserted-by":"publisher","unstructured":"Bardakoff A, Bachelet B, Blattner T, Keyrouz W, Kroiz GC, Yon L. \"Hedgehog: Understandable Scheduler-Free Heterogeneous Asynchronous Multithreaded Data-Flow Graphs,\" 2020 IEEE\/ACM 3rd Annual Parallel Applications Workshop: Alternatives To MPI+X (PAW-ATM), 2020, pp. 1-15., https:\/\/doi.org\/10.1109\/PAWATM51920.2020.00006.","DOI":"10.1109\/PAWATM51920.2020.00006"},{"key":"2917_CR3","doi-asserted-by":"publisher","unstructured":"Herault T, Robert Y, Bosilca G, Dongarra J. \"Generic Matrix Multiplication for Multi-GPU Accelerated Distributed-Memory Platforms over PaRSEC,\" 2019 IEEE\/ACM 10th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA), 2019, pp. 33-41, https:\/\/doi.org\/10.1109\/ScalA49573.2019.00010.","DOI":"10.1109\/ScalA49573.2019.00010"},{"key":"2917_CR4","doi-asserted-by":"publisher","unstructured":"Gates M, Kurzak J, Charara A, YarKhan A, Dongarra J. SLATE: design of a modern distributed and accelerated linear algebra library. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC \u201919). Association for Computing Machinery, New York, NY, USA, Article 26, 2019;1-18. https:\/\/doi.org\/10.1145\/3295500.3356223","DOI":"10.1145\/3295500.3356223"},{"key":"2917_CR5","doi-asserted-by":"crossref","unstructured":"Bauer M, Treichler S, Slaughter E, Aiken A. Legion: Expressing locality and independence with logical regions. In Proc. of the Int. Conf. on High Perf. Comput., Networking, Storage and Analysis. IEEE Computer Society Press, 2012;66.","DOI":"10.1109\/SC.2012.71"},{"issue":"5","key":"2917_CR6","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1137\/15M1023270","volume":"38","author":"M Berzins","year":"2016","unstructured":"Berzins M, Beckvermit J, Harman T, Bezdjian A, Humphrey A, Meng Q, Schmidt J, Wight C. Extending the Uintah Framework through the Petascale Modeling of Detonation in Arrays of High Explosive Devices. SIAM Journal on Scientific Computing. 2016;38(5):101\u201322.","journal-title":"SIAM Journal on Scientific Computing"},{"issue":"6","key":"2917_CR7","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1109\/MCSE.2013.98","volume":"15","author":"G Bosilca","year":"2013","unstructured":"Bosilca G, Bouteiller A, Danalis A, Faverge M, Herault T, Dongarra JJ. PaRSEC: Exploiting Heterogeneity to Enhance Scalability. Computing in Science Engineering. 2013;15(6):36\u201345.","journal-title":"Computing in Science Engineering"},{"issue":"12","key":"2917_CR8","doi-asserted-by":"publisher","first-page":"3202","DOI":"10.1016\/j.jpdc.2014.07.003","volume":"74","author":"HC Edwards","year":"2014","unstructured":"Edwards HC, Trott CR, Sunderland D. Kokkos: Enabling manycore performance portability through polymorphic memory access patterns. J Parallel and Distrib Comput. 2014;74(12):3202\u201316.","journal-title":"J. Parallel and Distrib. Comput."},{"key":"2917_CR9","doi-asserted-by":"crossref","unstructured":"Holmen JK, Sahasrabudhe D, Berzins M. \u201cA Heterogeneous MPI+PPL Task Scheduling Approach for Asynchronous Many-Task Runtime Systems,\u201d In Proceedings of the Practice and Experience in Advanced Research Computing 2021 on Sustainability, Success and Impact (PEARC21), ACM, (2021)","DOI":"10.1145\/3437359.3465581"},{"key":"2917_CR10","doi-asserted-by":"crossref","unstructured":"Holmen JK, Peterson B, Berzins M. \u201cAn Approach for Indirectly Adopting a Performance Portability Layer in Large Legacy Codes,\u201d In 2nd International Workshop on Performance, Portability, and Productivity in HPC (P3HPC), SC19, 2019.","DOI":"10.1109\/P3HPC49587.2019.00009"},{"key":"2917_CR11","doi-asserted-by":"crossref","unstructured":"Kaiser H, Heller T, Adelstein-Lelbach B, Serio A, Fey D. HPX: A Task Based Programming Model in a Global Address Space. In Proceedings of the 8th International Conference on Partitioned Global Address Space Programming Models (Eugene, OR, USA) (PGAS \u201914). ACM, New York, NY, USA, Article 6 2014.","DOI":"10.1145\/2676870.2676883"},{"key":"2917_CR12","doi-asserted-by":"crossref","unstructured":"Kale LV, Krishnan S. CHARM++: A Portable Concurrent Object Oriented System Based on C++. In Proceedings of the Eighth Annual Conference on Object-oriented Programming Systems, Languages, and Applications (Washington, D.C., USA) (OOPSLA \u201993). ACM, New York, NY, USA, 1993;91-108.","DOI":"10.1145\/165854.165874"},{"key":"2917_CR13","doi-asserted-by":"crossref","unstructured":"Meng Q, Humphrey A, Berzins M. \u201cThe Uintah Framework: A Unified Heterogeneous Task Scheduling and Runtime System,\u201d In Digital Proceedings of The International Conference for High Performance Computing, Networking, Storage and Analysis, SC\u201912, WOLFHPC 2012 Worshop, 2012;pp. 2441\u20132448.","DOI":"10.1109\/SCC.2012.6674233"},{"key":"2917_CR14","doi-asserted-by":"crossref","unstructured":"Holmen JK, Sahasrabudhe D, Berzins M. \u201cPorting Uintah to Heterogeneous Systems,\u201d In Proceedings of the Platform for Advanced Scientific Computing Conference (PASC22) Best Paper Award, ACM, 2022.","DOI":"10.1145\/3539781.3539794"},{"issue":"23","key":"2917_CR15","first-page":"187","volume":"2009","author":"C Augonnet","year":"2011","unstructured":"Augonnet C, Thibault S, Namyst R, Wacrenier P. StarPU: A Unified Platform for Task Scheduling on Heterogeneous Multicore Architectures CCPE - Concurrency and Computation: Practice and Experience. Special Issue: Euro-Par. 2011;2009(23):187\u201398.","journal-title":"Special Issue: Euro-Par."},{"issue":"1","key":"2917_CR16","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1137\/S0097539793259471","volume":"27","author":"RD Blumofe","year":"1998","unstructured":"Blumofe RD, Leiserson CE. Space-Efficient Scheduling of Multithreaded Computations. SIAM Journal on Computing. 1998;27(1):202\u201329.","journal-title":"SIAM Journal on Computing"},{"key":"2917_CR17","unstructured":"Bardakoff Alexandre. Analysis and Execution of a Data-Flow Graph Explicit Model Using Static Metaprogramming. Universit\u00e9 Clermont Auvergne, 2021. https:\/\/theses.hal.science\/tel-03813645"},{"key":"2917_CR18","unstructured":"Computation Platform for AI\/ML | NIST. (2019b, December 17). NIST. https:\/\/www.nist.gov\/programs-projects\/computation-platform-aiml"},{"key":"2917_CR19","unstructured":"Center for High Performance Computing - the University of Utah. (n.d.). https:\/\/chpc.utah.edu\/"},{"key":"2917_CR20","doi-asserted-by":"publisher","unstructured":"Kaiser et al. HPX - The C++ Standard Library for Parallelism and Concurrency. Journal of Open Source Software, 2020;5(53), 2352, https:\/\/doi.org\/10.21105\/joss.02352","DOI":"10.21105\/joss.02352"},{"key":"2917_CR21","doi-asserted-by":"crossref","unstructured":"Bauer M, Treichler S, Slaughter E, Aiken A. Legion: Expressing locality and independence with logical regions. In Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, 2012; 1-11. Supercomputing, IEEE.","DOI":"10.1109\/SC.2012.71"},{"issue":"2","key":"2917_CR22","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1002\/cpe.1631","volume":"23","author":"C Augonnet","year":"2011","unstructured":"Augonnet C, Thibault S, Namyst R, Wacrenier P-A. Starpu: a unified platform for task scheduling on heterogeneous multicore architectures. Concurrency and Computation: Practice and Experience. 2011;23(2):187\u201398.","journal-title":"Concurrency and Computation: Practice and Experience"},{"key":"2917_CR23","doi-asserted-by":"publisher","unstructured":"Garland M, et al \"Parallel Computing Experiences with CUDA,\" in IEEE Micro, vol. 28, no. 4, pp. 13-27, July-Aug. 2008. keywords: Parallel processing;Programming profession;Parallel programming;Concurrent computing;Computer architecture;Computer graphics;Kernel;Throughput;Central Processing Unit,https:\/\/doi.org\/10.1109\/MM.2008.57","DOI":"10.1109\/MM.2008.57"},{"issue":"10","key":"2917_CR24","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1145\/167962.165874","volume":"28","author":"LV Kale","year":"1993","unstructured":"Kale LV, Krishnan S. Charm++: A portable concurrent object oriented system based on c++. SIGPLAN Notices. 1993;28(10):91\u2013108.","journal-title":"SIGPLAN Notices"},{"key":"2917_CR25","unstructured":"Bennett J, Clay R, Baker G, Gamell M, Hollman D, Knight S, Kolla H, Sjaardema G, Slattengren N, Teranishi K, et al. Asc atdm level 2 milestone #5325: Asynchronous many-task runtime system analysis and assessment for next generation platforms. Technical Report SAND2015-8312, US Department of Energy, Sandia National Laboratories 2015"},{"key":"2917_CR26","doi-asserted-by":"publisher","unstructured":"Abdullah Alperen, Afibuzzaman Md, Rabbi Fazlay, Yusuf Ozkaya M, Catalyurek Umit, Metin Aktulga Hasan. \u201cAn Evaluation of Task-Parallel Frameworks for Sparse Solvers on Multicore and Manycore CPU Architectures.\u201d In 50th International Conference on Parallel Processing, 1-11. Lemont IL USA: ACM, 2021. https:\/\/doi.org\/10.1145\/3472456.3472476.","DOI":"10.1145\/3472456.3472476"},{"key":"2917_CR27","doi-asserted-by":"publisher","unstructured":"Ruidong Gu, Becchi Michela. \u201cA Comparative Study of Parallel Programming Frameworks for Distributed GPU Applications.\u201d In Proceedings of the 16th ACM International Conference on Computing Frontiers, 268-73. CF \u201919. New York, NY, USA: Association for Computing Machinery, 2019. https:\/\/doi.org\/10.1145\/3310273.3323071.","DOI":"10.1145\/3310273.3323071"},{"key":"2917_CR28","doi-asserted-by":"publisher","unstructured":"Emmanuel Agullo, Buttari Alfredo, Guermouche Abdou, Herrmann Julien, Jego Antoine. \u201cTask-Based Parallel Programming for Scalable Matrix Product Algorithms.\u201d ACM Transactions on Mathematical Software 49, no. 2 2023; 1-23. https:\/\/doi.org\/10.1145\/3583560.","DOI":"10.1145\/3583560"},{"key":"2917_CR29","doi-asserted-by":"publisher","unstructured":"David Rohr, Lindenstruth Volker. \u201cA Flexible and Portable Large-Scale DGEMM Library for Linpack on Next-Generation Multi-GPU Systems.\u201d In 2015 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, 2015; 664-68, https:\/\/doi.org\/10.1109\/PDP.2015.89.","DOI":"10.1109\/PDP.2015.89"},{"key":"2917_CR30","doi-asserted-by":"crossref","unstructured":"Baker Gavin Matthew, Bettencourt Matthew Tyler, Bova Steven W, Franko Ken, Gamell Marc, Grant Ryan, Hammond Simon David, Hollman David S, Knight Samuel, Kolla Hemanth, Lin Paul, Olivier Stephen Lecler, Sjaardema Gregory D, Slattengren Nicole Lemaster, Teranishi Keita, Wilke Jeremiah J, Bennett Janine Camille, Clay Robert L, Kale Laxkimant, Jain Nikhil, Mikida Eric, Aiken Alex, Bauer Michael, Lee Wonchan, Slaughter Elliott, Treichler Sean, Berzins Martin, Harman Todd, Humphreys Alan, Schmidt John, Sunderland Dan, Mccormick Pat, Gutierrez Samuel, Shulz Martin, Gamblin Todd, Bremer Peer, -Timo. ASC ATDM Level 2 Milestone #5325: Asynchronous Many-Task Runtime System Analysis and Assessment for Next Generation Platforms. United States. 2015.","DOI":"10.2172\/1432926"},{"key":"2917_CR31","doi-asserted-by":"publisher","unstructured":"Nanmiao Wu, Gonidelis Ioannis, Liu Simeng, Fink Zane, Gupta Nikunj , Mohammadiporshokooh Karame, Diehl Patrick, Kaiser Hartmut, Kale Laxmikant V. \u201cQuantifying Overheads in Charm++ and HPX Using Task Bench.\u201d In Euro-Par 2022: Parallel Processing Workshops, edited by Jeremy Singer, Yehia Elkhatib, Dora Blanco Heras, Patrick Diehl, Nick Brown, and Aleksandar Ilic, 5-16. Lecture Notes in Computer Science. Cham: Springer Nature Switzerland, 2023. https:\/\/doi.org\/10.1007\/978-3-031-31209-0_1.","DOI":"10.1007\/978-3-031-31209-0_1"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-024-02917-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-024-02917-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-024-02917-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,15]],"date-time":"2024-06-15T08:33:00Z","timestamp":1718440380000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-024-02917-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,15]]},"references-count":31,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2024,6]]}},"alternative-id":["2917"],"URL":"https:\/\/doi.org\/10.1007\/s42979-024-02917-y","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6,15]]},"assertion":[{"value":"12 December 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 April 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 June 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Certain equipment, instruments, software, or materials, commercial or non-commercial, are identified in this paper in order to specify the experimental procedure adequately. Such identification is not intended to imply recommendation or endorsement of any product or service by NIST, nor is it intended to imply that the materials or equipment identified are necessarily the best available for the purpose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"654"}}