{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:34Z","timestamp":1766219974445,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","funder":[{"name":"US Department of Energy","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754652","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"764-773","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["IRIS-MASH: Efficient Multi-device Asynchronous Multi-Stream Heterogeneous Computing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8259-8891","authenticated-orcid":false,"given":"Narasinga Rao","family":"Miniskar","sequence":"first","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5448-4667","authenticated-orcid":false,"given":"Aaron R.","family":"Young","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3419-4037","authenticated-orcid":false,"given":"Mohammad Alaul Haque","family":"Monil","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4004-4791","authenticated-orcid":false,"given":"Kazi","family":"Asifuzzaman","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5426-1415","authenticated-orcid":false,"given":"Beau","family":"Johnston","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6647-2690","authenticated-orcid":false,"given":"Keita","family":"Teranishi","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2449-6720","authenticated-orcid":false,"given":"Jeffrey S.","family":"Vetter","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-385963-1.00034-4"},{"key":"e_1_3_3_2_3_2","unstructured":"Emmanuel Agullo Olivier Aumage Mathieu Faverge Nathalie Furmento Florent Pruvost Marc Sergent and Samuel\u00a0Paul Thibault. 2017. Achieving high performance on supercomputers with a sequential task-based programming model. IEEE Transactions on Parallel and Distributed Systems (2017)."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICEEE52452.2021.9415927"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"crossref","unstructured":"C\u00e9dric Augonnet Samuel Thibault Raymond Namyst and Pierre-Andr\u00e9 Wacrenier. 2011. StarPU: a unified platform for task scheduling on heterogeneous multicore architectures. Concurr. Comput. Pract. Exp. 23 2 (2011) 187\u2013198.","DOI":"10.1002\/cpe.1631"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/1248377.1248384"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"George Bosilca Aurelien Bouteiller et\u00a0al. 2010. Distibuted dense numerical linear algebra algorithms on massively parallel architectures: DPLASMA. (2010).","DOI":"10.1109\/IPDPS.2011.299"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"George Bosilca Aurelien Bouteiller Anthony Danalis Mathieu Faverge Thomas H\u00e9rault and Jack\u00a0J Dongarra. 2013. Parsec: Exploiting heterogeneity to enhance scalability. Computing in Science & Engineering 15 6 (2013) 36\u201345.","DOI":"10.1109\/MCSE.2013.98"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"L. Dagum and R. Menon. 1998. OpenMP: an industry standard API for shared-memory programming. IEEE Computational Science and Engineering 5 1 (1998) 46\u201355.","DOI":"10.1109\/99.660313"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/WOLFHPC.2014.8"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Alejandro Duran Eduard Ayguad\u00e9 Rosa\u00a0M Badia Jes\u00fas Labarta Luis Martinell Xavier Martorell and Judit Planas. 2011. Ompss: a proposal for programming heterogeneous multi-core architectures. Parallel processing letters 21 02 (2011).","DOI":"10.1142\/S0129626411000151"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624244"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW63119.2024.00017"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/2676870.2676883"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC49654.2021.9622873"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC58863.2023.10363512"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3587278.3595642"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC56025.2022.00042"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/RSDHA56811.2022.00007"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624184"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605642"},{"key":"e_1_3_3_2_22_2","unstructured":"Judit Planas Rosa\u00a0M Badia Eduard Ayguad\u00e9 and Jes\u00fas Labarta. 2013. Selection of task implementations in the Nanos++ runtime. PRACE WP53 (2013)."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"K Raju and Niranjan\u00a0N Chiplunkar. 2021. Performance enhancement of CUDA applications by overlapping data transfer and Kernel execution. Applied Computer Science 17 3 (2021).","DOI":"10.35784\/acs-2021-17"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624242"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-09766-451"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2019.00137"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"David Schneider. 2022. The Exascale Era is Upon Us: The Frontier supercomputer may be the first to reach 1 000 000 000 000 000 000 operations per second. IEEE spectrum 59 1 (2022) 34\u201335.","DOI":"10.1109\/MSPEC.2022.9676353"},{"key":"e_1_3_3_2_28_2","unstructured":"Samuel Thibault. 2018. On runtime systems for task-based programming on heterogeneous platforms. Ph.\u00a0D. Dissertation. Universit\u00e9 de Bordeaux."},{"key":"e_1_3_3_2_29_2","first-page":"41","volume-title":"International Workshop on Languages and Compilers for Parallel Computing","author":"Tian Shilei","year":"2020","unstructured":"Shilei Tian, Johannes Doerfert, and Barbara Chapman. 2020. Concurrent execution of deferred OpenMP target tasks with hidden helper threads. In International Workshop on Languages and Compilers for Parallel Computing. Springer, 41\u201356."},{"key":"e_1_3_3_2_30_2","unstructured":"Jack Wells Buddy Bland Jeff Nichols Jim Hack Fernanda Foertter Gaute Hagen Thomas Maier Moetasim Ashfaq Bronson Messer and Suzanne Parete-Koon. 2016. Announcing Supercomputer Summit. https:\/\/www.osti.gov\/biblio\/1259664"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-23606-8_14"}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754652","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:35:30Z","timestamp":1766219730000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754652"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":30,"alternative-id":["10.1145\/3754598.3754652","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754652","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}