{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T15:31:10Z","timestamp":1759073470004,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624609","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1958-1967","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Memory Transfer Decomposition: Exploring Smart Data Movement Through Architecture-Aware Strategies"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2961-1772","authenticated-orcid":false,"given":"Diego A.","family":"Roa Perdomo","sequence":"first","affiliation":[{"name":"Electrical and Computer Engineering, University of Delaware - CAPSL, United States of America and Argonne National Laboratory (ANL), United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5830-0733","authenticated-orcid":false,"given":"Rodrigo","family":"Ceccato","sequence":"additional","affiliation":[{"name":"University of Campinas, Brazil and Argonne National Laboratory, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5191-1532","authenticated-orcid":false,"given":"R\u00e9my","family":"Neveu","sequence":"additional","affiliation":[{"name":"University of Campinas, Brazil and Argonne National Laboratory, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1214-3431","authenticated-orcid":false,"given":"Herv\u00e9","family":"Yviquel","sequence":"additional","affiliation":[{"name":"University of Campinas, Brazil"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5079-3219","authenticated-orcid":false,"given":"Xiaoming","family":"Li","sequence":"additional","affiliation":[{"name":"University of Delaware, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6875-1685","authenticated-orcid":false,"given":"Jose M.","family":"Monsalve Diaz","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7870-8963","authenticated-orcid":false,"given":"Johannes","family":"Doerfert","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"volume-title":"Scaling OpenMP for Exascale Performance and Portability, Bronis\u00a0R. de\u00a0Supinski, Stephen\u00a0L","author":"Atkinson Patrick","key":"e_1_3_2_1_1_1","unstructured":"Patrick Atkinson and Simon McIntosh-Smith. 2017. On the Performance of Parallel Tasking Runtimes for an Irregular Fast Multipole Method Application. In Scaling OpenMP for Exascale Performance and Portability, Bronis\u00a0R. de\u00a0Supinski, Stephen\u00a0L. Olivier, Christian Terboven, Barbara\u00a0M. Chapman, and Matthias\u00a0S. M\u00fcller (Eds.). Springer International Publishing, Cham, 92\u2013106."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1631"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3236367.3236381"},{"volume-title":"Scalability Issues in FFT Computation","author":"Ayala Alan","key":"e_1_3_2_1_4_1","unstructured":"Alan Ayala, Stanimire Tomov, Miroslav Stoyanov, and Jack Dongarra. 2021. Scalability Issues in FFT Computation. In Parallel Computing Technologies, Victor Malyshkin (Ed.). Springer International Publishing, Cham, 279\u2013287."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00104"},{"key":"e_1_3_2_1_6_1","unstructured":"OpenMP Architecture\u00a0Review Board. [n. d.]. OpenMP API Specification. https:\/\/www.osti.gov\/servlets\/purl\/1648853"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PADW56527.2022.00014"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Alexandre Denis Emmanuel Jeannot Philippe Swartvagher and Samuel Thibault. 2020. Using Dynamic Broadcasts to improve Task-Based Runtime Performances. (2020).","DOI":"10.1007\/978-3-030-57675-2_28"},{"key":"e_1_3_2_1_9_1","unstructured":"Dawson Fox Jose\u00a0Monsalve Diaz and Xiaoming Li. 2023. On Memory Codelets: Prefetching Recoding Moving and Streaming Data. arXiv:arXiv:2302.00115"},{"key":"e_1_3_2_1_10_1","volume-title":"Jose M\u00a0Monsalve Diaz, and Xiaoming Li","author":"Fox Dawson","year":"2022","unstructured":"Dawson Fox, Jose M\u00a0Monsalve Diaz, and Xiaoming Li. 2022. Chiplets and the Codelet Model. arXiv:arXiv:2209.06083"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMHPC.2016.006"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the International Conference on Supercomputing","author":"Grasso Ivan","year":"2013","unstructured":"Ivan Grasso, Simone Pellegrini, Biagio Cosenza, and Thomas Fahringer. 2013. LibWater: Heterogeneous distributed computing made easy. Proceedings of the International Conference on Supercomputing (2013), 161\u2013170."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-016-1779-7"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370036.2145866"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370475"},{"key":"e_1_3_2_1_16_1","volume-title":"GPU Technology Conference (GTC), Vol.\u00a02.","author":"Jeaugey Sylvain","year":"2017","unstructured":"Sylvain Jeaugey. 2017. Nccl 2.0. In GPU Technology Conference (GTC), Vol.\u00a02."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/165854.165874"},{"key":"e_1_3_2_1_18_1","unstructured":"Jeongnim Kim. 2021. miniQMC - QMCPACK Miniapp. https:\/\/github.com\/QMCPACK\/miniqmc."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1088\/1361-648X\/aab9c3"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2676870.2676879"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356176"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI49596.2019.00010"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2011.05.006"},{"key":"e_1_3_2_1_24_1","unstructured":"Emin Nuriyev and Alexey Lastovetsky. 2020. Accurate runtime selection of optimal MPI collective algorithms using analytical performance modelling. (2020) 1\u201315."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-07312-0_16"},{"key":"e_1_3_2_1_26_1","first-page":"127","article-title":"Performance analysis of MPI collective operations","volume":"10","author":"Pje\u0161ivac-Grbovi\u0107 Jelena","year":"2004","unstructured":"Jelena Pje\u0161ivac-Grbovi\u0107, Thara Angskun, George Bosilca, Graham\u00a0E Fagg, Edgar Gabriel, and Jack\u00a0J Dongarra. 2004. Performance analysis of MPI collective operations. Tertiary Education and Management 10, 2 (2004), 127\u2013143.","journal-title":"Tertiary Education and Management"},{"key":"e_1_3_2_1_27_1","unstructured":"Diego Roa and Rodrigo Ceccato. 2023. AutoStrategizer Repository. https:\/\/github.com\/Darptolus\/auto-strategizer-artifacts."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.anucene.2014.07.048"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2009.09.001"},{"volume-title":"Encyclopedia of Parallel Computing","author":"Steele L","key":"e_1_3_2_1_30_1","unstructured":"Guy\u00a0L Steele, Xiaowei Shen, Josep Torrellas, Mark Tuckerman, Eric\u00a0J Bohm, Laxmikant\u00a0V Kal\u00e9, Glenn Martyna, Pen-Chung Yew, H\u00a0Peter Hofstee, Matthew Sottile, Bruce Hendrickson, Bradford\u00a0L Chamberlain, Laxmikant\u00a0V Kal\u00e9, Martin Schulz, Charles\u00a0E Leiserson, Thomas\u00a0L Sterling, Daniel\u00a0P Siewiorek, E\u00a0D Gehringer, Robert\u00a0W Numrich, C\u00e9dric Bastoul, Robert Geijn, Jesperlarsson Tr\u00e4ff, Dhabaleswar\u00a0K Panda, Sayantan Sur, Hari Subramoni, Krishna Kandalla, Laxmikant\u00a0V Kal\u00e9, Pritish Jetley, Patrick\u00a0H Worley, Mariana Vertenstein, Anthony\u00a0P Craig, Geoffrey Fox, John\u00a0C Hart, Michael\u00a0G Burke, Kathleen Knobe, Ryan Newton, Vivek Sarkar, John Reppy, Pedro\u00a0J Garcia, Guy\u00a0L Steele, Guy\u00a0L Steele, Guy\u00a0L Steele, John Swensen, M\u2019hamed Souli, Timothy Prince, Jason Wang, Michael Dungworth, James Harrell, Michael Levine, Stephen Nelson, Steven Oberlin, Steven\u00a0P Reinhardt, James\u00a0L Schwarzmeier, Larry Kaplan, Jeff Brooks, Gerry Kirschner, Dennis Abts, A\u00a0W Roscoe, Jim Davies, Monty Denneau, and Michael Schlansker. 2011. Chapel (Cray Inc. HPCS Language). In Encyclopedia of Parallel Computing. Springer US, Boston, MA, 249\u2013256."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"e_1_3_2_1_32_1","volume-title":"International Workshop on Languages and Compilers for Parallel Computing. Springer, 41\u201356","author":"Tian Shilei","year":"2020","unstructured":"Shilei Tian, Johannes Doerfert, and Barbara Chapman. 2020. Concurrent execution of deferred OpenMP target tasks with hidden helper threads. In International Workshop on Languages and Compilers for Parallel Computing. Springer, 41\u201356."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2007.12.001"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-15976-8_3"},{"key":"e_1_3_2_1_35_1","volume-title":"PHYSOR 2014 - The Role of Reactor Physics toward a Sustainable Future. Kyoto. https:\/\/www.mcs.anl.gov\/papers\/P5064-0114","author":"Tramm R","year":"2014","unstructured":"John\u00a0R Tramm, Andrew\u00a0R Siegel, Tanzima Islam, and Martin Schulz. 2014. XSBench - The Development and Verification of a Performance Abstraction for Monte Carlo Reactor Analysis. In PHYSOR 2014 - The Role of Reactor Physics toward a Sustainable Future. Kyoto. https:\/\/www.mcs.anl.gov\/papers\/P5064-0114.pdf"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2013.6702676"},{"key":"e_1_3_2_1_37_1","volume-title":"The OpenMP Cluster Programming Model. In Workshop Proceedings of the 51st International Conference on Parallel Processing (Bordeaux, France) (ICPP Workshops \u201922","author":"Yviquel Herv\u00e9","year":"2023","unstructured":"Herv\u00e9 Yviquel, Marcio Pereira, Em\u00edlio Francesquini, Guilherme Valarini, Gustavo Leite, Pedro Rosso, Rodrigo Ceccato, Carla Cusihualpa, Vitoria Dias, Sandro Rigo, Alan Souza, and Guido Araujo. 2023. The OpenMP Cluster Programming Model. In Workshop Proceedings of the 51st International Conference on Parallel Processing (Bordeaux, France) (ICPP Workshops \u201922, Article 17). Association for Computing Machinery, New York, NY, USA, 1\u201311."}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","acronym":"SC-W 2023","location":"Denver CO USA"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624609","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624609","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:01:27Z","timestamp":1755745287000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624609"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":37,"alternative-id":["10.1145\/3624062.3624609","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624609","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}