{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:27:12Z","timestamp":1768030032288,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:00:00Z","timestamp":1705536000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"European Union","award":["101093261"],"award-info":[{"award-number":["101093261"]}]},{"DOI":"10.13039\/501100006374","name":"Lawrence Livermore National Laboratory","doi-asserted-by":"publisher","award":["DE-AC52-07NA27344"],"award-info":[{"award-number":["DE-AC52-07NA27344"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,1,18]]},"DOI":"10.1145\/3635035.3635036","type":"proceedings-article","created":{"date-parts":[[2024,1,20]],"date-time":"2024-01-20T00:23:32Z","timestamp":1705710212000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Non-Blocking GPU-CPU Notifications to Enable More GPU-CPU Parallelism"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0781-8206","authenticated-orcid":false,"given":"Bengisu","family":"Elis","sequence":"first","affiliation":[{"name":"Technical University of Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1904-9627","authenticated-orcid":false,"given":"Olga","family":"Pearce","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4159-1519","authenticated-orcid":false,"given":"David","family":"Boehme","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3036-0108","authenticated-orcid":false,"given":"Jason","family":"Burmark","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9013-435X","authenticated-orcid":false,"given":"Martin","family":"Schulz","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Germany"}]}],"member":"320","published-online":{"date-parts":[[2024,1,19]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00023"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480855"},{"key":"e_1_3_2_1_3_1","volume-title":"An interface for halo exchange pattern. www.prace-ri.eu\/IMG\/pdf\/wp86.pdf","author":"Bianco Mauro","year":"2014","unstructured":"Mauro Bianco. 2014. An interface for halo exchange pattern. www.prace-ri.eu\/IMG\/pdf\/wp86.pdf (2014)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00055"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392737"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2021.102827"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI52011.2020.00006"},{"key":"e_1_3_2_1_8_1","unstructured":"Mark Harris and Kyrylo Perelygin. 2023. Cooperative groups: Flexible cuda thread programming. https:\/\/developer.nvidia.com\/blog\/cooperative-groups\/"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2012.227"},{"key":"e_1_3_2_1_10_1","unstructured":"Jiri Kraus. 2022. An introduction to cuda-aware MPI. https:\/\/developer.nvidia.com\/blog\/introduction-cuda-aware-mpi\/"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Michael LeBeane Khaled Hamidouche Brad Benton Mauricio Breternitz Steven\u00a0K Reinhardt and Lizy\u00a0K John. 2017. GPU Triggered Networking for Intra-Kernel Communications. (2017) 12.","DOI":"10.1145\/3126908.3126950"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2928289"},{"key":"e_1_3_2_1_13_1","volume-title":"Euro-Par 2021: Parallel Processing, Leonel Sousa, Nuno Roma, and Pedro Tom\u00e1s (Eds.)","author":"Lin Dian-Lun","unstructured":"Dian-Lun Lin and Tsung-Wei Huang. 2021. Efficient GPU Computation Using Task Graph Parallelism. In Euro-Par 2021: Parallel Processing, Leonel Sousa, Nuno Roma, and Pedro Tom\u00e1s (Eds.). Springer, Cham, 435\u2013450."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389705"},{"key":"e_1_3_2_1_15_1","volume-title":"Exploring GPU Stream-Aware Message Passing using Triggered Operations. arXiv:2208.04817 (Aug","author":"Namashivayam Naveen","year":"2022","unstructured":"Naveen Namashivayam, Krishna Kandalla, Trey White, Nick Radcliffe, Larry Kaplan, and Mark Pagel. 2022. Exploring GPU Stream-Aware Message Passing using Triggered Operations. arXiv:2208.04817 (Aug 2022). http:\/\/arxiv.org\/abs\/2208.04817 arXiv:2208.04817 [cs]."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.249"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2022.102973"},{"key":"e_1_3_2_1_18_1","volume-title":"Euro-Par 2010 Parallel Processing Workshops, Mario\u00a0R","author":"Stuart A.","unstructured":"Jeff\u00a0A. Stuart, Michael Cox, and John\u00a0D. Owens. 2011. GPU-to-CPU Callbacks. In Euro-Par 2010 Parallel Processing Workshops, Mario\u00a0R. Guarracino, Fr\u00e9d\u00e9ric Vivien, Jesper\u00a0Larsson Tr\u00e4ff, Mario Cannatoro, Marco Danelutto, Anders Hast, Francesca Perla, Andreas Kn\u00fcpfer, Beniamino Di\u00a0Martino, and Michael Alexander (Eds.). Springer Berlin Heidelberg, 365\u2013372."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5214359"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3378678.3391881"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00057"}],"event":{"name":"HPCAsia 2024: International Conference on High Performance Computing in Asia-Pacific Region","location":"Nagoya Japan","acronym":"HPCAsia 2024"},"container-title":["Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3635035.3635036","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3635035.3635036","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:19:04Z","timestamp":1755915544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3635035.3635036"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,18]]},"references-count":21,"alternative-id":["10.1145\/3635035.3635036","10.1145\/3635035"],"URL":"https:\/\/doi.org\/10.1145\/3635035.3635036","relation":{},"subject":[],"published":{"date-parts":[[2024,1,18]]},"assertion":[{"value":"2024-01-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}