{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T11:44:37Z","timestamp":1767959077370,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","funder":[{"name":"EuroHPC Joint Undertaking (SparCity)","award":["956213"],"award-info":[{"award-number":["956213"]}]},{"name":"Federal Ministry of Education and Research of Germany","award":["16HPC045"],"award-info":[{"award-number":["16HPC045"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,1,26]]},"DOI":"10.1145\/3773656.3773692","type":"proceedings-article","created":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T10:22:11Z","timestamp":1767954131000},"page":"284-295","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Revisiting Communication Software Offloading for MPI+Threads: Reducing Contention and Improving Overlap on Many-Core Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7742-1427","authenticated-orcid":false,"given":"Sergej","family":"Breiter","sequence":"first","affiliation":[{"name":"Ludwig Maximilian University of Munich, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6119-3852","authenticated-orcid":false,"given":"Minh","family":"Chung","sequence":"additional","affiliation":[{"name":"Leibniz Supercomputing Centre, Munich, Germany and Ludwig Maximilian University of Munich, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0398-4087","authenticated-orcid":false,"given":"Karl","family":"F\u00fcrlinger","sequence":"additional","affiliation":[{"name":"Ludwig Maximilian University of Munich, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7159-1432","authenticated-orcid":false,"given":"Josef","family":"Weidendorfer","sequence":"additional","affiliation":[{"name":"Leibniz Supercomputing Centre, Munich, Germany and Technical University of Munich, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8319-0123","authenticated-orcid":false,"given":"Dieter","family":"Kranzlm\u00fcller","sequence":"additional","affiliation":[{"name":"Leibniz Supercomputing Centre, Munich, Germany and Ludwig Maximilian University of Munich, Munich, Germany"}]}],"member":"320","published-online":{"date-parts":[[2026,1,25]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","unstructured":"Abdelhalim Amer Huiwei Lu Pavan Balaji Milind Chabbi Yanjie Wei Jeff Hammond and Satoshi Matsuoka. 2019. Lock Contention Management in Multithreaded MPI. ACM Trans. Parallel Comput. 5 3 Article 12 (Jan. 2019) 21\u00a0pages. 10.1145\/3275443","DOI":"10.1145\/3275443"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2015.93"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/2688500.2688522"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","unstructured":"Pavan Balaji Darius Buntinas David Goodell William Gropp and Rajeev Thakur. 2010. Fine-grained multithreading support for hybrid threaded MPI programming. The International Journal of High Performance Computing Applications 24 1 (2010) 49\u201357. 10.1177\/1094342009360206","DOI":"10.1177\/1094342009360206"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"David\u00a0E. Bernholdt Swen Boehm George Bosilca Manjunath Gorentla\u00a0Venkata Ryan\u00a0E. Grant Thomas Naughton Howard\u00a0P. Pritchard Martin Schulz and Geoffroy\u00a0R. Vallee. 2020. A survey of MPI usage in the US exascale computing project. Concurrency and Computation: Practice and Experience 32 3 (2020) e4851. arXiv:https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1002\/cpe.485110.1002\/cpe.4851e4851 cpe.4851.","DOI":"10.1002\/cpe.4851"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2017.65"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","unstructured":"Jack\u00a0J Dongarra Steve\u00a0W Otto Marc Snir David Walker et\u00a0al. 1995. An introduction to the MPI standard. Commun. ACM 18 11 (1995). 10.5555\/898812","DOI":"10.5555\/898812"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","unstructured":"Matthew\u00a0G.F. Dosanjh Andrew Worley Derek Schafer Prema Soundararajan Sheikh Ghafoor Anthony Skjellum Purushotham\u00a0V. Bangalore and Ryan\u00a0E. Grant. 2021. Implementation and evaluation of MPI 4.0 partitioned communication libraries. Parallel Comput. 108 (2021) 102827. 10.1016\/j.parco.2021.102827","DOI":"10.1016\/j.parco.2021.102827"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3149457.3149475"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Antonella Galizia Daniele D\u2019Agostino and Andrea Clematis. 2015. An MPI\u2013CUDA library for image processing on HPC architectures. J. Comput. Appl. Math. 273 (2015) 414\u2013427. 10.1016\/j.cam.2014.05.004","DOI":"10.1016\/j.cam.2014.05.004"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20656-7_17"},{"key":"e_1_3_3_2_13_2","first-page":"5455","volume-title":"Proceedings of Cray User Group Conference","volume":"4","author":"Hager Georg","year":"2009","unstructured":"Georg Hager, Gabriele Jost, and Rolf Rabenseifner. 2009. Communication characteristics and hybrid MPI\/OpenMP parallel programming on clusters of multi-core SMP nodes. In Proceedings of Cray User Group Conference , Vol.\u00a04. 5455. https:\/\/masters.donntu.ru\/2013\/fknt\/korienev\/library\/article1.pdf"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI54564.2021.00007"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491204.3527498"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-79561-2_4"},{"key":"e_1_3_3_2_17_2","volume-title":"MPI: A Message-Passing Interface Standard Version 4.0","author":"Forum Message Passing Interface","year":"2021","unstructured":"Message Passing Interface Forum. 2021. MPI: A Message-Passing Interface Standard Version 4.0. https:\/\/www.mpi-forum.org\/docs\/mpi-4.0\/mpi40-report.pdf"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-032-07194-1_6"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.45"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807602"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-9739-5_2"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571987"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/PADSW.2018.8645059"}],"event":{"name":"SCA\/HPCAsia 2026: Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region","location":"Osaka Japan","acronym":"SCA\/HPCAsia 2026"},"container-title":["Proceedings of the Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region"],"original-title":[],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T10:23:43Z","timestamp":1767954223000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773656.3773692"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,25]]},"references-count":22,"alternative-id":["10.1145\/3773656.3773692","10.1145\/3773656"],"URL":"https:\/\/doi.org\/10.1145\/3773656.3773692","relation":{},"subject":[],"published":{"date-parts":[[2026,1,25]]},"assertion":[{"value":"2026-01-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}