{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T11:48:30Z","timestamp":1774957710143,"version":"3.50.1"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032071934","type":"print"},{"value":"9783032071941","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T00:00:00Z","timestamp":1760486400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T00:00:00Z","timestamp":1760486400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07194-1_8","type":"book-chapter","created":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T18:07:24Z","timestamp":1760465244000},"page":"122-142","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Examining MPI and\u00a0its Extensions for\u00a0Asynchronous Multithreaded Communication"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6917-5525","authenticated-orcid":false,"given":"Jiakun","family":"Yan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3504-2468","authenticated-orcid":false,"given":"Marc","family":"Snir","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3731-5423","authenticated-orcid":false,"given":"Yanfei","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,15]]},"reference":[{"key":"8_CR1","doi-asserted-by":"publisher","unstructured":"Abdulah, S., et al.: Boosting earth system model outputs and saving petabytes in their storage using exascale climate emulators. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis. SC \u201924. IEEE Press (2024). https:\/\/doi.org\/10.1109\/SC41406.2024.00008","DOI":"10.1109\/SC41406.2024.00008"},{"key":"8_CR2","doi-asserted-by":"publisher","unstructured":"Amer, A., et al.: Lock contention management in multithreaded MPI. ACM Trans. Parallel Comput. 5(3), 12:1\u201312:21 (2019). https:\/\/doi.org\/10.1145\/3275443. https:\/\/dl.acm.org\/doi\/10.1145\/3275443","DOI":"10.1145\/3275443"},{"key":"8_CR3","doi-asserted-by":"publisher","unstructured":"Amer, A., Lu, H., Wei, Y., Balaji, P., Matsuoka, S.: MPI+threads: runtime contention and remedies. In: Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2015, pp. 239\u2013248. Association for Computing Machinery (2015). https:\/\/doi.org\/10.1145\/2688500.2688522. https:\/\/dl.acm.org\/doi\/10.1145\/2688500.2688522","DOI":"10.1145\/2688500.2688522"},{"key":"8_CR4","doi-asserted-by":"publisher","unstructured":"Bachan, J., et al.: UPC++: a high-performance communication framework for asynchronous computation. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 963\u2013973 (2019). https:\/\/doi.org\/10.1109\/IPDPS.2019.00104","DOI":"10.1109\/IPDPS.2019.00104"},{"key":"8_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1007\/978-3-540-87475-1_20","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"P Balaji","year":"2008","unstructured":"Balaji, P., Buntinas, D., Goodell, D., Gropp, W., Thakur, R.: Toward efficient support for multithreaded MPI communication. In: Lastovetsky, A., Kechadi, T., Dongarra, J. (eds.) EuroPVM\/MPI 2008. LNCS, vol. 5205, pp. 120\u2013129. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-87475-1_20"},{"key":"8_CR6","doi-asserted-by":"publisher","unstructured":"Bauer, M., Treichler, S., Slaughter, E., Aiken, A.: Legion: expressing locality and independence with logical regions. In: SC \u201912: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311 (2012). https:\/\/doi.org\/10.1109\/SC.2012.71","DOI":"10.1109\/SC.2012.71"},{"key":"8_CR7","doi-asserted-by":"publisher","unstructured":"Bernholdt, D.E., et al.: A survey of MPI usage in the US exascale computing project. Concurr. Comput. Pract. Exp. 32(3), e4851 (2020). https:\/\/doi.org\/10.1002\/cpe.4851. https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1002\/cpe.4851","DOI":"10.1002\/cpe.4851"},{"key":"8_CR8","doi-asserted-by":"publisher","unstructured":"Boerner, T.J., Deems, S., Furlani, T.R., Knuth, S.L., Towns, J.: Access: advancing innovation: Nsf\u2019s advanced cyberinfrastructure coordination ecosystem: services & support. In: Practice and Experience in Advanced Research Computing 2023: Computing for the Common Good, pp. 173\u2013176 (2023). https:\/\/doi.org\/10.1145\/3569951.3597559","DOI":"10.1145\/3569951.3597559"},{"key":"8_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1007\/978-3-030-34627-0_11","volume-title":"Languages and Compilers for Parallel Computing","author":"D Bonachea","year":"2019","unstructured":"Bonachea, D., Hargrove, P.H.: GASNet-EX: a high-performance, portable communication library for exascale. In: Hall, M., Sundar, H. (eds.) LCPC 2018. LNCS, vol. 11882, pp. 138\u2013158. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34627-0_11"},{"key":"8_CR10","unstructured":"Bonachea, D., Jeong, J.: Gasnet: a portable high-performance communication layer for global address-space languages. CS258 Parallel Comput. Arch. Proj. Spring 31, 17 (2002)"},{"key":"8_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-13217-9_1","volume-title":"Beyond Loop Level Parallelism in OpenMP: Accelerators, Tasking and More","author":"P Carribault","year":"2010","unstructured":"Carribault, P., P\u00e9rache, M., Jourdren, H.: Enabling low-overhead hybrid MPI\/OpenMP parallelism with MPC. In: Sato, M., Hanawa, T., M\u00fcller, M.S., Chapman, B.M., de Supinski, B.R. (eds.) IWOMP 2010. LNCS, vol. 6132, pp. 1\u201314. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-13217-9_1"},{"key":"8_CR12","doi-asserted-by":"publisher","unstructured":"Chapman, B., et al.: Introducing OpenSHMEM: SHMEM for the PGAS community. In: Proceedings of the Fourth Conference on Partitioned Global Address Space Programming Model. PGAS \u201910. Association for Computing Machinery, New York (2010). https:\/\/doi.org\/10.1145\/2020373.2020375","DOI":"10.1145\/2020373.2020375"},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"Chatterjee, S., et al.: Integrating asynchronous task parallelism with MPI. In: 2013 IEEE 27th International Symposium on Parallel and Distributed Processing, pp. 712\u2013725. IEEE (2013). https:\/\/ieeexplore.ieee.org\/abstract\/document\/6569856","DOI":"10.1109\/IPDPS.2013.78"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Dai\u00df, G., et\u00a0al.: Asynchronous-many-task systems: Challenges and opportunities\u2013scaling an amr astrophysics code on exascale machines using kokkos and hpx. arXiv preprint arXiv:2412.15518 (2024)","DOI":"10.1177\/10943420251386503"},{"key":"8_CR15","doi-asserted-by":"publisher","unstructured":"Dang, H.V., Snir, M., Gropp, W.: Towards millions of communicating threads. In: Proceedings of the 23rd European MPI Users\u2019 Group Meeting, EuroMPI \u201916, pp. 1\u201314. Association for Computing Machinery, New York (2016). https:\/\/doi.org\/10.1145\/2966884.2966914","DOI":"10.1145\/2966884.2966914"},{"issue":"4","key":"8_CR16","doi-asserted-by":"publisher","first-page":"390","DOI":"10.1177\/1094342014548772","volume":"28","author":"J Dinan","year":"2014","unstructured":"Dinan, J., et al.: Enabling communication concurrency through flexible mpi endpoints. Int. J. High Perfor. Comput. Appl. 28(4), 390\u2013405 (2014)","journal-title":"Int. J. High Perfor. Comput. Appl."},{"key":"8_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/978-3-642-15646-5_2","volume-title":"Recent Advances in the Message Passing Interface","author":"G D\u00f3zsa","year":"2010","unstructured":"D\u00f3zsa, G., et al.: Enabling concurrent multithreaded MPI communication on multicore petascale systems. In: Keller, R., Gabriel, E., Resch, M., Dongarra, J. (eds.) EuroMPI 2010. LNCS, vol. 6305, pp. 11\u201320. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15646-5_2"},{"key":"8_CR18","doi-asserted-by":"publisher","unstructured":"El-Ghazawi, T., Smith, L.: UPC: Unified parallel C. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC \u201906, p. 27\u2013es. Association for Computing Machinery, New York (2006). https:\/\/doi.org\/10.1145\/1188455.1188483","DOI":"10.1145\/1188455.1188483"},{"key":"8_CR19","unstructured":"Gropp, W., Boerner, T., Bode, B., Bauer, G.: Delta: balancing gpu performance with advanced system interfaces (2023)"},{"issue":"1","key":"8_CR20","doi-asserted-by":"publisher","first-page":"10689","DOI":"10.1038\/s41598-020-67416-5","volume":"10","author":"S Hofmeyr","year":"2020","unstructured":"Hofmeyr, S., et al.: Terabase-scale metagenome coassembly with metahipmer. Sci. Rep. 10(1), 10689 (2020)","journal-title":"Sci. Rep."},{"key":"8_CR21","doi-asserted-by":"publisher","unstructured":"Hori, A., et al.: Process-in-process: techniques for practical address-space sharing. In: Proceedings of the 27th International Symposium on High-Performance Parallel and Distributed Computing, HPDC \u201918, pp. 131\u2013143. Association for Computing Machinery, New York (2018). https:\/\/doi.org\/10.1145\/3208040.3208045","DOI":"10.1145\/3208040.3208045"},{"key":"8_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1007\/978-3-540-24644-2_20","volume-title":"Languages and Compilers for Parallel Computing","author":"C Huang","year":"2004","unstructured":"Huang, C., Lawlor, O., Kal\u00e9, L.V.: Adaptive MPI. In: Rauchwerger, L. (ed.) LCPC 2003. LNCS, vol. 2958, pp. 306\u2013322. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-24644-2_20"},{"issue":"53","key":"8_CR23","doi-asserted-by":"publisher","first-page":"2352","DOI":"10.21105\/joss.02352","volume":"5","author":"H Kaiser","year":"2020","unstructured":"Kaiser, H., et al.: HPX - the C++ standard library for parallelism and concurrency. J. Open Source Softw. 5(53), 2352 (2020)","journal-title":"J. Open Source Softw."},{"key":"8_CR24","doi-asserted-by":"publisher","unstructured":"Kale, L.V., Krishnan, S.: CHARM++: a portable concurrent object oriented system based on C++ 28(10), 91\u2013108. https:\/\/doi.org\/10.1145\/167962.165874. https:\/\/dl.acm.org\/doi\/10.1145\/167962.165874","DOI":"10.1145\/167962.165874"},{"key":"8_CR25","doi-asserted-by":"publisher","unstructured":"Kamal, H., Wagner, A.: FG-MPI: fine-grain MPI for multicore and clusters. In: 2010 IEEE International Symposium on Parallel and Distributed Processing, Workshops and Phd Forum (IPDPSW), pp.\u00a01\u20138 (2010). https:\/\/doi.org\/10.1109\/IPDPSW.2010.5470773","DOI":"10.1109\/IPDPSW.2010.5470773"},{"key":"8_CR26","doi-asserted-by":"publisher","unstructured":"Ltaief, H., et al.: Toward capturing genetic epistasis from multivariate genome-wide association studies using mixed-precision kernel ridge regression. In: SC24: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312 (2024). https:\/\/doi.org\/10.1109\/SC41406.2024.00012","DOI":"10.1109\/SC41406.2024.00012"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Mei, C., et al.: Enabling and scaling biomolecular simulations of 100 million atoms on petascale machines with a multicore-optimized message-driven runtime. In: SC \u201911: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311 (2011)","DOI":"10.1145\/2063384.2063466"},{"key":"8_CR28","doi-asserted-by":"publisher","unstructured":"Mor, O., Bosilca, G., Snir, M.: Improving the scaling of an asynchronous many-task runtime with a lightweight communication engine. In: Proceedings of the 52nd International Conference on Parallel Processing, ICPP \u201923, pp. 153\u2013162. Association for Computing Machinery (2023). https:\/\/doi.org\/10.1145\/3605573.3605642. https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605642","DOI":"10.1145\/3605573.3605642"},{"key":"8_CR29","doi-asserted-by":"publisher","unstructured":"Morrison, A., Afek, Y.: Fast concurrent queues for x86 processors. In: Proceedings of the 18th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP \u201913, pp. 103\u2013112. Association for Computing Machinery, New York (2013). https:\/\/doi.org\/10.1145\/2442516.2442527","DOI":"10.1145\/2442516.2442527"},{"key":"8_CR30","unstructured":"MPI Forum: MPI: a message passing interface standard (2023). https:\/\/www.mpi-forum.org\/docs\/mpi-4.1\/mpi41-report.pdf"},{"key":"8_CR31","unstructured":"MPICH Developers: MPICH: High-Performance Portable MPI. https:\/\/www.mpich.org (nd)"},{"key":"8_CR32","unstructured":"NVIDIA: Rdma aware networks programming user manual (2025). https:\/\/docs.nvidia.com\/networking\/display\/rdmaawareprogrammingv17"},{"key":"8_CR33","unstructured":"(OFIWG), O.W.G.: Libfabric programmer\u2019s manual (2023)"},{"key":"8_CR34","doi-asserted-by":"publisher","unstructured":"Patinyasakdikul, T., Eberius, D., Bosilca, G., Hjelm, N.: Give MPI threading a fair chance: a study of multithreaded MPI designs. In: 2019 IEEE International Conference on Cluster Computing (CLUSTER), pp. 1\u201311 (2019). https:\/\/doi.org\/10.1109\/CLUSTER.2019.8891015. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8891015","DOI":"10.1109\/CLUSTER.2019.8891015"},{"key":"8_CR35","doi-asserted-by":"publisher","unstructured":"Patinyasakdikul, T., Luo, X., Eberius, D., Bosilca, G.: Multirate: a flexible mpi benchmark for fast assessment of multithreaded communication performance. In: 2019 IEEE\/ACM Workshop on Exascale MPI (ExaMPI), pp. 1\u201311 (2019). https:\/\/doi.org\/10.1109\/ExaMPI49596.2019.00006","DOI":"10.1109\/ExaMPI49596.2019.00006"},{"key":"8_CR36","unstructured":"Schuchart, J.: MPI continuations proposal (2021)"},{"key":"8_CR37","doi-asserted-by":"publisher","unstructured":"Schuchart, J., Samfass, P., Niethammer, C., Gracia, J., Bosilca, G.: Callback-based completion notification using MPI continuations. Parallel Comput. 106, 102793 (2021). https:\/\/doi.org\/10.1016\/j.parco.2021.102793. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167819121000466","DOI":"10.1016\/j.parco.2021.102793"},{"key":"8_CR38","doi-asserted-by":"publisher","unstructured":"Shamis, P., et al.: UCX: an open source framework for HPC network APIs and beyond. In: 2015 IEEE 23rd Annual Symposium on High-Performance Interconnects, pp. 40\u201343 (2015). https:\/\/doi.org\/10.1109\/HOTI.2015.13","DOI":"10.1109\/HOTI.2015.13"},{"key":"8_CR39","doi-asserted-by":"publisher","unstructured":"Strande, S., et al.: Expanse: computing without boundaries: Architecture, deployment, and early operations experiences of a supercomputer designed for the rapid evolution in science and engineering. In: Practice and Experience in Advanced Research Computing 2021: Evolution Across All Dimensions. PEARC \u201921. Association for Computing Machinery, New York (2021). https:\/\/doi.org\/10.1145\/3437359.3465588","DOI":"10.1145\/3437359.3465588"},{"key":"8_CR40","doi-asserted-by":"publisher","unstructured":"Yadav, R., et al.: Legate sparse: distributed sparse computing in python. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. SC \u201923. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3581784.3607033","DOI":"10.1145\/3581784.3607033"},{"key":"8_CR41","doi-asserted-by":"publisher","unstructured":"Yan, J., Kaiser, H., Snir, M.: Design and analysis of the network software stack of an asynchronous many-task system \u2013 the LCI parcelport of HPX. In: Proceedings of the SC \u201923 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis, SC-W \u201923, pp. 1151\u20131161. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3624062.3624598","DOI":"10.1145\/3624062.3624598"},{"key":"8_CR42","unstructured":"Yan, J., Kaiser, H., Snir, M.: Understanding the communication needs of asynchronous many-task systems\u2013a case study of HPX+LCI. arXiv preprint arXiv:2503.12774 (2025)"},{"key":"8_CR43","doi-asserted-by":"crossref","unstructured":"Yan, J., Snir, M.: Lci: a lightweight communication interface for efficient asynchronous multithreaded communication. arXiv preprint arXiv:2505.01864 (2025)","DOI":"10.1145\/3712285.3759881"},{"key":"8_CR44","doi-asserted-by":"publisher","unstructured":"Zambre, R., Chandramowliswharan, A., Balaji, P.: How i learned to stop worrying about user-visible endpoints and love MPI. In: Proceedings of the 34th ACM International Conference on Supercomputing. ICS \u201920. Association for Computing Machinery, New York (2020). https:\/\/doi.org\/10.1145\/3392717.3392773","DOI":"10.1145\/3392717.3392773"},{"key":"8_CR45","doi-asserted-by":"publisher","unstructured":"Zambre, R., Chandramowliswharan, A., Balaji, P.: How I learned to stop worrying about user-visible endpoints and love MPI. In: Proceedings of the 34th ACM International Conference on Supercomputing, ICS \u201920, pp. 1\u201313. Association for Computing Machinery (2020). https:\/\/doi.org\/10.1145\/3392717.3392773. https:\/\/dl.acm.org\/doi\/10.1145\/3392717.3392773","DOI":"10.1145\/3392717.3392773"},{"key":"8_CR46","doi-asserted-by":"publisher","unstructured":"Zambre, R., Sahasrabudhe, D., Zhou, H., Berzins, M., Chandramowlishwaran, A., Balaji, P.: Logically parallel communication for fast MPI+threads applications 32(12), 3038\u20133052 (2021). https:\/\/doi.org\/10.1109\/TPDS.2021.3075157. https:\/\/ieeexplore.ieee.org\/document\/9411740","DOI":"10.1109\/TPDS.2021.3075157"},{"key":"8_CR47","doi-asserted-by":"crossref","unstructured":"Zhou, H., Raffenetti, K., Guo, Y., Thakur, R.: MPIX stream: an explicit solution to hybrid MPI+X programming. In: Proceedings of the 29th European MPI Users\u2019 Group Meeting, pp. 1\u201310 (2022)","DOI":"10.1145\/3555819.3555820"},{"key":"8_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, H., Raffenetti, K., Zhang, J., Guo, Y., Thakur, R.: Frustrated with MPI+Threads? Try MPIxThreads! In: Proceedings of the 30th European MPI Users\u2019 Group Meeting, pp. 1\u201310 (2023)","DOI":"10.1145\/3615318.3615320"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in the Message Passing Interface"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07194-1_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T10:53:45Z","timestamp":1774954425000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07194-1_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,15]]},"ISBN":["9783032071934","9783032071941"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07194-1_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,15]]},"assertion":[{"value":"15 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}