{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T08:44:56Z","timestamp":1773305096867,"version":"3.50.1"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031856969","type":"print"},{"value":"9783031856976","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-85697-6_18","type":"book-chapter","created":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T05:15:40Z","timestamp":1743570940000},"page":"273-286","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Flexible Algorithms for\u00a0Persistent MPI Allreduce Communication"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3327-4230","authenticated-orcid":false,"given":"Andreas","family":"Jocksch","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0768-4243","authenticated-orcid":false,"given":"C.\u00a0Nicole","family":"Avans","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7486-7542","authenticated-orcid":false,"given":"Riley","family":"Shipley","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5252-6600","authenticated-orcid":false,"given":"Anthony","family":"Skjellum","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,1]]},"reference":[{"issue":"19","key":"18_CR1","doi-asserted-by":"publisher","DOI":"10.1063\/5.0007045","volume":"152","author":"TD K\u00fchne","year":"2020","unstructured":"K\u00fchne, T.D., et al.: CP2K: an electronic structure and molecular dynamics software package-quickstep: efficient and accurate electronic structure calculations. J. Chem. Phys. 152(19), 194103 (2020). https:\/\/doi.org\/10.1063\/5.0007045","journal-title":"J. Chem. Phys."},{"key":"18_CR2","doi-asserted-by":"publisher","unstructured":"Karp, M.,\u00a0et al.: A high-fidelity flow solver for unstructured meshes on field-programmable gate arrays: design, evaluation, and future challenges. In: International Conference on High Performance Computing in Asia-Pacific Region, pp. 125\u2013136 (2022). https:\/\/doi.org\/10.1145\/3492805.3492808","DOI":"10.1145\/3492805.3492808"},{"key":"18_CR3","doi-asserted-by":"publisher","unstructured":"Holmes, D.J. et al.: Partitioned collective communication. In: 2021 Workshop on Exascale MPI (ExaMPI), pp. 9\u201317. IEEE (2021). https:\/\/doi.org\/10.1109\/ExaMPI54564.2021.00007.","DOI":"10.1109\/ExaMPI54564.2021.00007."},{"key":"18_CR4","doi-asserted-by":"publisher","unstructured":"Bouhrour, S.,\u00a0Pepin, T.,\u00a0Jaeger, J.: Towards leveraging collective performance with the support of MPI 4.0 features in MPC. Parallel Comput. 109, 102860 (2022). https:\/\/doi.org\/10.1016\/j.parco.2021.102860","DOI":"10.1016\/j.parco.2021.102860"},{"key":"18_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2021.102812","volume":"107","author":"A Jocksch","year":"2021","unstructured":"Jocksch, A., Ohana, N., Lanti, E., Koutsaniti, E., Karakasis, V., Villard, L.: An optimisation of allreduce communication in message-passing systems. Parallel Comput. 107, 102812 (2021). https:\/\/doi.org\/10.1016\/j.parco.2021.102812","journal-title":"Parallel Comput."},{"key":"18_CR6","doi-asserted-by":"publisher","unstructured":"Mohamed El\u00a0Maarouf, A.-K.,\u00a0Giraud, L.,\u00a0Guermouche, A.,\u00a0Guignon, T.: Combining reduction with synchronization barrier on multi-core processors. Concurr. Comput. Pract. Exp., e7402 (2023). https:\/\/doi.org\/10.1002\/cpe.7402","DOI":"10.1002\/cpe.7402"},{"key":"18_CR7","doi-asserted-by":"publisher","unstructured":"Hashmi, J.M., Chakraborty, S., Bayatpour, M., Subramoni, H., Panda, D.K.: Designing efficient shared address space reduction collectives for multi-\/many-cores. In: 2018 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 1020\u20131029. IEEE (2018). https:\/\/doi.org\/10.1109\/IPDPS.2018.00111.","DOI":"10.1109\/IPDPS.2018.00111."},{"issue":"04","key":"18_CR8","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1142\/S012962649300037X","volume":"3","author":"J Bruck","year":"1993","unstructured":"Bruck, J., Ho, C.-T.: Efficient global combine operations in multi-port message-passing systems. Parallel Process. Lett. 3(04), 335\u2013346 (1993). https:\/\/doi.org\/10.1142\/S012962649300037X","journal-title":"Parallel Process. Lett."},{"key":"18_CR9","doi-asserted-by":"publisher","unstructured":"Peng, J., et al.: Optimizing MPI collectives on shared memory multi-cores. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315 (2023). https:\/\/doi.org\/10.1145\/3581784.3607074","DOI":"10.1145\/3581784.3607074"},{"issue":"4","key":"18_CR10","doi-asserted-by":"publisher","first-page":"1139","DOI":"10.1007\/s10586-014-0361-4","volume":"17","author":"S Li","year":"2014","unstructured":"Li, S., Hoefler, T., Hu, C., Snir, M.: Improved MPI collectives for MPI processes in shared address spaces. Clust. Comput. 17(4), 1139\u20131155 (2014). https:\/\/doi.org\/10.1007\/s10586-014-0361-4","journal-title":"Clust. Comput."},{"key":"18_CR11","doi-asserted-by":"publisher","unstructured":"Vienne, J.: Benefits of cross memory attach for MPI libraries on HPC clusters. In: Proceedings of the 2014 Annual Conference on Extreme Science and Engineering Discovery Environment, pp. 1\u20136 (2014). https:\/\/doi.org\/10.1145\/2616498.2616532","DOI":"10.1145\/2616498.2616532"},{"key":"18_CR12","doi-asserted-by":"publisher","unstructured":"Wilkins, M., et al.: Generalized collective algorithms for the exascale era. In: 2023 IEEE International Conference on Cluster Computing (CLUSTER), pp. 60\u201371. IEEE (2023). https:\/\/doi.org\/10.1109\/CLUSTER52292.2023.00013","DOI":"10.1109\/CLUSTER52292.2023.00013"},{"key":"18_CR13","doi-asserted-by":"publisher","unstructured":"Fan, K.,\u00a0Petruzza, S.,\u00a0Gilray, T.,\u00a0Kumar, S.: Configurable algorithms for all-to-all collectives (2024). https:\/\/doi.org\/10.23919\/ISC.2024.10528936","DOI":"10.23919\/ISC.2024.10528936"},{"key":"18_CR14","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.parco.2017.08.004","volume":"69","author":"M Ruefenacht","year":"2017","unstructured":"Ruefenacht, M., Bull, M., Booth, S.: Generalisation of recursive doubling for allreduce: now with simulation. Parallel Comput. 69, 24\u201344 (2017). https:\/\/doi.org\/10.1016\/j.parco.2017.08.004","journal-title":"Parallel Comput."},{"issue":"1","key":"18_CR15","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R Thakur","year":"2005","unstructured":"Thakur, R., Rabenseifner, R., Gropp, W.: Optimization of collective communication operations in MPICH. Int. J. High Perform. Comput. Appl. 19(1), 49\u201366 (2005). https:\/\/doi.org\/10.1177\/1094342005051521","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"18_CR16","unstructured":"Jocksch, A., Piccinali, J.-G.: An efficient implementation of blocking and persistent MPI collective communication. In: EuroMPI Conference Bristol 2023, poster (2023)"},{"key":"18_CR17","doi-asserted-by":"publisher","unstructured":"Hoefler, T.,\u00a0Mehlan, T.,\u00a0Mietke, F.,\u00a0Rehm, W.: Fast barrier synchronization for infiniband\/spl trade. In: Proceedings 20th IEEE International Parallel & Distributed Processing Symposium, p. 7. IEEE (2006). https:\/\/doi.org\/10.1109\/IPDPS.2006.1639561","DOI":"10.1109\/IPDPS.2006.1639561"},{"key":"18_CR18","doi-asserted-by":"publisher","unstructured":"Nanjegowda, R.,\u00a0Hernandez, O.,\u00a0Chapman, B., Jin, H.H.: Scalability evaluation of barrier algorithms for OpenMP. In: Evolving OpenMP in an Age of Extreme Parallelism: 5th International Workshop on OpenMP, IWOMP 2009 Dresden, Germany, 3-5 June 2009 Proceedings 5, pp. 42\u201352. Springer (2009). https:\/\/doi.org\/10.1007\/978-3-642-02303-3_4","DOI":"10.1007\/978-3-642-02303-3_4"},{"key":"18_CR19","doi-asserted-by":"publisher","unstructured":"Speziale, E., Di Biagio, A., Agosta, G.: An optimized reduction design to minimize atomic operations in shared memory multiprocessors. In: 2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and PhD Forum, pp. 1300\u20131309. IEEE (2011). https:\/\/doi.org\/10.1109\/IPDPS.2011.271.","DOI":"10.1109\/IPDPS.2011.271."},{"key":"18_CR20","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/BF01407956","volume":"19","author":"BD Lubachevsky","year":"1990","unstructured":"Lubachevsky, B.D.: Synchronization barrier and related tools for shared memory parallel programming. Int. J. Parallel Prog. 19, 225\u2013250 (1990). https:\/\/doi.org\/10.1007\/BF01407956","journal-title":"Int. J. Parallel Prog."}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-85697-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T05:15:44Z","timestamp":1743570944000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-85697-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031856969","9783031856976"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-85697-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PPAM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel Processing and Applied Mathematics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ostrava","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ppam2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ppam.edu.pl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}