{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T13:40:01Z","timestamp":1759066801924,"version":"3.44.0"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032063427","type":"print"},{"value":"9783032063434","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06343-4_2","type":"book-chapter","created":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T13:23:54Z","timestamp":1759065834000},"page":"18-32","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["OMPCCL: Extending OpenMP with\u00a0Portable Collective Operations for\u00a0Multiple Devices"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2620-0768","authenticated-orcid":false,"given":"Jhonatan","family":"Cl\u00e9to","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5191-1532","authenticated-orcid":false,"given":"R\u00e9my","family":"Neveu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5830-0733","authenticated-orcid":false,"given":"Rodrigo","family":"Ceccato","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8146-9104","authenticated-orcid":false,"given":"Guilherme","family":"Valarini","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6875-1685","authenticated-orcid":false,"given":"Jose M. Monsalve","family":"Diaz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1214-3431","authenticated-orcid":false,"given":"Herv\u00e9","family":"Yviquel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,29]]},"reference":[{"key":"2_CR1","unstructured":"AMD: Understanding RCCL Bandwidth and xGMI Performance on AMD Instinct\u2122 MI300X (2025). https:\/\/rocm.blogs.amd.com\/software-tools-optimization\/mi300x-rccl-xgmi\/README.html. Accessed 18 May 2025"},{"key":"2_CR2","doi-asserted-by":"publisher","unstructured":"Antao, S.F., et al.: Offloading support for OpenMP in clang and LLVM. In: 2016 Third Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC), pp. 1\u201311 (2016). https:\/\/doi.org\/10.1109\/LLVM-HPC.2016.006","DOI":"10.1109\/LLVM-HPC.2016.006"},{"key":"2_CR3","doi-asserted-by":"publisher","unstructured":"Broquedis, F., et al.: hwloc: a generic framework for managing hardware affinities in HPC applications. In: 2010 18th Euromicro Conference on Parallel, Distributed and Network-Based Processing, pp. 180\u2013186 (2010). https:\/\/doi.org\/10.1109\/PDP.2010.67","DOI":"10.1109\/PDP.2010.67"},{"key":"2_CR4","doi-asserted-by":"publisher","unstructured":"Ceccato, R., Cl\u00e9to, J., Leite, G., Rigo, S., Diaz, J.M.M., Yviquel, H.: Spinner: enhancing HPC experimentation with a streamlined parameter sweep tool. In: 2024 International Symposium on Computer Architecture and High Performance Computing Workshops (SBAC-PADW), pp. 1\u201311 (2024). https:\/\/doi.org\/10.1109\/SBAC-PADW64858.2024.00013","DOI":"10.1109\/SBAC-PADW64858.2024.00013"},{"key":"2_CR5","doi-asserted-by":"publisher","unstructured":"Ceccato, R., Yviquel, H., Pereira, M., Souza, A., Araujo, G.: Implementing the broadcast operation in a distributed task-based runtime. In: 2022 International Symposium on Computer Architecture and High Performance Computing Workshops (SBAC-PADW), pp. 25\u201332 (2022). https:\/\/doi.org\/10.1109\/SBAC-PADW56527.2022.00014","DOI":"10.1109\/SBAC-PADW56527.2022.00014"},{"key":"2_CR6","doi-asserted-by":"publisher","unstructured":"Cho, S., Son, H., Kim, J.: Logical\/physical topology-aware collective communication in deep learning training. In: 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 56\u201368 (2023). https:\/\/doi.org\/10.1109\/HPCA56546.2023.10071117","DOI":"10.1109\/HPCA56546.2023.10071117"},{"key":"2_CR7","doi-asserted-by":"publisher","unstructured":"Cl\u00e9to, J., Valarini, G., Pereira, M., Araujo, G., Yviquel, H.: Scalable OpenMP remote offloading via asynchronous MPI and coroutine-driven communication. In: Nagel, E., Goehringer, D., Diniz, C. (eds.) Euro-Par 2025: Parallel Processing, pp. 254\u2014267. Springer Nature Switzerland, Cham (2026). https:\/\/doi.org\/10.1007\/978-3-031-99872-0_18. ISBN No: 978-3-031-99872-0","DOI":"10.1007\/978-3-031-99872-0_18"},{"key":"2_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1007\/978-3-319-98521-3_8","volume-title":"Evolving OpenMP for Evolving Architectures","author":"J Doerfert","year":"2018","unstructured":"Doerfert, J., Finkel, H.: Compiler optimizations for OpenMP. In: de Supinski, B.R., Valero-Lara, P., Martorell, X., Mateo Bellido, S., Labarta, J. (eds.) IWOMP 2018. LNCS, vol. 11128, pp. 113\u2013127. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-98521-3_8"},{"key":"2_CR9","unstructured":"Hidayetoglu, M., et al.: HiCCL: A Hierarchical Collective Communication Library (2024). https:\/\/arxiv.org\/abs\/2408.05962"},{"key":"2_CR10","doi-asserted-by":"publisher","unstructured":"Huber, J., et al.: Efficient execution of OpenMP on GPUs. In: 2022 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 41\u201352 (2022). https:\/\/doi.org\/10.1109\/CGO53902.2022.9741290","DOI":"10.1109\/CGO53902.2022.9741290"},{"key":"2_CR11","doi-asserted-by":"publisher","unstructured":"Kale, L.V., Zheng, G.: The Charm++ programming model. In: Kale, L.V., Bhatele, A. (eds.) Parallel Science and Engineering Applications: The Charm++ Approach, 1st edn, chap.\u00a01, pp. 1\u201316. CRC Press, Inc., Boca Raton (2013). https:\/\/doi.org\/10.1201\/b16251","DOI":"10.1201\/b16251"},{"key":"2_CR12","doi-asserted-by":"publisher","unstructured":"Kandalla, K., Subramoni, H., Vishnu, A., Panda, D.K.: Designing topology-aware collective communication algorithms for large scale InfiniBand clusters: case studies with Scatter and Gather. In: 2010 IEEE International Symposium on Parallel & Distributed Processing, Workshops and Phd Forum (IPDPSW), pp.\u00a01\u20138 (2010). https:\/\/doi.org\/10.1109\/IPDPSW.2010.5470853","DOI":"10.1109\/IPDPSW.2010.5470853"},{"key":"2_CR13","doi-asserted-by":"publisher","unstructured":"Klinkenberg, J., Kraus, J., Terboven, C., M\u00fcller, M.S.: Towards locality-aware host-to-device offloading in OpenMP. In: Advancing OpenMP for Future Accelerators: 20th International Workshop on OpenMP, IWOMP 2024, Perth, WA, Australia, 23\u201325 September 2024, Proceedings, pp. 3\u201315. Springer, Heidelberg (2024). https:\/\/doi.org\/10.1007\/978-3-031-72567-8_1","DOI":"10.1007\/978-3-031-72567-8_1"},{"key":"2_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1007\/978-3-642-40698-0_7","volume-title":"OpenMP in the Era of Low Power Devices and Accelerators","author":"C Liao","year":"2013","unstructured":"Liao, C., Yan, Y., Supinski, B.R., Quinlan, D.J., Chapman, B.: Early experiences with the OpenMP accelerator model. In: Rendell, A.P., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2013. LNCS, vol. 8122, pp. 84\u201398. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40698-0_7"},{"key":"2_CR15","unstructured":"Message Passing Interface Forum: MPI: A Message-Passing Interface Standard Version 4.1 (2023). https:\/\/www.mpi-forum.org\/docs\/mpi-4.1\/mpi41-report.pdf"},{"key":"2_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-981-15-7683-6_1","volume-title":"XcalableMP PGAS Programming Language","author":"H Murai","year":"2021","unstructured":"Murai, H., Nakao, M., Sato, M.: XcalableMP programming model and language. In: Sato, M. (ed.) XcalableMP PGAS Programming Language, pp. 1\u201371. Springer, Singapore (2021). https:\/\/doi.org\/10.1007\/978-981-15-7683-6_1"},{"key":"2_CR17","unstructured":"NVIDIA Corporation: NVLink & NVSwitch: Fastest HPC Data Center Platform (2025). https:\/\/www.nvidia.com\/en-us\/data-center\/nvlink\/. Accessed 13 May 2025"},{"key":"2_CR18","unstructured":"OpenACC Organization: OpenACC - Directives for Accelerators (2025). https:\/\/www.openacc.org\/. Accessed 21 May 2025"},{"key":"2_CR19","unstructured":"OpenMP Architecture Review Board: Openmp application programming interface version 6.0. Technical report, OpenMP Architecture Review Board (2024)"},{"key":"2_CR20","doi-asserted-by":"publisher","unstructured":"Patel, A., Doerfert, J.: Remote OpenMP offloading. In: Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2022, pp. 441\u2013442. Association for Computing Machinery, New York (2022). https:\/\/doi.org\/10.1145\/3503221.3508416","DOI":"10.1145\/3503221.3508416"},{"key":"2_CR21","doi-asserted-by":"publisher","unstructured":"Roa\u00a0Perdomo, D.A., et al.: Memory transfer decomposition: exploring smart data movement through architecture-aware strategies. In: Proceedings of the SC 2023 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis, SC-W 2023, pp. 1958\u20131967. Association for Computing Machinery (2023). https:\/\/doi.org\/10.1145\/3624062.3624609","DOI":"10.1145\/3624062.3624609"},{"key":"2_CR22","unstructured":"Shah, A., et al.: TACCL: guiding collective algorithm synthesis using communication sketches. In: Symposium on Networked Systems Design and Implementation (2021). https:\/\/api.semanticscholar.org\/CorpusID:250420873"},{"key":"2_CR23","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/978-981-15-7683-6_4","volume-title":"XcalableMP PGAS Programming Language","author":"A Tabuchi","year":"2021","unstructured":"Tabuchi, A., Murai, H., Nakao, M., Odajima, T., Boku, T.: XcalableACC: an integration of XcalableMP and OpenACC. In: Sato, M. (ed.) XcalableMP PGAS Programming Language, pp. 123\u2013146. Springer, Singapore (2021). https:\/\/doi.org\/10.1007\/978-981-15-7683-6_4"},{"key":"2_CR24","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/978-3-030-95953-1_4","volume-title":"Languages and Compilers for Parallel Computing","author":"S Tian","year":"2022","unstructured":"Tian, S., Doerfert, J., Chapman, B.: Concurrent execution of deferred OpenMP target tasks with hidden helper threads. In: Chapman, B., Moreira, J. (eds.) Languages and Compilers for Parallel Computing, pp. 41\u201356. Springer, Cham (2022)"},{"issue":"1","key":"2_CR25","doi-asserted-by":"publisher","first-page":"166","DOI":"10.1007\/s11390-023-2894-6","volume":"38","author":"A Weingram","year":"2023","unstructured":"Weingram, A., Li, Y., Qi, H., Ng, D., Dai, L., Lu, X.: xCCL: a survey of industry-led collective communication libraries for deep learning. J. Comput. Sci. Technol. 38(1), 166\u2013195 (2023). https:\/\/doi.org\/10.1007\/s11390-023-2894-6","journal-title":"J. Comput. Sci. Technol."},{"key":"2_CR26","doi-asserted-by":"publisher","unstructured":"Won, W., Elavazhagan, M., Srinivasan, S., Gupta, S., Krishna, T.: TACOS: topology-aware collective algorithm synthesizer for distributed machine learning. In: 2024 57th IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 856\u2013870. IEEE (2024). https:\/\/doi.org\/10.1109\/micro61859.2024.00068","DOI":"10.1109\/micro61859.2024.00068"},{"key":"2_CR27","doi-asserted-by":"publisher","unstructured":"Yviquel, H., et al.: The OpenMP cluster programming model. In: Workshop Proceedings of the 51st International Conference on Parallel Processing. ICPP Workshops 2022. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3547276.3548444","DOI":"10.1145\/3547276.3548444"}],"container-title":["Lecture Notes in Computer Science","OpenMP: Balancing Productivity and Performance Portability"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06343-4_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T13:23:57Z","timestamp":1759065837000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06343-4_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,29]]},"ISBN":["9783032063427","9783032063434"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06343-4_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,29]]},"assertion":[{"value":"29 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"IWOMP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on OpenMP","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Charlotte, NC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iwomp2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iwomp.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}