{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T15:12:13Z","timestamp":1762096333413,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"XRAC","award":["#NCR-130002"],"award-info":[{"award-number":["#NCR-130002"]}]},{"name":"NSF","award":["#1818253,#1854828,#1931537,#2007991,#2018627,#2311830,#2312927"],"award-info":[{"award-number":["#1818253,#1854828,#1931537,#2007991,#2018627,#2311830,#2312927"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624153","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"847-854","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["MPI-xCCL: A Portable MPI Library over Collective Communication Libraries for Various Accelerators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7471-7552","authenticated-orcid":false,"given":"Chen-Chun","family":"Chen","sequence":"first","affiliation":[{"name":"The Ohio State University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5856-5483","authenticated-orcid":false,"given":"Kawthar","family":"Shafie Khorassani","sequence":"additional","affiliation":[{"name":"The Ohio State University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7507-0940","authenticated-orcid":false,"given":"Pouya","family":"Kousha","sequence":"additional","affiliation":[{"name":"The Ohio State University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8490-7990","authenticated-orcid":false,"given":"Qinghua","family":"Zhou","sequence":"additional","affiliation":[{"name":"The Ohio State University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7129-9508","authenticated-orcid":false,"given":"Jinghan","family":"Yao","sequence":"additional","affiliation":[{"name":"The Ohio State University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1200-2754","authenticated-orcid":false,"given":"Hari","family":"Subramoni","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0356-1781","authenticated-orcid":false,"given":"Dhabaleswar K.","family":"Panda","sequence":"additional","affiliation":[{"name":"The Ohio State University, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"AMD. 2016. Radeon Open Compute Platform. https:\/\/rocmdocs.amd.com."},{"key":"e_1_3_2_2_2_1","unstructured":"AMD. 2018. RCCL. https:\/\/github.com\/ROCmSoftwarePlatform\/rccl."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33518-1_16"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00014"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid57682.2023.00022"},{"key":"e_1_3_2_2_6_1","unstructured":"Github. 2023. Unified Collective Communication. https:\/\/github.com\/openucx\/ucc. Accessed: 2023\/09\/26 08:11:55."},{"key":"e_1_3_2_2_7_1","unstructured":"IBM. 2018. IBM Spectrum MPI: Accelerating high-performance application parallelization. https:\/\/www.ibm.com\/us-en\/marketplace\/spectrum-mpi."},{"key":"e_1_3_2_2_8_1","unstructured":"Intel. 2004. Intel MPI. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools \/oneapi\/mpi-library.html."},{"key":"e_1_3_2_2_9_1","unstructured":"Intel. 2021. HCCL. https:\/\/github.com\/HabanaAI\/hccl_ofi_wrapper."},{"key":"e_1_3_2_2_10_1","unstructured":"Microsoft. 2016. MSCCL. https:\/\/github.com\/microsoft\/msccl."},{"key":"e_1_3_2_2_11_1","unstructured":"MPICH. 1992. MPICH. https:\/\/developer.nvidia.com\/nccl."},{"key":"e_1_3_2_2_12_1","volume-title":"MVAPICH: MPI over InfiniBand, Omni-Path, Ethernet\/iWARP, and RoCE","author":"Computing Laboratory Network-Based","year":"2001","unstructured":"Network-Based Computing Laboratory. 2001. MVAPICH: MPI over InfiniBand, Omni-Path, Ethernet\/iWARP, and RoCE. http:\/\/mvapich.cse.ohio-state.edu\/."},{"key":"e_1_3_2_2_13_1","unstructured":"NVIDIA. 2017. NCCL2. https:\/\/developer.nvidia.com\/nccl."},{"key":"e_1_3_2_2_14_1","unstructured":"OLCF. 2021. HPE CRAY MPI \u2013 SPOCK WORKSHOP. https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2021\/04\/HPE-Cray-MPIUpdate-nfr-presented.pdf."},{"key":"e_1_3_2_2_15_1","unstructured":"Open MPI. 2004. Open MPI: Open Source High Performance Computing. https:\/\/www.open-mpi.org\/."},{"key":"e_1_3_2_2_16_1","volume-title":"Efficient Inter-node MPI Communication Using GPUDirect RDMA for InfiniBand Clusters With NVIDIA GPUs. In 42nd International Conference on Parallel Processing (ICPP)","author":"Potluri Sreeram","year":"2013","unstructured":"Sreeram Potluri, Khaled Hamidouche, Akshay Venkatesh, Devendar Bureddy, and Dhabaleswar\u00a0K Panda. 2013. Efficient Inter-node MPI Communication Using GPUDirect RDMA for InfiniBand Clusters With NVIDIA GPUs. In 42nd International Conference on Parallel Processing (ICPP), 2013. IEEE, 80\u201389."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491418.3530773"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-78713-4_7"},{"key":"e_1_3_2_2_19_1","volume-title":"Designing Efficient Small Message Transfer Mechanism for Inter-node MPI Communication on InfiniBand GPU Clusters. In 2014 21st International Conference on High Performance Computing (HiPC). 1\u201310","author":"Shi R.","year":"2014","unstructured":"R. Shi, S. Potluri, K. Hamidouche, J. Perkins, M. Li, D. Rossetti, and D.\u00a0K. Panda. 2014. Designing Efficient Small Message Transfer Mechanism for Inter-node MPI Communication on InfiniBand GPU Clusters. In 2014 21st International Conference on High Performance Computing (HiPC). 1\u201310."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2013.222"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-023-2894-6"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00053"}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","acronym":"SC-W 2023","location":"Denver CO USA"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624153","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624153","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624153","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:04:24Z","timestamp":1755745464000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624153"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":22,"alternative-id":["10.1145\/3624062.3624153","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624153","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}