{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T13:40:27Z","timestamp":1769348427888,"version":"3.49.0"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.23919\/isc.2024.10528923","type":"proceedings-article","created":{"date-parts":[[2024,5,10]],"date-time":"2024-05-10T17:22:23Z","timestamp":1715361743000},"page":"1-12","source":"Crossref","is-referenced-by-count":4,"title":["ROCm-Aware Leader-based Designs for MPI Neighbourhood Collectives"],"prefix":"10.23919","author":[{"given":"Y\u0131ltan Hassan","family":"Temu\u00e7in","sequence":"first","affiliation":[{"name":"Queen's University,ECE Department,Kingston,ON,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahdieh","family":"Gazimirsaeed","sequence":"additional","affiliation":[{"name":"DCGPU and Accelerated Processing Advanced Micro Devices Inc,Austin,TX,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ryan E.","family":"Grant","sequence":"additional","affiliation":[{"name":"Queen's University,ECE Department,Kingston,ON,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmad","family":"Afsahi","sequence":"additional","affiliation":[{"name":"Queen's University,ECE Department,Kingston,ON,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2015.02.028"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3148670"},{"key":"ref3","article-title":"AMD","volume-title":"AMD INSTINCT\u2122 MI200 SERIES ACCELERATOR","year":"2022"},{"key":"ref4","article-title":"T. 500","volume-title":"TOP500","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-78713-4_7"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5160935"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470407"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.86"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2802658.2802663"},{"key":"ref10","article-title":"Message-Combining Algorithms for Isomorphic, Sparse Collective Communication","volume-title":"CoRR","volume":"abs\/1606.07676","author":"Tr\u00e4ff","year":"2016"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337848"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3416315.3416319"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2017.00047"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00087"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00038"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00070"},{"key":"ref17","article-title":"Message Passing Interface Forum","volume-title":"MPI: A Message-Passing Interface Standard Version 4.0","year":"2021"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4851"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.249"},{"key":"ref20","article-title":"AMD","volume-title":"AMD ROCm\u2122 Open Ecosystem","year":"2023"},{"key":"ref21","article-title":"O. MPI","volume-title":"Open MPI: Open Source High Performance Computing","year":"2022"},{"key":"ref22","article-title":"MVAPICH","volume-title":"MPI over InfiniBand, Omni-Path, Ethernet\/iWARP, and RoCE","year":"2022"},{"key":"ref23","article-title":"C. MPICH","volume-title":"Cray MPICH documentation","year":"2023"},{"key":"ref24","article-title":"AMD","year":"2023","journal-title":"GPU-aware MPI with AMD ROCm\u2122"},{"key":"ref25","volume-title":"ROCm System Management Interface (ROCm SMI) Library","year":"2022"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2016.44"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1145\/3392717.3392771","volume-title":"NV-Group: Link-Efficient Reduction for Distributed Deep Learning on Modern Dense GPU Systems","author":"Chu","year":"2020"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-64203-1_5"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3491418.3530773"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.parco.2016.10.001","article-title":"Improving performance of sparse matrix dense matrix multiplication on large-scale parallel systems","volume":"59","author":"Acer","year":"2016","journal-title":"Parallel Computing"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"ref32","volume-title":"HipBone: A performance-portable GPU-accelerated C++ version of the NekBone benchmark","author":"Chalmers","year":"2022"}],"event":{"name":"ISC High Performance 2024 Research Paper Proceedings (39th International Conference)","location":"Hamburg, Germany","start":{"date-parts":[[2024,5,12]]},"end":{"date-parts":[[2024,5,16]]}},"container-title":["ISC High Performance 2024 Research Paper Proceedings (39th International Conference)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10528919\/10528920\/10528923.pdf?arnumber=10528923","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T17:29:45Z","timestamp":1732728585000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10528923\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":32,"URL":"https:\/\/doi.org\/10.23919\/isc.2024.10528923","relation":{},"subject":[],"published":{"date-parts":[[2024,5]]}}}