{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:16:46Z","timestamp":1773317806410,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T00:00:00Z","timestamp":1763164800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2311830, 2312927, 2323116, 2415201, 2504944"],"award-info":[{"award-number":["2311830, 2312927, 2323116, 2415201, 2504944"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100013580","name":"Access Group Center for Research and Policy Analysis","doi-asserted-by":"publisher","award":["NCR-130002"],"award-info":[{"award-number":["NCR-130002"]}],"id":[{"id":"10.13039\/100013580","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759811","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"1935-1950","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A Streaming Collectives Interface Targeting Dataflow Acceleration and HPC Workloads"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1918-397X","authenticated-orcid":false,"given":"Nicholas","family":"Contini","sequence":"first","affiliation":[{"name":"Ohio State University, Columbus, Ohio, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6421-2768","authenticated-orcid":false,"given":"Jake","family":"Queiser","sequence":"additional","affiliation":[{"name":"Ohio State University, Columbus, Ohio, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6430-8587","authenticated-orcid":false,"given":"Bharath","family":"Ramesh","sequence":"additional","affiliation":[{"name":"Ohio State University, Columbus, Ohio, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1200-2754","authenticated-orcid":false,"given":"Hari","family":"Subramoni","sequence":"additional","affiliation":[{"name":"Ohio State University, Columbus, Ohio, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0356-1781","authenticated-orcid":false,"given":"Dhabaleswar K.","family":"Panda","sequence":"additional","affiliation":[{"name":"Ohio State University, Columbus, Ohio, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT56656.2022.9974258"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00211"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240850"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/H2RC56700.2022.00007"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593720"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356201"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI52011.2020.00006"},{"key":"e_1_3_3_2_9_2","first-page":"211","volume-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24)","author":"He Zhenhao","year":"2024","unstructured":"Zhenhao He, Dario Korolija, Yu Zhu, Benjamin Ramhorst, Tristan Laan, Lucian Petrica, Michaela Blott, and Gustavo Alonso. 2024. { ACCL+} : an { FPGA-Based} Collective Engine for Distributed Applications. In 18th USENIX Symposium on Operating Systems Design and Implementation (OSDI 24). 211\u2013231."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/H2RC54759.2021.00009"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00017"},{"key":"e_1_3_3_2_12_2","unstructured":"Intel2025. oneAPI: A New Era of Heterogeneous Computing \u2014 intel.com. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/overview.html##gs.mwjpkl. [Accessed 30-06-2025]."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581576.3581602"},{"key":"e_1_3_3_2_14_2","first-page":"991","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Korolija Dario","year":"2020","unstructured":"Dario Korolija, Timothy Roscoe, and Gustavo Alonso. 2020. Do { OS} abstractions make sense on { FPGAs} ?. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 991\u20131010."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378482"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Marius Meyer Tobias Kenter Lucian Petrica Kenneth O\u2019Brien Michaela Blott and Christian Pessl. 2024. Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.18374 (2024).","DOI":"10.1007\/978-3-031-69766-1_9"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/H2RC51942.2020.00007"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Marius Meyer Tobias Kenter and Christian Plessl. 2023. Multi-fpga designs and scaling of HPC challenge benchmarks via MPI and circuit-switched inter-fpga networks. ACM Transactions on Reconfigurable Technology and Systems 16 2 (2023) 1\u201327.","DOI":"10.1145\/3576200"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-44534-8_24"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/2966986.2966995"},{"key":"e_1_3_3_2_21_2","unstructured":"Timothy Prickett Morgan2024. HPC Gets A Reconfigureable Dataflow Engine To Take On CPUs and GPUs. https:\/\/www.nextplatform.com\/2024\/10\/29\/hpc-gets-a-reconfigurable-dataflow-engine-to-take-on-cpus-and-gpus\/. Accessed: 2024-03-24."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Thomas Norrie Nishant Patil Doe\u00a0Hyun Yoon George Kurian Sheng Li James Laudon Cliff Young Norman Jouppi and David Patterson. 2021. The design process for Google\u2019s training chips: TPUv2 and TPUv3. IEEE Micro 41 2 (2021) 56\u201363.","DOI":"10.1109\/MM.2021.3058217"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080255"},{"key":"e_1_3_3_2_24_2","first-page":"75","volume-title":"New Frontiers in High Performance Computing and Big Data","author":"Peng Ivy\u00a0Bo","year":"2017","unstructured":"Ivy\u00a0Bo Peng, Stefano Markidis, Roberto Gioiosa, Gokcen Kestor, and Erwin Laure. 2017. MPI streams for HPC applications. In New Frontiers in High Performance Computing and Big Data. IOS Press, 75\u201392."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/2831129.2831131"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00078"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/H2RC51942.2020.00006"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00025"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3241793.3241800"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071015"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Jaehyeong Sim Somin Lee and Lee-Sup Kim. 2019. An energy-efficient deep convolutional neural network inference processor with enhanced output stationary dataflow in 65-nm CMOS. IEEE Transactions on Very Large Scale Integration (VLSI) Systems 28 1 (2019) 87\u2013100.","DOI":"10.1109\/TVLSI.2019.2935251"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00065"},{"key":"e_1_3_3_2_33_2","unstructured":"AMDVitis Unified Software Platform. https:\/\/www.amd.com\/en\/products\/software\/adaptive-socs-and-fpgas\/vitis.html"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Hao Wang Sreeram Potluri Miao Luo Ashish\u00a0Kumar Singh Sayantan Sur and Dhabaleswar\u00a0K Panda. 2011. MVAPICH2-GPU: optimized GPU to GPU communication for InfiniBand clusters. Computer Science-Research and Development 26 3 (2011) 257\u2013266.","DOI":"10.1007\/s00450-011-0171-3"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3632775.3662830"},{"key":"e_1_3_3_2_36_2","unstructured":"Lingqi Zhang Mohamed Wahib and Satoshi Matsuoka. 2019. Understanding the overheads of launching CUDA kernels. ICPP19 (2019) 5\u20138."}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3712285.3759811","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759811","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759811","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:26:07Z","timestamp":1773253567000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759811"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":35,"alternative-id":["10.1145\/3712285.3759811","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759811","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}