{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:21:57Z","timestamp":1773318117963,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","funder":[{"DOI":"10.13039\/100018693","name":"HORIZON EUROPE Framework Programme","doi-asserted-by":"publisher","award":["101175702"],"award-info":[{"award-number":["101175702"]}],"id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["101002047"],"award-info":[{"award-number":["101002047"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759835","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:05:39Z","timestamp":1762963539000},"page":"1901-1916","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Bine Trees: Enhancing Collective Operations by Optimizing Communication Locality"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7244-639X","authenticated-orcid":false,"given":"Daniele","family":"De Sensi","sequence":"first","affiliation":[{"name":"Sapienza University of Rome, Rome, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2007-7762","authenticated-orcid":false,"given":"Saverio","family":"Pasqualoni","sequence":"additional","affiliation":[{"name":"Sapienza University of Rome, Rome, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8071-8806","authenticated-orcid":false,"given":"Lorenzo","family":"Piarulli","sequence":"additional","affiliation":[{"name":"Sapienza University of Rome, Rome, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2345-473X","authenticated-orcid":false,"given":"Tommaso","family":"Bonato","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8837-3116","authenticated-orcid":false,"given":"Seydou","family":"Ba","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science (R-CCS), Kobe, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5422-1891","authenticated-orcid":false,"given":"Matteo","family":"Turisini","sequence":"additional","affiliation":[{"name":"CINECA, Rome, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5343-414X","authenticated-orcid":false,"given":"Jens","family":"Domke","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science (R-CCS), Kobe, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1333-9797","authenticated-orcid":false,"given":"Torsten","family":"Hoefler","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Zurich, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"Top 500. 2025. Top 500 List. https:\/\/top500.org\/."},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1816004"},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2018.00090"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/1088149.1088183"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607089"},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607089"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"crossref","unstructured":"Fabio Banchelli Marta Garcia-Gasulla Filippo Mantovani Joan Vinyals Josep Pocurull David Vicente Beatriz Eguzkitza Flavio C.\u00a0C. Galeazzo Mario\u00a0C. Acosta and Sergi Girona. 2025. Introducing MareNostrum5: A European pre-exascale energy-efficient system designed to serve a broad spectrum of scientific workloads. arxiv:https:\/\/arXiv.org\/abs\/2503.09917\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2503.09917","DOI":"10.1016\/j.future.2025.108125"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","unstructured":"M. Barnett R. Littlefield D.G. Payne and R. Vandegeijn. 1995. Global Combine Algorithms for 2-D Meshes with Wormhole Routing. J. Parallel and Distrib. Comput. 24 2 (1995) 191\u2013201. 10.1006\/jpdc.1995.1018","DOI":"10.1006\/jpdc.1995.1018"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"crossref","unstructured":"M. Bayatpour S. Chakraborty H. Subramoni X. Lu and D. Panda. 2017. Scalable Reduction Collectives with Data Partitioning-based Multi-Leader Design.","DOI":"10.1145\/3126908.3126954"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3555819.3555825"},{"key":"e_1_3_3_3_12_2","series-title":"(NSDI\u201924)","volume-title":"Proceedings of the 21st USENIX Symposium on Networked Systems Design and Implementation","author":"Blach Nils","year":"2024","unstructured":"Nils Blach, Maciej Besta, Daniele De\u00a0Sensi, Jens Domke, Hussein Harake, Shigang Li, Patrick Iff, Marek Konieczny, Kartik Lakhotia, Ales Kubicek, Marcel Ferrari, Fabrizio Petrini, and Torsten Hoefler. 2024. A high-performance design, implementation, deployment, and evaluation of the slim fly network. In Proceedings of the 21st USENIX Symposium on Networked Systems Design and Implementation (Santa Clara, CA, USA) (NSDI\u201924). USENIX Association, USA, Article 57, 20\u00a0pages."},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3712285.3759884"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441620"},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356215"},{"key":"e_1_3_3_3_16_2","unstructured":"Daniele De Sensi. 2025. Swing Allreduce Simulator. https:\/\/github.com\/HLC-Lab\/swing-allreduce-sim\/blob\/main\/sst-elements-library-11.1.0\/src\/sst\/elements\/ember\/mpi\/motifs\/emberswingcoll.cc [Accessed 14-04-2025]."},{"key":"e_1_3_3_3_17_2","volume-title":"21th USENIX Symposium on Networked Systems Design and Implementation (NSDI 24)","author":"De\u00a0Sensi Daniele","year":"2024","unstructured":"Daniele De\u00a0Sensi, Tommaso Bonato, David Saam, and Torsten Hoefler. 2024. Swing: Short-cutting Rings for Higher Bandwidth Allreduce. In 21th USENIX Symposium on Networked Systems Design and Implementation (NSDI 24). USENIX Association, Santa Clara, CA."},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356196"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/sc41405.2020.00039"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00039"},{"key":"e_1_3_3_3_21_2","unstructured":"Dylan Patel Daniel Nishball and Jeremie Eliahou Ontiveros. [n. d.]. Multi-Datacenter Training: OpenAI\u2019s Ambitious Plan To Beat Google\u2019s Infrastructure. https:\/\/semianalysis.com\/2024\/09\/04\/multi-datacenter-training-openais\/ [Accessed 19-03-2024]."},{"key":"e_1_3_3_3_22_2","unstructured":"EuroHPC Joint Undertaking. [n. d.]. LUMI Supercomputer. https:\/\/lumi-supercomputer.eu\/ [Accessed 19-03-2024]."},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3524059.3532380"},{"key":"e_1_3_3_3_24_2","unstructured":"Fujitsu. 2025. Development Studio uTofu User\u2019s Guide. https:\/\/software.fujitsu.com\/jp\/manual\/manualfiles\/m210007\/j2ul2482\/02enz003\/j2ul-2482-02enz0.pdf."},{"key":"e_1_3_3_3_25_2","unstructured":"Fujitsu. 2025. Topology Awareness in the Tofu Interconnect Series. https:\/\/nowlab.cse.ohio-state.edu\/static\/media\/workshops\/presentations\/ExaComm16-Invited-Talk-2-Yuichiro-Ajima.pdf [Accessed 14-04-2025]."},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"publisher","unstructured":"Gemini Team. 2023. Gemini: A Family of Highly Capable Multimodal Models. arXiv e-prints Article arXiv:2312.11805 (Dec. 2023) arXiv:2312.11805\u00a0pages. 10.48550\/arXiv.2312.11805 arxiv:https:\/\/arXiv.org\/abs\/2312.11805\u00a0[cs.CL]","DOI":"10.48550\/arXiv.2312.11805"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2011.42"},{"key":"e_1_3_3_3_28_2","unstructured":"Mert Hidayetoglu Simon\u00a0Garcia de Gonzalo Elliott Slaughter Pinku Surana Wen mei Hwu William Gropp and Alex Aiken. 2024. HiCCL: A Hierarchical Collective Communication Library. arxiv:https:\/\/arXiv.org\/abs\/2408.05962\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2408.05962"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807644"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/sc41404.2022.00016"},{"key":"e_1_3_3_3_31_2","volume-title":"Performance Evaluation of MPI Collective Communication on the Supercomputer Fugaku","author":"Hosono Nanatsuki","year":"2021","unstructured":"Nanatsuki Hosono, Masaki Iwasawa, and Junichiro Makino. 2021. Performance Evaluation of MPI Collective Communication on the Supercomputer Fugaku. Technical Report. Issue 15. The supercomputer \u201cFUGAKU\u201d is a massively parallel computer, which consists of 158,976 nodes. The performance of MPI collective communication is important to achieve highly effective performance. In this article, we will report the performance evaluation of Alltoall, Alltoallv, and Bcast on FUGAKU. We surveyed the effect of algorithms, segment size, and node topology. We will report the results and the optimal setting to achieve high efficiency.."},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810093"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5160896"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","unstructured":"Yao Kang Xin Wang Neil McGlohon Misbah Mubarak Sudheer Chunduri and Zhiling Lan. 2024. Modeling and Analysis of Application Interference on Dragonfly+. CoRR abs\/2406.15097 (2024). 10.48550\/ARXIV.2406.15097 arXiv:https:\/\/arXiv.org\/abs\/2406.15097","DOI":"10.48550\/ARXIV.2406.15097"},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2000.846009"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.19"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PAD53543.2021.00028"},{"key":"e_1_3_3_3_38_2","unstructured":"Shouxi Luo Renyi Wang and Huanlai Xing. 2024. Efficient inter-datacenter ALLReduce with multiple trees. IEEE Transactions on Network Science and Engineering (2024)."},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPA-BDCloud-SocialCom-SustainCom52081.2021.00045"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.30"},{"key":"e_1_3_3_3_41_2","unstructured":"Meta. [n. d.]. Building Meta\u2019s GenAI Infrastructure. https:\/\/engineering.fb.com\/2024\/03\/12\/data-center-engineering\/building-metas-genai-infrastructure\/ [Accessed 19-03-2024]."},{"key":"e_1_3_3_3_42_2","unstructured":"Timothy\u00a0Prickett Morgan. 2025. So Who Is Building That 100 000 GPU Cluster for xAI? https:\/\/www.nextplatform.com\/2024\/07\/30\/so-who-is-building-that-100000-gpu-cluster-for-xai\/."},{"key":"e_1_3_3_3_43_2","unstructured":"Open MPI. 2025. Open MPI Binomial Tree Broadcast. https:\/\/github.com\/open-mpi\/ompi\/blob\/13d05225283eb9941bb13e0f17688fe6a96d7e19\/ompi\/mca\/coll\/base\/coll_base_bcast.c#L333."},{"key":"e_1_3_3_3_44_2","unstructured":"Open MPI. 2025. Open MPI Binomial Tree Construction. https:\/\/github.com\/open-mpi\/ompi\/blob\/13d05225283eb9941bb13e0f17688fe6a96d7e19\/ompi\/mca\/coll\/base\/coll_base_topo.c#L331."},{"key":"e_1_3_3_3_45_2","unstructured":"Open MPI. 2025. Open MPI Scatter Allgather Broadcast. https:\/\/github.com\/open-mpi\/ompi\/blob\/01da1c4c9cd3588737d0bba9a4111c764eca197d\/ompi\/mca\/coll\/base\/coll_base_bcast.c#L774."},{"key":"e_1_3_3_3_46_2","unstructured":"Open MPI. 2025. Open MPI Sparbit Allgather. https:\/\/github.com\/open-mpi\/ompi\/blob\/13d05225283eb9941bb13e0f17688fe6a96d7e19\/ompi\/mca\/coll\/base\/coll_base_allgather.c#L228."},{"key":"e_1_3_3_3_47_2","unstructured":"MPICH. 2025. Allreduce Implementations on v3.4a2. https:\/\/github.com\/pmodels\/mpich\/tree\/v3.4a2\/src\/mpi\/coll\/allreduce [Accessed 14-04-2025]."},{"key":"e_1_3_3_3_48_2","unstructured":"MPICH. 2025. MPICH Binomial Tree Broadcast. https:\/\/github.com\/pmodels\/mpich\/blob\/2329c3c5b576487f9d2dc79acd0253b315b51b12\/src\/mpi\/coll\/bcast\/bcast_intra_binomial.c#L13."},{"key":"e_1_3_3_3_49_2","unstructured":"MPICH. 2025. MPICH Binomial Tree Broadcast. https:\/\/github.com\/pmodels\/mpich\/blob\/2329c3c5b576487f9d2dc79acd0253b315b51b12\/src\/mpi\/coll\/bcast\/bcast_intra_scatter_recursive_doubling_allgather.c."},{"key":"e_1_3_3_3_50_2","unstructured":"HLC Lab Sapienza\u00a0University of Rome. 2025. Bine Trees Reference Implementation. https:\/\/github.com\/HLC-Lab\/bine-trees."},{"key":"e_1_3_3_3_51_2","unstructured":"HLC Lab Sapienza\u00a0University of Rome. 2025. PICO: Performance Insights for Collective Operations. https:\/\/github.com\/HLC-Lab\/pico\/."},{"key":"e_1_3_3_3_52_2","unstructured":"OpenAI. 2025. Pre-Training GPT-4.5. https:\/\/www.youtube.com\/watch?v=6nJZopACRuQ."},{"key":"e_1_3_3_3_53_2","unstructured":"Saverio Pasqualoni Lorenzo Piarulli and Daniele\u00a0De Sensi. 2025. PICO: Performance Insights for Collective Operations. arxiv:https:\/\/arXiv.org\/abs\/2508.16809\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2508.16809"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","unstructured":"Mitsuhisa Sato Yuetsu Kodama Miwako Tsuji and Tesuya Odajima. 2022. Co-Design and System for the Supercomputer \u201cFugaku\u201d. IEEE Micro 42 2 (2022) 26\u201334. 10.1109\/MM.2021.3136882","DOI":"10.1109\/MM.2021.3136882"},{"key":"e_1_3_3_3_55_2","unstructured":"SchedMD. 2025. sbatch User Guide. https:\/\/slurm.schedmd.com\/sbatch.html [Accessed 14-04-2025]."},{"key":"e_1_3_3_3_56_2","unstructured":"SchedMD. 2025. Slurm Topology Guide. https:\/\/slurm.schedmd.com\/topology.html [Accessed 14-04-2025]."},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"crossref","unstructured":"Daniele\u00a0De Sensi Saverio Pasqualoni Lorenzo Piarulli Tommaso Bonato Seydou Ba Matteo Turisini Jens Domke and Torsten Hoefler. 2025. Bine Trees: Enhancing Collective Operations by Optimizing Communication Locality. arxiv:https:\/\/arXiv.org\/abs\/2508.17311\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2508.17311","DOI":"10.1145\/3712285.3759835"},{"key":"e_1_3_3_3_58_2","unstructured":"Aashaka Shah Vijay Chidambaram Meghan Cowan Saeed Maleki Madan Musuvathi Todd Mytkowicz Jacob Nelson Olli Saarikivi and Rachee Singh. 2022. TACCL: Guiding Collective Algorithm Synthesis using Communication Sketches. arxiv:https:\/\/arXiv.org\/abs\/2111.04867\u00a0[cs.DC]"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/HiPINEB.2017.11"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00030"},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.43"},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"publisher","DOI":"10.1007\/11846802_16"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"publisher","unstructured":"Matteo Turisini Giorgio Amati and Mirko Cestari. 2024. LEONARDO: A Pan-European Pre-Exascale Supercomputer for HPC and AI applications. Journal of Large Scale Reasearch Facilities 8 A186 (2024). 10.17815\/jlsrf-8-1861","DOI":"10.17815\/jlsrf-8-1861"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","unstructured":"Xin Wang Yao Kang and Zhiling Lan. 2025. Preventing Workload Interference with Intelligent Routing and Flexible Job Placement Strategy on Dragonfly System. ACM Trans. Model. Comput. Simul. 35 2 Article 17 (April 2025) 22\u00a0pages. 10.1145\/3706104","DOI":"10.1145\/3706104"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","unstructured":"William Won Midhilesh Elavazhagan Sudarshan Srinivasan Swati Gupta and Tushar Krishna. 2024. TACOS: Topology-Aware Collective Algorithm Synthesizer for Distributed Machine Learning. 10.1109\/MICRO61859.2024.00068 arxiv:https:\/\/arXiv.org\/abs\/2304.05301\u00a0[cs.DC]","DOI":"10.1109\/MICRO61859.2024.00068"},{"key":"e_1_3_3_3_66_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.63"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759835","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:36:08Z","timestamp":1773254168000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759835"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":65,"alternative-id":["10.1145\/3712285.3759835","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759835","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}