{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T22:11:40Z","timestamp":1769033500375,"version":"3.49.0"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T00:00:00Z","timestamp":1763078400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T00:00:00Z","timestamp":1763078400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62576213"],"award-info":[{"award-number":["62576213"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,14]]},"DOI":"10.1109\/cloudcom67567.2025.11331477","type":"proceedings-article","created":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T20:37:16Z","timestamp":1768941436000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["HERO: Hierarchical Efficient Reduction Optimization for Distributed Learning Systems"],"prefix":"10.1109","author":[{"given":"Xudong","family":"Xiong","sequence":"first","affiliation":[{"name":"Shenzhen MSU-BIT University,Faculty of Engineering,Shenzhen,China"}]},{"given":"Lihan","family":"Xu","sequence":"additional","affiliation":[{"name":"Shenzhen MSU-BIT University,Faculty of Engineering,Shenzhen,China"}]},{"given":"Jiaxin","family":"Chen","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Research Institute, Shenzhen MSU-BIT University,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Megatron-LM: Training multi-billion parameter language models using model parallelism","author":"Shoeybi","year":"2019","journal-title":"arXiv preprint"},{"key":"ref2","first-page":"8583","article-title":"Scaling vision with sparse mixture of experts","volume":"34","author":"Riquelme","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3052862"},{"key":"ref4","article-title":"Communication-efficient data parallel distributed deep learning: A comprehensive survey","author":"Tang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS60453.2023.00261"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS57875.2023.00054"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155446"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577391"},{"key":"ref9","article-title":"PipeDream: Fast and efficient pipeline parallel DNN training","author":"Harlap","year":"2018","journal-title":"arXiv preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TON.2025.3556013"},{"key":"ref11","article-title":"BlueConnect: Novel hierarchical all-reduce on multi-tired network for deep learning","volume-title":"32nd Conference on Neural Information Processing Systems (NeurIPS 2018)","author":"Cho","year":"2018"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/jproc.2025.3542324"},{"key":"ref13","first-page":"21616","article-title":"Distributed training with heterogeneous data: Bridging median-and mean-based algorithms","volume":"33","author":"Chen","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401147"},{"key":"ref15","first-page":"739","article-title":"TOPOOPT: Co-optimizing network topology and parallelization strategy for distributed training jobs","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation","author":"Wang","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1093\/mnras\/stad1822"},{"key":"ref17","article-title":"Salus: Fine-grained GPU sharing primitives for deep learning applications","author":"Yu","year":"2019","journal-title":"arXiv preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20819"}],"event":{"name":"2025 IEEE International Conference on Cloud Computing Technology and Science (CloudCom)","location":"Shenzhen, China","start":{"date-parts":[[2025,11,14]]},"end":{"date-parts":[[2025,11,16]]}},"container-title":["2025 IEEE International Conference on Cloud Computing Technology and Science (CloudCom)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11330195\/11331311\/11331477.pdf?arnumber=11331477","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T07:18:43Z","timestamp":1768979923000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11331477\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,14]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/cloudcom67567.2025.11331477","relation":{},"subject":[],"published":{"date-parts":[[2025,11,14]]}}}