{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T13:14:38Z","timestamp":1767705278361,"version":"3.37.3"},"reference-count":29,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. I"],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/tcsi.2021.3098841","type":"journal-article","created":{"date-parts":[[2021,7,29]],"date-time":"2021-07-29T20:08:43Z","timestamp":1627589323000},"page":"4194-4206","source":"Crossref","is-referenced-by-count":11,"title":["Scalable Fully Pipelined Hardware Architecture for In-Network Aggregated AllReduce Communication"],"prefix":"10.1109","volume":"68","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2719-4055","authenticated-orcid":false,"given":"Yao","family":"Liu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5530-5659","authenticated-orcid":false,"given":"Junyi","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3546-4070","authenticated-orcid":false,"given":"Shuo","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Qiaoling","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5192-1649","authenticated-orcid":false,"given":"Wangchen","family":"Dai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6764-0729","authenticated-orcid":false,"given":"Ray Chak Chung","family":"Cheung","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.37"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3363554"},{"journal-title":"Bringing HPC Techniques to Deep Learning","year":"2017","author":"gibiansky","key":"ref12"},{"key":"ref13","first-page":"41","article-title":"Scalable hierarchical aggregation and reduction protocol (SHARP) streaming-aggregation hardware design and evaluation","author":"graham","year":"2020","journal-title":"Proc Int Conf High Perform Comput"},{"key":"ref14","article-title":"Scaling distributed machine learning with in-network aggregation","author":"sapio","year":"2019","journal-title":"arXiv 1903 06701"},{"key":"ref15","article-title":"NetReduce: RDMA-compatible in-network reduction for distributed DNN training acceleration","author":"liu","year":"2020","journal-title":"arXiv 2009 09736"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2019.2895036"},{"key":"ref17","first-page":"571","article-title":"Project adam: Building an efficient and scalable deep learning training system","author":"chilimbi","year":"2014","journal-title":"Proc of USENIX Symp on Operating Systems Design and Implementation (OSDI)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2934872.2934908"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2014.2315631"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737367"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2640087.2644155"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2021.3052981"},{"key":"ref29","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv 1409 1556"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322259"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337889"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3236367.3236381"},{"key":"ref2","article-title":"Megatron-LM: Training multi-billion parameter language models using model parallelism","author":"shoeybi","year":"2019","journal-title":"arXiv 1909 08053"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2928289"},{"key":"ref1","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"arXiv 1609 08144"},{"key":"ref20","article-title":"Horovod: Fast and easy distributed deep learning in TensorFlow","author":"sergeev","year":"2018","journal-title":"arXiv 1802 05799"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2018.2877414"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2019.00057"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2012.125"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486011"},{"key":"ref26","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"journal-title":"InfiniBand Architecture Specification Volume 1 Release 1 0a","year":"2015","key":"ref25"}],"container-title":["IEEE Transactions on Circuits and Systems I: Regular Papers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8919\/9551015\/09501248.pdf?arnumber=9501248","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T20:58:52Z","timestamp":1641848332000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9501248\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":29,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tcsi.2021.3098841","relation":{},"ISSN":["1549-8328","1558-0806"],"issn-type":[{"type":"print","value":"1549-8328"},{"type":"electronic","value":"1558-0806"}],"subject":[],"published":{"date-parts":[[2021,10]]}}}