{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T14:15:22Z","timestamp":1774966522029,"version":"3.50.1"},"publisher-location":"New York, New York, USA","reference-count":27,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1145\/3229543.3229544","type":"proceedings-article","created":{"date-parts":[[2018,8,1]],"date-time":"2018-08-01T19:07:07Z","timestamp":1533150427000},"page":"1-7","source":"Crossref","is-referenced-by-count":30,"title":["HiPS"],"prefix":"10.1145","author":[{"given":"Jinkun","family":"Geng","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Cheng","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junfeng","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","reference":[{"key":"key-10.1145\/3229543.3229544-1","unstructured":"M. Abadi, P. Barham, J. Chen, et al. Tensorflow: A system for large-scale machine learning. In Proceedings of USENIX OSDI'16, pages 265--283, 2016."},{"key":"key-10.1145\/3229543.3229544-2","doi-asserted-by":"crossref","unstructured":"H. Abu-Libdeh, P. Costa, A. Rowstron, G. O'Shea, et al. Symbiotic routing in future data centers. In Proceedings of ACM SIGCOMM '10, pages 51--62, 2010.","DOI":"10.1145\/1851182.1851191"},{"key":"key-10.1145\/3229543.3229544-3","doi-asserted-by":"crossref","unstructured":"M. Al-Fares, A. Loukissas, and A. Vahdat. A scalable, commodity data center network architecture. In Proceedings of ACM SIGCOMM '08, pages 63--74, 2008.","DOI":"10.1145\/1402958.1402967"},{"key":"key-10.1145\/3229543.3229544-4","unstructured":"J. Albrecht, C. Tuttle, A. C. Snoeren, et al. Loose synchronization for large-scale networked systems. In Proceedings of the ATC '06, pages 28--28, 2006."},{"key":"key-10.1145\/3229543.3229544-5","unstructured":"S. Alexander and B. Mike, Del. Horovod: fast and easy distributed deep learning in tensorflow. arXiv preprint arXiv:1802.05799, 2018."},{"key":"key-10.1145\/3229543.3229544-6","unstructured":"G. Andrew. Bringing hpc techniques to deep learning, 2017."},{"key":"key-10.1145\/3229543.3229544-7","unstructured":"H. Cui, J. Cipar, Q. Ho, et al. Exploiting bounded staleness to speed up big data analytics. In Proceedings of the ATC'14, pages 37--48, 2014."},{"key":"key-10.1145\/3229543.3229544-8","doi-asserted-by":"crossref","unstructured":"A. Greenberg, J. R. Hamilton, N. Jain, S. Kandula, et al. VL2: A scalable and flexible data center network. In Proceedings of ACM SIGCOMM '09, pages 51--62, 2009.","DOI":"10.1145\/1592568.1592576"},{"key":"key-10.1145\/3229543.3229544-9","doi-asserted-by":"crossref","unstructured":"C. Guo, G. Lu, D. Li, et al. BCube: A high performance, server-centric network architecture for modular data centers. In Proceedings of ACM SIGCOMM '09, pages 63--74, 2009.","DOI":"10.1145\/1592568.1592577"},{"key":"key-10.1145\/3229543.3229544-10","doi-asserted-by":"crossref","unstructured":"C. Guo, H. Wu, Z. Deng, et al. RDMA over commodity ethernet at scale. In Proceedings of ACM SIGCOMM '16, pages 202--215, 2016.","DOI":"10.1145\/2934872.2934908"},{"key":"key-10.1145\/3229543.3229544-11","doi-asserted-by":"crossref","unstructured":"C. Guo, H. Wu, K. Tan, et al. DCell: A scalable and fault-tolerant network structure for data centers. In Proceedings of ACM SIGCOMM '08, pages 75--86, 2008.","DOI":"10.1145\/1402958.1402968"},{"key":"key-10.1145\/3229543.3229544-12","doi-asserted-by":"crossref","unstructured":"S. Hu, Y. Zhu, P. Cheng, et al. Deadlocks in datacenter networks: Why do they form, and how to avoid them. In Proceedings of the ACM HotNets '16, pages 92--98, 2016.","DOI":"10.1145\/3005745.3005760"},{"key":"key-10.1145\/3229543.3229544-13","unstructured":"Y. Jia, E. Shelhamer, J. Donahue, et al. Caffe: Convolutional architecture for fast feature embedding. arXiv preprint arXiv:1408.5093, 2014."},{"key":"key-10.1145\/3229543.3229544-14","doi-asserted-by":"crossref","unstructured":"D. Li, C. Guo, H. Wu, et al. Scalable and cost-effective interconnection of datacenter servers using dual server ports. IEEE\/ACM Trans. Netw., 19(1):102--114, Feb. 2011.","DOI":"10.1109\/TNET.2010.2053718"},{"key":"key-10.1145\/3229543.3229544-15","doi-asserted-by":"crossref","unstructured":"H. Li, A. Kadav, E. Kruus, et al. Malt: Distributed data-parallelism for existing ml applications. In Proceedings of ACM EuroSys '15, pages 3:1--3:16, 2015.","DOI":"10.1145\/2741948.2741965"},{"key":"key-10.1145\/3229543.3229544-16","doi-asserted-by":"crossref","unstructured":"M. Li, D. Andersen, J. W. Park, et al. Scaling distributed machine learning with the parameter server. In Proceedings of USENIX OSDI'14, pages 583--598, 2014.","DOI":"10.1145\/2640087.2644155"},{"key":"key-10.1145\/3229543.3229544-17","unstructured":"Mellanox. RoCE vs. iWARP competitive analysis. Technical report, 2017."},{"key":"key-10.1145\/3229543.3229544-18","doi-asserted-by":"crossref","unstructured":"R. Niranjan Mysore, A. Pamboris, N. Farrington, et al. Portland: A scalable fault-tolerant layer 2 data center network fabric. In Proceedings of ACM SIGCOMM '09, pages 39--50, 2009.","DOI":"10.1145\/1592568.1592575"},{"key":"key-10.1145\/3229543.3229544-19","doi-asserted-by":"crossref","unstructured":"P. Pitch and Y. Xin. Bandwidth optimal all-reduce algorithms for clusters of workstations. Journal of Parallel and Distributed Computing, 69(2):117--124, 2009.","DOI":"10.1016\/j.jpdc.2008.09.002"},{"key":"key-10.1145\/3229543.3229544-20","unstructured":"G. Priya, D. Piotr, R. Girshick, et al. Accurate, large minibatch sgd: Training imagenet in 1 hour. arXiv preprint arXiv:1706.02677, 2017."},{"key":"key-10.1145\/3229543.3229544-21","unstructured":"M. Radhika, S. Alex, P. Aurojit, Z. Eitan, K. Arvind, et al. Revisiting network support for RDMA. Technical report, 2017."},{"key":"key-10.1145\/3229543.3229544-22","unstructured":"C. Tianqi, L. Mu, L. Yutian, et al. Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274, 2015."},{"key":"key-10.1145\/3229543.3229544-23","doi-asserted-by":"crossref","unstructured":"L. G. Valiant. A bridging model for parallel computation. Commun. ACM, 33(8):103--111, Aug. 1990.","DOI":"10.1145\/79173.79181"},{"key":"key-10.1145\/3229543.3229544-24","doi-asserted-by":"crossref","unstructured":"J. Vienne, J. Chen, M. Wasi-Ur-Rahman, N. S. Islam, et al. Performance analysis and evaluation of infiniband fdr and 40gige roce on hpc and cloud computing systems. In Proceedings of IEEE HOTI '12, pages 48--55, 2012.","DOI":"10.1109\/HOTI.2012.19"},{"key":"key-10.1145\/3229543.3229544-25","doi-asserted-by":"crossref","unstructured":"P. Watcharapichat, V. L. Morales, R. C. Fernandez, et al. Ako: Decentralised deep learning with partial gradient exchange. In Proceedings of ACM SoCC '16, pages 84--97, 2016.","DOI":"10.1145\/2987550.2987586"},{"key":"key-10.1145\/3229543.3229544-26","doi-asserted-by":"crossref","unstructured":"B. Yi, J. Xia, L. Chen, and K. Chen. Towards zero copy dataflows using RDMA. In Proceedings of the ACM SIGCOMM'17 Posters and Demos, pages 28--30, 2017.","DOI":"10.1145\/3123878.3131975"},{"key":"key-10.1145\/3229543.3229544-27","doi-asserted-by":"crossref","unstructured":"Y. Zhu, H. Eran, D. Firestone, et al. Congestion control for large-scale RDMA deployments. In Proceedings of ACM SIGCOMM '15, pages 523--536, 2015.","DOI":"10.1145\/2785956.2787484"}],"event":{"name":"the 2018 Workshop","location":"Budapest, Hungary","acronym":"NetAI'18","sponsor":["SIGCOMM, ACM Special Interest Group on Data Communication"],"start":{"date-parts":[[2018,8,24]]},"end":{"date-parts":[[2018,8,24]]}},"container-title":["Proceedings of the 2018 Workshop on Network Meets AI &amp; ML  - NetAI'18"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3229543.3229544","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3229544&ftid=1992146&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:39:43Z","timestamp":1750210783000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3229543.3229544"}},"subtitle":["Hierarchical Parameter Synchronization in Large-Scale Distributed Machine Learning"],"proceedings-subject":"Network Meets AI & ML","short-title":[],"issued":{"date-parts":[[2018]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1145\/3229543.3229544","relation":{},"subject":[],"published":{"date-parts":[[2018]]}}}