{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:05:30Z","timestamp":1765544730583,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,9]],"date-time":"2020-03-09T00:00:00Z","timestamp":1583712000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1657333 CCF-1717754 CNS-1717984 CCF-1750656 CCF-1919289"],"award-info":[{"award-number":["CCF-1657333 CCF-1717754 CNS-1717984 CCF-1750656 CCF-1919289"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,9]]},"DOI":"10.1145\/3373376.3378499","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T22:37:01Z","timestamp":1584139021000},"page":"401-416","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":62,"title":["Prague: High-Performance Heterogeneity-Aware Asynchronous Decentralized Training"],"prefix":"10.1145","author":[{"given":"Qinyi","family":"Luo","sequence":"first","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}]},{"given":"Jiaao","family":"He","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"given":"Youwei","family":"Zhuo","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}]},{"given":"Xuehai","family":"Qian","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,3,13]]},"reference":[{"volume-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","year":"2015","author":"Abadi Mart'in","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","first-page":"265","volume-title":"12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Abadi Martin","year":"2016"},{"key":"e_1_3_2_1_3_1","first-page":"1709","volume-title":"Advances in Neural Information Processing Systems 30","author":"Alistarh Dan","year":"2017"},{"volume-title":"Demystifying parallel and distributed deep learning: An in-depth concurrency analysis","year":"2018","author":"Ben-Nun Tal","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","unstructured":"Texas Advanced Computing Center. Maverick2 User Guide - TACC User Portal. https:\/\/portal.tacc.utexas.edu\/user-guides\/maverick2.  Texas Advanced Computing Center. Maverick2 User Guide - TACC User Portal. https:\/\/portal.tacc.utexas.edu\/user-guides\/maverick2."},{"volume-title":"International Conference on Learning Representations Workshop Track","year":"2016","author":"Chen Jianmin","key":"e_1_3_2_1_6_1"},{"volume-title":"Mxnet: A flexible and efficient machine learning library for heterogeneous distributed systems. CoRR, abs\/1512.01274","year":"2015","author":"Chen Tianqi","key":"e_1_3_2_1_7_1"},{"volume-title":"Blueconnect: Novel hierarchical all-reduce on multi-tired network for deep learning","year":"2018","author":"Cho Minsik","key":"e_1_3_2_1_8_1"},{"key":"e_1_3_2_1_9_1","first-page":"1337","volume-title":"Proceedings of the 30th International Conference on Machine Learning, volume 28.3 of Proceedings of Machine Learning Research","author":"Coates Adam","year":"2013"},{"volume-title":"MPI: A Message-Passing Interface Standard","year":"2015","author":"MPI","key":"e_1_3_2_1_10_1"},{"key":"e_1_3_2_1_11_1","unstructured":"IBM Corporation and Oak Ridge National Laboratory. Summit - IBM Power System AC922 IBM POWER9 22C 3.07GHz NVIDIA Volta GV100 Dual-rail Mellanox EDR Infiniband | TOP500 Supercomputer Sites. https:\/\/www.top500.org\/system\/179397.  IBM Corporation and Oak Ridge National Laboratory. Summit - IBM Power System AC922 IBM POWER9 22C 3.07GHz NVIDIA Volta GV100 Dual-rail Mellanox EDR Infiniband | TOP500 Supercomputer Sites. https:\/\/www.top500.org\/system\/179397."},{"key":"e_1_3_2_1_12_1","unstructured":"Intel Corporation. Intel? MPI Library | Intel? Software. https:\/\/software.intel.com\/en-us\/mpi-library.  Intel Corporation. Intel? MPI Library | Intel? Software. https:\/\/software.intel.com\/en-us\/mpi-library."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"issue":"969","key":"e_1_3_2_1_14_1","first-page":"02","article-title":"The impact of translation technologies on the process and product of translation","volume":"10","author":"Doherty Stephen","year":"2016","journal-title":"International Journal of Communication"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30218-6_19"},{"volume-title":"Ross B. Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He. Accurate, large minibatch SGD: training imagenet in 1 hour. CoRR, abs\/1706.02677","year":"2017","author":"Goyal Priya","key":"e_1_3_2_1_16_1"},{"volume-title":"Pipedream: Fast and efficient pipeline parallel DNN training. CoRR, abs\/1806.03377","year":"2018","author":"Harlap Aaron","key":"e_1_3_2_1_17_1"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"e_1_3_2_1_21_1","first-page":"1223","volume-title":"Proceedings of the 26th International Conference on Neural Information Processing Systems -","volume":"1","author":"Ho Qirong","year":"2013"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3275447"},{"volume-title":"11th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 19)","year":"2019","author":"Hong Rankyung","key":"e_1_3_2_1_23_1"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1090\/S0273-0979-06-01126-8"},{"key":"e_1_3_2_1_25_1","first-page":"629","volume-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Hsieh Kevin","year":"2017"},{"key":"e_1_3_2_1_26_1","unstructured":"Sylvain Jeaugey. Nccl 2.0. GTC 2017.  Sylvain Jeaugey. Nccl 2.0. GTC 2017."},{"volume-title":"Highly scalable deep learning training system with mixed-precision: Training imagenet in four minutes. arXiv preprint arXiv:1807.11205","year":"2018","author":"Jia Xianyan","key":"e_1_3_2_1_27_1"},{"volume-title":"Beyond data and model parallelism for deep neural networks. CoRR, abs\/1807.05358","year":"2018","author":"Jia Zhihao","key":"e_1_3_2_1_28_1"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035933"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293883.3299818"},{"volume-title":"Department of Computer Science","year":"2009","author":"Krizhevsky A.","key":"e_1_3_2_1_31_1"},{"first-page":"51","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis","author":"Kurth Thorsten","key":"e_1_3_2_1_32_1"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2640087.2644155"},{"key":"e_1_3_2_1_34_1","first-page":"8056","volume-title":"Proceedings of the 32Nd International Conference on Neural Information Processing Systems, NIPS'18","author":"Li Youjie","year":"2018"},{"key":"e_1_3_2_1_35_1","first-page":"5330","volume-title":"Advances in Neural Information Processing Systems 30","author":"Lian Xiangru","year":"2017"},{"key":"e_1_3_2_1_36_1","first-page":"3049","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\"a ssan","author":"Lian Xiangru","year":"2018"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267840"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304009"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356170"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.09.002"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359642"},{"key":"e_1_3_2_1_42_1","first-page":"693","volume-title":"Advances in Neural Information Processing Systems 24","author":"Recht Benjamin","year":"2011"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"volume-title":"Horovod: fast and easy distributed deep learning in TensorFlow. arXiv preprint arXiv:1802.05799","year":"2018","author":"Sergeev Alexander","key":"e_1_3_2_1_44_1"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2882950"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"volume-title":"Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556","year":"2014","author":"Simonyan Karen","key":"e_1_3_2_1_47_1"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00036"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00027"},{"volume-title":"Optimizing network performance for distributed dnn training on gpu clusters: Imagenet\/alexnet training in 1.5 minutes. arXiv preprint arXiv:1902.06855","year":"2019","author":"Sun Peng","key":"e_1_3_2_1_50_1"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"volume-title":"NeurIPS","year":"2018","author":"Tang Hanlin","key":"e_1_3_2_1_52_1"},{"key":"e_1_3_2_1_53_1","first-page":"4848","volume-title":"Proceedings of the 35th International Conference on Machine Learning, volume 80 of Proceedings of Machine Learning Research","author":"Tang Hanlin","year":"2018"},{"volume-title":"Nicoletta Calzolari (Conference Chair)","year":"2012","author":"Tiedemann J\u00f6rg","key":"e_1_3_2_1_54_1"},{"key":"e_1_3_2_1_55_1","first-page":"5998","volume-title":"Advances in Neural Information Processing Systems 30","author":"Vaswani Ashish","year":"2017"},{"volume-title":"Adaptive communication strategies to achieve the best error-runtime trade-off in local-update sgd. ArXiv, abs\/1810.08313","year":"2018","author":"Wang Jianyu","key":"e_1_3_2_1_56_1"},{"volume-title":"Chien chin Huang, and Jinyang Li. Supporting very large models using automatic dataflow graph partitioning","year":"2018","author":"Wang Minjie","key":"e_1_3_2_1_57_1"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303953"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783323"},{"volume-title":"Yet another accelerated sgd: Resnet-50 training on imagenet in 74.7 seconds. arXiv preprint arXiv:1903.12650","year":"2019","author":"Yamazaki Masafumi","key":"e_1_3_2_1_60_1"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41551-018-0305-z"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732977.2733001"}],"event":{"name":"ASPLOS '20: Architectural Support for Programming Languages and Operating Systems","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"Lausanne Switzerland","acronym":"ASPLOS '20"},"container-title":["Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378499","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378499","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378499","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:32:59Z","timestamp":1750199579000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378499"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,9]]},"references-count":62,"alternative-id":["10.1145\/3373376.3378499","10.1145\/3373376"],"URL":"https:\/\/doi.org\/10.1145\/3373376.3378499","relation":{},"subject":[],"published":{"date-parts":[[2020,3,9]]},"assertion":[{"value":"2020-03-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}