{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T09:41:38Z","timestamp":1775122898093,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,8,10]],"date-time":"2020-08-10T00:00:00Z","timestamp":1597017600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,8,14]]},"DOI":"10.1145\/3405671.3405810","type":"proceedings-article","created":{"date-parts":[[2020,7,21]],"date-time":"2020-07-21T10:05:25Z","timestamp":1595325925000},"page":"8-13","source":"Crossref","is-referenced-by-count":57,"title":["Is Network the Bottleneck of Distributed Training?"],"prefix":"10.1145","author":[{"given":"Zhen","family":"Zhang","sequence":"first","affiliation":[{"name":"Johns Hopkins University"}]},{"given":"Chaokun","family":"Chang","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"Haibin","family":"Lin","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"Yida","family":"Wang","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"Raman","family":"Arora","sequence":"additional","affiliation":[{"name":"Johns Hopkins University"}]},{"given":"Xin","family":"Jin","sequence":"additional","affiliation":[{"name":"Johns Hopkins University"}]}],"member":"320","published-online":{"date-parts":[[2020,8,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"\"AI and Compute.\" https:\/\/openai.com\/blog\/ai-and-compute\/. \"AI and Compute.\" https:\/\/openai.com\/blog\/ai-and-compute\/."},{"key":"e_1_3_2_1_2_1","unstructured":"\"GPUs Power Five of World's Top Seven Supercomputers.\" https:\/\/www.hpcwire.com\/2018\/06\/25\/gpus-power-five-of-worlds-top-seven-supercomputers\/. \"GPUs Power Five of World's Top Seven Supercomputers.\" https:\/\/www.hpcwire.com\/2018\/06\/25\/gpus-power-five-of-worlds-top-seven-supercomputers\/."},{"key":"e_1_3_2_1_3_1","unstructured":"\"Cloud TPU.\" https:\/\/cloud.google.com\/tpu\/. \"Cloud TPU.\" https:\/\/cloud.google.com\/tpu\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Y. Peng Y. Zhu Y. Chen Y. Bao B. Yi C. Lan C. Wu and C. Guo \"A generic communication scheduler for distributed dnn training acceleration \" in ACM SOSP 2019. Y. Peng Y. Zhu Y. Chen Y. Bao B. Yi C. Lan C. Wu and C. Guo \"A generic communication scheduler for distributed dnn training acceleration \" in ACM SOSP 2019.","DOI":"10.1145\/3341301.3359642"},{"key":"e_1_3_2_1_5_1","unstructured":"Y. Lin S. Han H. Mao Y. Wang and W. J. Dally \"Deep gradient compression: Reducing the communication bandwidth for distributed training \" arXiv preprint arXiv:1712.01887 2017. Y. Lin S. Han H. Mao Y. Wang and W. J. Dally \"Deep gradient compression: Reducing the communication bandwidth for distributed training \" arXiv preprint arXiv:1712.01887 2017."},{"key":"e_1_3_2_1_6_1","unstructured":"A. Sapio M. Canini C.-Y. Ho J. Nelson P. Kalnis C. Kim A. Krishnamurthy M. Moshref D. R. Ports and P. Richt\u00e1rik \"Scaling distributed machine learning with in-network aggregation \" arXiv preprint arXiv:1903.06701 2019. A. Sapio M. Canini C.-Y. Ho J. Nelson P. Kalnis C. Kim A. Krishnamurthy M. Moshref D. R. Ports and P. Richt\u00e1rik \"Scaling distributed machine learning with in-network aggregation \" arXiv preprint arXiv:1903.06701 2019."},{"key":"e_1_3_2_1_7_1","unstructured":"W. Wen C. Xu F. Yan C. Wu Y. Wang Y. Chen and H. Li \"Terngrad: Ternary gradients to reduce communication in distributed deep learning \" in NeurIPS 2017. W. Wen C. Xu F. Yan C. Wu Y. Wang Y. Chen and H. Li \"Terngrad: Ternary gradients to reduce communication in distributed deep learning \" in NeurIPS 2017."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"D. Narayanan A. Harlap A. Phanishayee V. Seshadri N. R. Devanur G. R. Ganger P. B. Gibbons and M. Zaharia \"Pipedream: generalized pipeline parallelism for dnn training \" in ACM SOSP 2019. D. Narayanan A. Harlap A. Phanishayee V. Seshadri N. R. Devanur G. R. Ganger P. B. Gibbons and M. Zaharia \"Pipedream: generalized pipeline parallelism for dnn training \" in ACM SOSP 2019.","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_2_1_9_1","volume-title":"Association for Computational Linguistics","author":"Aji A. F.","year":"2017"},{"key":"e_1_3_2_1_10_1","unstructured":"D. Alistarh J. Li R. Tomioka and M. Vojnovic \"Qsgd: Randomized quantization for communication-optimal stochastic gradient descent \" arXiv preprint arXiv:1610.02132 2016. D. Alistarh J. Li R. Tomioka and M. Vojnovic \"Qsgd: Randomized quantization for communication-optimal stochastic gradient descent \" arXiv preprint arXiv:1610.02132 2016."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"C.-Y. Chen J. Choi D. Brand A. Agrawal W. Zhang and K. Gopalakrishnan \"Adacomp: Adaptive residual gradient compression for data-parallel distributed training \" in Thirty-Second AAAI Conference on Artificial Intelligence 2018. C.-Y. Chen J. Choi D. Brand A. Agrawal W. Zhang and K. Gopalakrishnan \"Adacomp: Adaptive residual gradient compression for data-parallel distributed training \" in Thirty-Second AAAI Conference on Artificial Intelligence 2018.","DOI":"10.1609\/aaai.v32i1.11728"},{"key":"e_1_3_2_1_12_1","unstructured":"J. Kone\u010dny H. B. McMahan F. X. Yu P. Richt\u00e1rik A. T. Suresh and D. Bacon \"Federated learning: Strategies for improving communication efficiency \" arXiv preprint arXiv:1610.05492 2016. J. Kone\u010dny H. B. McMahan F. X. Yu P. Richt\u00e1rik A. T. Suresh and D. Bacon \"Federated learning: Strategies for improving communication efficiency \" arXiv preprint arXiv:1610.05492 2016."},{"key":"e_1_3_2_1_13_1","unstructured":"J. Wangni J. Wang J. Liu and T. Zhang \"Gradient sparsification for communication-efficient distributed optimization \" in Advances in Neural Information Processing Systems 2018. J. Wangni J. Wang J. Liu and T. Zhang \"Gradient sparsification for communication-efficient distributed optimization \" in Advances in Neural Information Processing Systems 2018."},{"key":"e_1_3_2_1_14_1","unstructured":"H. Wang S. Sievert S. Liu Z. Charles D. Papailiopoulos and S. Wright \"Atomo: Communication-efficient learning via atomic sparsification\" in Advances in Neural Information Processing Systems 2018. H. Wang S. Sievert S. Liu Z. Charles D. Papailiopoulos and S. Wright \"Atomo: Communication-efficient learning via atomic sparsification\" in Advances in Neural Information Processing Systems 2018."},{"key":"e_1_3_2_1_15_1","unstructured":"N. Ivkin D. Rothchild E. Ullah I. Stoica and R. Arora \"Communication-efficient distributed SGD with sketching \" in Advances in Neural Information Processing Systems 2019. N. Ivkin D. Rothchild E. Ullah I. Stoica and R. Arora \"Communication-efficient distributed SGD with sketching \" in Advances in Neural Information Processing Systems 2019."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"F. Seide H. Fu J. Droppo G. Li and D. Yu \"1-bit stochastic gradient descent and its application to data-parallel distributed training of speech dnns \" in Fifteenth Annual Conference of the International Speech Communication Association 2014. F. Seide H. Fu J. Droppo G. Li and D. Yu \"1-bit stochastic gradient descent and its application to data-parallel distributed training of speech dnns \" in Fifteenth Annual Conference of the International Speech Communication Association 2014.","DOI":"10.21437\/Interspeech.2014-274"},{"key":"e_1_3_2_1_17_1","unstructured":"H. Lim D. G. Andersen and M. Kaminsky \"3lc: Lightweight and effective traffic compression for distributed machine learning \" arXiv preprint arXiv:1802.07389 2018. H. Lim D. G. Andersen and M. Kaminsky \"3lc: Lightweight and effective traffic compression for distributed machine learning \" arXiv preprint arXiv:1802.07389 2018."},{"key":"e_1_3_2_1_18_1","unstructured":"A. Sergeev and M. Del Balso \"Horovod: fast and easy distributed dee plearning in tensorflow \" arXiv preprint arXiv:1802.05799 2018. A. Sergeev and M. Del Balso \"Horovod: fast and easy distributed dee plearning in tensorflow \" arXiv preprint arXiv:1802.05799 2018."},{"key":"e_1_3_2_1_19_1","unstructured":"\"TensorFlow.\" https:\/\/www.tensorflow.org\/. \"TensorFlow.\" https:\/\/www.tensorflow.org\/."},{"key":"e_1_3_2_1_20_1","unstructured":"\"PyTorch.\" https:\/\/pytorch.org\/. \"PyTorch.\" https:\/\/pytorch.org\/."},{"key":"e_1_3_2_1_21_1","unstructured":"\"MXNet.\" https:\/\/mxnet.apache.org\/. \"MXNet.\" https:\/\/mxnet.apache.org\/."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"K. He X. Zhang S. Ren and J. Sun \"Deep residual learning for image recognition\" in IEEE CVPR 2016. K. He X. Zhang S. Ren and J. Sun \"Deep residual learning for image recognition\" in IEEE CVPR 2016.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_23_1","unstructured":"K. Simonyan and A. Zisserman \"Very deep convolutional networks for large-scale image recognition \" arXiv preprint arXiv:1409.1556 2014. K. Simonyan and A. Zisserman \"Very deep convolutional networks for large-scale image recognition \" arXiv preprint arXiv:1409.1556 2014."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei \"Imagenet: A large-scale hierarchical image database\" in IEEE CVPR 2009. J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei \"Imagenet: A large-scale hierarchical image database\" in IEEE CVPR 2009.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_25_1","unstructured":"K. Mishchenko E. Gorbunov M. Tak\u00e1\u010d and P. Richt\u00e1rik \"Distributed learning with compressed gradient differences \" arXiv preprint arXiv:1901.09269 2019. K. Mishchenko E. Gorbunov M. Tak\u00e1\u010d and P. Richt\u00e1rik \"Distributed learning with compressed gradient differences \" arXiv preprint arXiv:1901.09269 2019."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Y. You Z. Zhang C.-J. Hsieh J. Demmel and K. Keutzer \"Imagenet training in minutes\" in Proceedings of the 47th International Conference on Parallel Processing 2018. Y. You Z. Zhang C.-J. Hsieh J. Demmel and K. Keutzer \"Imagenet training in minutes\" in Proceedings of the 47th International Conference on Parallel Processing 2018.","DOI":"10.1145\/3225058.3225069"},{"key":"e_1_3_2_1_27_1","unstructured":"\"Optimizing deep learning on P3 and P3dn with EFA.\" https:\/\/aws.amazon.com\/blogs\/compute\/optimizing-deep-learning-on-p3-and-p3dn-with-efa\/. \"Optimizing deep learning on P3 and P3dn with EFA.\" https:\/\/aws.amazon.com\/blogs\/compute\/optimizing-deep-learning-on-p3-and-p3dn-with-efa\/."},{"key":"e_1_3_2_1_28_1","unstructured":"\"Amazon Web Services achieves fastest training times for BERT and Mask R-CNN.\" https:\/\/aws.amazon.com\/blogs\/machine-learning\/amazon-web-services-achieves-fastest-training-times-for-bert-and-mask-r-cnn\/. \"Amazon Web Services achieves fastest training times for BERT and Mask R-CNN.\" https:\/\/aws.amazon.com\/blogs\/machine-learning\/amazon-web-services-achieves-fastest-training-times-for-bert-and-mask-r-cnn\/."}],"event":{"name":"SIGCOMM '20: Annual conference of the ACM Special Interest Group on Data Communication on the applications, technologies, architectures, and protocols for computer communication","location":"Virtual Event USA","acronym":"SIGCOMM '20","sponsor":["SIGCOMM ACM Special Interest Group on Data Communication"]},"container-title":["Proceedings of the Workshop on Network Meets AI &amp; ML"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3405671.3405810","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3405671.3405810","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:32:09Z","timestamp":1750195929000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3405671.3405810"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,10]]},"references-count":28,"alternative-id":["10.1145\/3405671.3405810","10.1145\/3405671"],"URL":"https:\/\/doi.org\/10.1145\/3405671.3405810","relation":{},"subject":[],"published":{"date-parts":[[2020,8,10]]}}}