{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:52:23Z","timestamp":1773773543697,"version":"3.50.1"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,10]],"date-time":"2021-05-10T00:00:00Z","timestamp":1620604800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,10]],"date-time":"2021-05-10T00:00:00Z","timestamp":1620604800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,10]]},"DOI":"10.1109\/infocom42981.2021.9488810","type":"proceedings-article","created":{"date-parts":[[2021,7,27]],"date-time":"2021-07-27T00:07:32Z","timestamp":1627344452000},"page":"1-10","source":"Crossref","is-referenced-by-count":31,"title":["DC2: Delay-aware Compression Control for Distributed Machine Learning"],"prefix":"10.1109","author":[{"given":"Ahmed M.","family":"Abdelmoniem","sequence":"first","affiliation":[{"name":"KAUST"}]},{"given":"Marco","family":"Canini","sequence":"additional","affiliation":[{"name":"KAUST"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"ICLRE"},{"key":"ref38","article-title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","author":"shoeybi","year":"2019"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359642"},{"key":"ref32","article-title":"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour","author":"parikh","year":"2014","journal-title":"Foundations and Trends in Optimization"},{"key":"ref31","year":"0"},{"key":"ref30","year":"0"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/DASC\/PiCom\/DataCom\/CyberSciTec.2018.000-4"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2014.6848061"},{"key":"ref35","article-title":"1-bit stochastic gradient descent and its application to data-parallel distributed training of speech DNNs","author":"seide","year":"2014","journal-title":"InterSpeech"},{"key":"ref34","article-title":"Scaling Distributed Machine Learning with In-Network Aggregation","author":"sapio","year":"2021","journal-title":"USENIX NSDI"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5793"},{"key":"ref40","article-title":"On Scale-out Deep Learning Training for Cloud and HPC","author":"sridharan","year":"2018","journal-title":"SysML"},{"key":"ref11","article-title":"Poseidon: An Efficient Communication Architecture for Distributed Deep Learning on GPU Clusters","author":"hao zheng","year":"2017","journal-title":"USENIX ATC"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/NCA.2017.8171350"},{"key":"ref13","first-page":"770","article-title":"Deep Residual Learning for Image Recognition","author":"he","year":"2015","journal-title":"IEEE CVPR"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref16","article-title":"Gaia: Geo-distributed machine learning approaching LAN speeds","author":"hsieh","year":"2017","journal-title":"USENIX NSDI"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2806777.2806780"},{"key":"ref18","article-title":"Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads","author":"jeon","year":"2019","journal-title":"USENIX ATC"},{"key":"ref19","article-title":"Error Feedback Fixes SignSGD and other Gradient Compression Schemes","author":"karimireddy","year":"2019","journal-title":"ICML"},{"key":"ref28","article-title":"Treebank-3","author":"marcus","year":"0"},{"key":"ref4","article-title":"Coupling adaptive batch sizes with learning rates","author":"balles","year":"2017","journal-title":"Uncertainty in Artificial Intelligence (UAI)"},{"key":"ref27","article-title":"Optimizing Network Performance in Distributed Machine Learning","author":"mai","year":"2015","journal-title":"USENIX HotCloud"},{"key":"ref3","article-title":"The convergence of sparsified gradient methods","author":"alistarh","year":"2018","journal-title":"NeurIPS"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/0169-7552(89)90019-6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787510"},{"key":"ref5","first-page":"993","article-title":"Latent Dirichlet allocation","volume":"3","author":"blei","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref7","article-title":"Toward understanding the impact of staleness in distributed machine learning","author":"dai","year":"2019","journal-title":"ICLRE"},{"key":"ref2","article-title":"QSGD: Communication-efficient SGD via gradient quantization and encoding","author":"alistarh","year":"2017","journal-title":"NeurIPS"},{"key":"ref9","first-page":"2121","article-title":"Adaptive Subgradient Methods for Online Learning and Stochastic Optimization","volume":"12","author":"duchi","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1045"},{"key":"ref46","article-title":"Compressed Communication for Distributed Deep Learning: Survey and Quantitative Evaluation","author":"xu","year":"2020","journal-title":"KAUST Technical Report"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref45","article-title":"Dynamic backup workers for parallel machine learning","author":"xu","year":"2020","journal-title":"IFIP Networking Conference"},{"key":"ref48","article-title":"Parallelized stochastic gradient descent","author":"zinkevich","year":"2010","journal-title":"NeurIPS"},{"key":"ref22","article-title":"ADAM: A Method for Stochastic Optimization","author":"kingma","year":"2015","journal-title":"ICLRE"},{"key":"ref47","article-title":"Asynchronous stochastic gradient descent with delay compensation","author":"zheng","year":"2017","journal-title":"ICML"},{"key":"ref21","article-title":"On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima","author":"keskar","year":"2017","journal-title":"ICLRE"},{"key":"ref42","article-title":"Communication Compression for Decentralized Training","author":"tang","year":"2018","journal-title":"NeurIPS"},{"key":"ref24","article-title":"Learning multiple layers of features from tiny images","author":"krizhevsky","year":"2009","journal-title":"Technical Report University of Toronto"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref23","article-title":"Federated Learning: Strategies for Improving Communication Efficiency","author":"kone?n\u00fd","year":"2016","journal-title":"NeurIPS workshop"},{"key":"ref44","article-title":"TernGrad: Ternary gradients to reduce communication in distributed deep learning","author":"wen","year":"2017","journal-title":"NeurIPS"},{"key":"ref26","article-title":"PLink: Discovering and Exploiting Locality for Accelerated Distributed Training on the public Cloud","author":"luo","year":"2020","journal-title":"MLSys"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3377454"},{"key":"ref25","article-title":"Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training","author":"lin","year":"2018","journal-title":"ICLRE"}],"event":{"name":"IEEE INFOCOM 2021 - IEEE Conference on Computer Communications","location":"Vancouver, BC, Canada","start":{"date-parts":[[2021,5,10]]},"end":{"date-parts":[[2021,5,13]]}},"container-title":["IEEE INFOCOM 2021 - IEEE Conference on Computer Communications"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9488422\/9488423\/09488810.pdf?arnumber=9488810","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T23:14:26Z","timestamp":1659482066000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9488810\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,10]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/infocom42981.2021.9488810","relation":{},"subject":[],"published":{"date-parts":[[2021,5,10]]}}}