{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:18:17Z","timestamp":1759331897779,"version":"3.37.3"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2022,6,17]],"date-time":"2022-06-17T00:00:00Z","timestamp":1655424000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,6,17]],"date-time":"2022-06-17T00:00:00Z","timestamp":1655424000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2018YFB0203803"],"award-info":[{"award-number":["2018YFB0203803"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1801266","U1711263"],"award-info":[{"award-number":["U1801266","U1711263"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2022,11]]},"DOI":"10.1007\/s00607-022-01094-1","type":"journal-article","created":{"date-parts":[[2022,6,17]],"date-time":"2022-06-17T16:03:00Z","timestamp":1655481780000},"page":"2431-2451","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient DNN training based on backpropagation parallelization"],"prefix":"10.1007","volume":"104","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6798-9683","authenticated-orcid":false,"given":"Danyang","family":"Xiao","sequence":"first","affiliation":[]},{"given":"Chengang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Weigang","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,17]]},"reference":[{"key":"1094_CR1","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: proceedings 3rd International Conference on Learning Representations, ICLR"},{"key":"1094_CR2","unstructured":"Dean J, Corrado G, Monga R, Chen K, Devin M, Mao MZ, Ranzato M, Senior AW, Tucker PA, Yang K, et\u00a0al (2012) Large scale distributed deep networks. In: Advances in Neural Information Processing Systems 25, 1232\u20131240"},{"key":"1094_CR3","unstructured":"Ho Q, Cipar J, Cui H, Lee S, Kim JK, Gibbons PB, Gibson GA, Ganger G, Xing EP (2013) More effective distributed ml via a stale synchronous parallel parameter server. In: Advances in Neural Information Processing Systems 26, 1223\u20131231"},{"key":"1094_CR4","doi-asserted-by":"crossref","unstructured":"Li M, Andersen DG, Park JW, Smola AJ, Ahmed A, Josifovski V, Long J, Shekita EJ, Su B-Y (2014) Scaling distributed machine learning with the parameter server. In: Proceedings of the 11th USENIX Conference on Operating Systems Design and Implementation, OSDI\u201914, 583\u2013598","DOI":"10.1145\/2640087.2644155"},{"key":"1094_CR5","unstructured":"Huang Y, Cheng Y, Bapna A, Firat O, Chen D, Chen M, Lee H, Ngiam J, Le QV, Wu Y, Chen z (2019) Gpipe: Efficient training of giant neural networks using pipeline parallelism. In: Advances in Neural Information Processing Systems 32:103\u2013112"},{"key":"1094_CR6","doi-asserted-by":"crossref","unstructured":"Narayanan D, Harlap A, Phanishayee A, Seshadri V, Devanur NR, Ganger GR, Gibbons PB, Zaharia M (2019) Pipedream: Generalized pipeline parallelism for DNN training. In: Proceedings of the 27th ACM Symposium on Operating Systems Principles, SOSP \u201919, 1-15","DOI":"10.1145\/3341301.3359646"},{"key":"1094_CR7","doi-asserted-by":"crossref","unstructured":"Aji AF, Heafield K (2017) Sparse communication for distributed gradient descent. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, 440\u2013445","DOI":"10.18653\/v1\/D17-1045"},{"key":"1094_CR8","doi-asserted-by":"crossref","unstructured":"You Y, Zhang Z, Hsieh C-J, Demmel J, Keutzer K (2018) Imagenet training in minutes. In: Proceedings of the 47th International Conference on Parallel Processing, ICPP","DOI":"10.1145\/3225058.3225069"},{"key":"1094_CR9","unstructured":"Lee N, Ajanthan T, Torr PHS, Jaggi M (2021) Understanding the effects of data parallelism and sparsity on neural network training. In: 9th International Conference on Learning Representations, ICLR"},{"key":"1094_CR10","unstructured":"Cui H, Hao Z, Ganger GR, Gibbons PB, Xing EP (2016) Geeps: Scalable deep learning on distributed GPUs with a GPU-specialized parameter server. In: Eleventh European Conference on Computer Systems"},{"issue":"5","key":"1094_CR11","doi-asserted-by":"publisher","first-page":"1128","DOI":"10.1109\/TPDS.2021.3104242","volume":"33","author":"S Wang","year":"2022","unstructured":"Wang S, Pi A, Zhou X (2022) Elastic parameter server: accelerating ml training with scalable resource scheduling. IEEE Transactions on Parallel and Distributed Systems 33(5):1128\u20131143","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"1094_CR12","doi-asserted-by":"crossref","unstructured":"Seide F, Fu H, Droppo J, Li G, Yu D (2014) 1-bit stochastic gradient descent and its application to data-parallel distributed training of speech DNNs. In: INTERSPEECH 2014, 15th Annual Conference of the International Speech Communication Association, 1058\u20131062","DOI":"10.21437\/Interspeech.2014-274"},{"key":"1094_CR13","first-page":"693","volume":"24","author":"B Recht","year":"2011","unstructured":"Recht B, Re C, Wright S, Niu F (2011) Hogwild: A lock-free approach to parallelizing stochastic gradient descent. In Advances in Neural Information Processing Systems 24:693\u2013701","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"1094_CR14","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1016\/j.ins.2020.05.121","volume":"548","author":"D Xiao","year":"2021","unstructured":"Xiao D, Mei Y, Kuang D, Chen M, Guo B, Weigang W (2021) Egc: entropy-based gradient compression for distributed deep learning. Information Sciences 548:118\u2013134","journal-title":"Information Sciences"},{"key":"1094_CR15","unstructured":"Lin Y, Han S, Mao H, Wang Y, Dally B (2018) Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training. In: International Conference on Learning Representations"},{"key":"1094_CR16","doi-asserted-by":"crossref","unstructured":"Yan Z, Xiao D, Chen M, Zhou J, Wu W (2020) Dual-way gradient sparsification for asynchronous distributed deep learning. In: 49th International Conference on Parallel Processing - ICPP, ICPP \u201920","DOI":"10.1145\/3404397.3404401"},{"key":"1094_CR17","doi-asserted-by":"crossref","unstructured":"Shi S, Wang Q, Chu X, Li B, Qin Y, Liu R, Zhao X (2020) Communication-efficient distributed deep learning with merged gradient sparsification on GPUs. In: IEEE INFOCOM 2020 - IEEE Conference on Computer Communications, pages 406\u2013415","DOI":"10.1109\/INFOCOM41043.2020.9155269"},{"key":"1094_CR18","first-page":"1509","volume":"30","author":"W Wen","year":"2017","unstructured":"Wen W, Cong X, Yan F, Chunpeng W, Wang Y, Chen Y, Li H (2017) Terngrad: ternary gradients to reduce communication in distributed deep learning. In Advances in Neural Information Processing Systems 30:1509\u20131519","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"1094_CR19","first-page":"1709","volume":"30","author":"D Alistarh","year":"2017","unstructured":"Alistarh D, Grubic D, Li J, Tomioka R, Vojnovic Milan (2017) Qsgd: Communication-efficient SGD via gradient quantization and encoding. In Advances in Neural Information Processing Systems 30:1709\u20131720","journal-title":"In Advances in Neural Information Processing Systems"},{"key":"1094_CR20","doi-asserted-by":"crossref","unstructured":"Abdi A, Fekri F (2020) Quantized compressive sampling of stochastic gradients for efficient communication in distributed deep learning. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, IAAI 2020, The Tenth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2020, New York, NY, USA, February 7-12, 3105\u20133112","DOI":"10.1609\/aaai.v34i04.5706"},{"key":"1094_CR21","doi-asserted-by":"crossref","unstructured":"Abdi A, Fekri F (2020) Indirect stochastic gradient quantization and its application in distributed deep learning. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, IAAI 2020, The Tenth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2020, New York, NY, USA, February 7-12, 2020, 3113\u20133120","DOI":"10.1609\/aaai.v34i04.5707"},{"key":"1094_CR22","unstructured":"Lin DD, Talathi SS, Annapureddy VS (2016) Fixed point quantization of deep convolutional networks. In: Proceedings of the 33rd International Conference on International Conference on Machine Learning - Volume 48, ICML\u201916, 2849-2858"},{"key":"1094_CR23","unstructured":"Vanhoucke V, Senior A, Mao MZ (2011) Improving the speed of neural networks on cpus. In: Deep Learning and Unsupervised Feature Learning Workshop, NIPS"},{"key":"1094_CR24","doi-asserted-by":"crossref","unstructured":"Rastegari M, Ordonez V, Redmon J, Farhadi A (2016) Xnor-net: imagenet classification using binary convolutional neural networks. European Conference on Computer Vision, 525\u2013542","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"1094_CR25","doi-asserted-by":"crossref","unstructured":"Cai Z, He X, Sun J, Vasconcelos N (2017) Deep learning with low precision by half-wave gaussian quantization. Computer vision and pattern recognition, 5406\u20135414","DOI":"10.1109\/CVPR.2017.574"},{"key":"1094_CR26","unstructured":"Mishra AK, Nurvitadhi E, Cook JJ, Marr D (2018) WRPN: Wide reduced-precision networks. In: 6th International Conference on Learning Representations, ICLR"},{"key":"1094_CR27","doi-asserted-by":"crossref","unstructured":"Geng J, Li D, Wang S (2019) Elasticpipe: An efficient and dynamic model-parallel solution to DNN training. In: Proceedings of the 10th Workshop on Scientific Cloud Computing, ScienceCloud \u201919, 5-9","DOI":"10.1145\/3322795.3331463"},{"key":"1094_CR28","unstructured":"Lee S, Kim JK, Zheng X, Ho Q, Gibson GA, Xing EP (2014) On model parallelization and scheduling strategies for distributed machine learning. In: Advances in Neural Information Processing Systems 27, 2834\u20132842"},{"key":"1094_CR29","unstructured":"Chen C-C, Yang C-L, Cheng H-Y (2018) Efficient and robust parallel DNN training through model parallelism on multi-gpu Platform. CoRR arXiv:1809.02839"},{"key":"1094_CR30","doi-asserted-by":"crossref","unstructured":"Chen X, Eversole A, Li G, Yu D, Seide F (September 2012) Pipelined back-propagation for context-dependent deep neural networks. In: Interspeech","DOI":"10.21437\/Interspeech.2012-7"},{"issue":"6","key":"1094_CR31","doi-asserted-by":"publisher","first-page":"970","DOI":"10.1109\/72.286892","volume":"4","author":"A Petrowski","year":"1993","unstructured":"Petrowski A, Dreyfus G, Girault C (1993) Performance analysis of a pipelined backpropagation parallel algorithm. IEEE Transactions on Neural Networks 4(6):970\u2013981","journal-title":"IEEE Transactions on Neural Networks"},{"key":"1094_CR32","doi-asserted-by":"crossref","unstructured":"Deng L, Yu D, Platt J (2012) Scalable stacking and learning for building deep architectures. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2133\u20132136","DOI":"10.1109\/ICASSP.2012.6288333"},{"key":"1094_CR33","unstructured":"Gaunt AL, Johnson MA, Riechert M, Tarlow D, Tomioka R, Vytiniotis D, Webster S (2017) Ampnet: Asynchronous model-parallel training for dynamic neural networks. arXiv preprint arXiv:1705.09786"},{"key":"1094_CR34","doi-asserted-by":"crossref","unstructured":"Abuadbba S, Kim K, Kim M, Thapa C, Camtepe SA, Gao Y, Kim H, Nepal S (2020) Can we use split learning on 1d cnn models for privacy preserving training? In: Proceedings of the 15th ACM Asia Conference on Computer and Communications Security, ASIA CCS \u201920, 305-318","DOI":"10.1145\/3320269.3384740"},{"key":"1094_CR35","doi-asserted-by":"crossref","unstructured":"Gao Y, Kim M, Abuadbba S, Kim Y, Thapa C, Kim K, Camtep SA, Kim H, Nepal S (2020) End-to-end evaluation of federated learning and split learning for internet of things. In: 2020 International Symposium on Reliable Distributed Systems (SRDS), 91\u2013100","DOI":"10.1109\/SRDS51746.2020.00017"},{"key":"1094_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.jnca.2018.05.003","volume":"116","author":"O Gupta","year":"2018","unstructured":"Gupta O, Raskar R (2018) Distributed learning of deep neural network over multiple agents. Journal of Network and Computer Applications 116:1\u20138","journal-title":"Journal of Network and Computer Applications"},{"key":"1094_CR37","unstructured":"Vepakomma P, Gupta O, Dubey A, Raskar R (2019) Reducing leakage in distributed deep learning for sensitive health data. ICLR AI for social good workshop"},{"key":"1094_CR38","unstructured":"Vepakomma P, Gupta O, Swedish T, Raskar R (2018) Split learning for health: distributed deep learning without sharing raw patient data. arXiv preprint arXiv:1812.00564"},{"key":"1094_CR39","unstructured":"Vepakomma P, Swedish T, Raskar R, Gupta O, Dubey A (2018) No peek: A survey of private distributed deep learning. arXiv preprint arXiv:1812.03288"},{"issue":"2","key":"1094_CR40","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1109\/MSEC.2018.2888775","volume":"17","author":"M Al-Rubaie","year":"2019","unstructured":"Al-Rubaie M, Chang JM (2019) Privacy-preserving machine learning: threats and solutions. IEEE Security Privacy 17(2):49\u201358","journal-title":"IEEE Security Privacy"},{"key":"1094_CR41","unstructured":"Guo Y (2018) A survey on methods and theories of quantized neural networks. arXiv preprint arXiv:1808.04752"},{"key":"1094_CR42","unstructured":"Paszke A, Gross S, Chintala S, Chanan G, Yang E, DeVito Z, Lin Z, Antiga L, Lerer A (2017) Automatic differentiation in pytorch. In NIPS-W, Alban Desmaison"},{"key":"1094_CR43","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1094_CR44","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1094_CR45","unstructured":"Verma V, Lamb A, Beckham C, Najafi A, Mitliagkas I, Lopez-Paz D, Bengio Y (June 2019) Manifold mixup: better representations by interpolating hidden states. In: Proceedings of the 36th International Conference on Machine Learning, ICML 2019, 9-15, Long Beach, California, USA, volume\u00a097 of Proceedings of Machine Learning Research, pages 6438\u20136447"},{"key":"1094_CR46","unstructured":"Wu Y, Schuster M, Chen Z, Le QV, Norouzi M, Macherey W, Krikun M, Cao Y (2016) Google\u2019s neural machine translation system: bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144"},{"key":"1094_CR47","first-page":"5145","volume":"31","author":"R Banner","year":"2018","unstructured":"Banner R, Hubara I, Hoffer E, Soudry D (2018) Scalable methods for 8-bit training of neural networks. In Advances in Neural Information Processing Systems 31:5145\u20135153","journal-title":"In Advances in Neural Information Processing Systems"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-022-01094-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-022-01094-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-022-01094-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T16:23:31Z","timestamp":1665246211000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-022-01094-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,17]]},"references-count":47,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2022,11]]}},"alternative-id":["1094"],"URL":"https:\/\/doi.org\/10.1007\/s00607-022-01094-1","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"type":"print","value":"0010-485X"},{"type":"electronic","value":"1436-5057"}],"subject":[],"published":{"date-parts":[[2022,6,17]]},"assertion":[{"value":"12 September 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 May 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 June 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}