{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T10:50:31Z","timestamp":1760784631491,"version":"3.28.0"},"reference-count":72,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T00:00:00Z","timestamp":1548979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T00:00:00Z","timestamp":1548979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,2,1]],"date-time":"2019-02-01T00:00:00Z","timestamp":1548979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1109\/empdp.2019.8671552","type":"proceedings-article","created":{"date-parts":[[2019,3,22]],"date-time":"2019-03-22T02:40:56Z","timestamp":1553222456000},"page":"124-131","source":"Crossref","is-referenced-by-count":6,"title":["Deep Learning at Scale"],"prefix":"10.1109","author":[{"given":"Paolo","family":"Viviani","sequence":"first","affiliation":[]},{"given":"Maurizio","family":"Drocco","sequence":"additional","affiliation":[]},{"given":"Daniele","family":"Baccega","sequence":"additional","affiliation":[]},{"given":"Iacopo","family":"Colonnelli","sequence":"additional","affiliation":[]},{"given":"Marco","family":"Aldinucci","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref72","article-title":"HPC4AI, an AI-on-demand federated platform endeavour","author":"aldinucci","year":"2018","journal-title":"ACM Computing Frontiers Ischia Italy"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/898\/8\/082039"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15880-3_7"},{"key":"ref39","article-title":"Scalable Distributed DNN Training Using Commodity GPU Cloud Computing","author":"strom","year":"2015","journal-title":"Dresden"},{"key":"ref38","article-title":"GPU Asynchronous Stochastic Gradient Descent to Speed Up Neural Network Training","author":"paine","year":"2013","journal-title":"CoRR vol abs\/1312 6186"},{"key":"ref33","article-title":"Extremely Large Minibatch SGD: Training ResNet-50 on ImageNet in 15 Minutes","author":"akiba","year":"2017","journal-title":"CoRR vol abs\/1711 04325"},{"key":"ref32","article-title":"PowerAI DDL","author":"cho","year":"2017","journal-title":"CoRR vol abs\/1708 02188"},{"key":"ref31","article-title":"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour","author":"goyal","year":"2017","journal-title":"CoRR vol abs\/1706 02677"},{"key":"ref30","article-title":"cuDNN: Efficient Primitives for Deep Learning","author":"chetlur","year":"2014","journal-title":"CoRR vol abs\/1410 0759"},{"key":"ref37","first-page":"571","article-title":"Project Adam: Building an Efficient and Scalable Deep Learning Training System","author":"chilimbi","year":"2014","journal-title":"11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14)"},{"key":"ref36","article-title":"HOGWILD!: A Lock-Free Approach to Parallelizing Stochastic Gradient Descent","author":"niu","year":"2011","journal-title":"CoRR vol abs\/1106 5730"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref34","article-title":"Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift","author":"ioffe","year":"2015","journal-title":"CoRR vol abs\/1502 03167"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.36"},{"key":"ref62","article-title":"Asynchronous Parallel Algorithms for Nonconvex Big-Data Optimization: Model and Convergence","author":"cannelli","year":"2016","journal-title":"CoRR vol abs\/1607 04818"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2010.2052531"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2648041"},{"journal-title":"Revisiting small batch training for deep neural networks","year":"2018","author":"masters","key":"ref28"},{"key":"ref64","article-title":"GossipGraD: Scalable Deep Learning using Gossip Communication based Asynchronous Gradient Descent","author":"daily","year":"2018","journal-title":"CoRR vol abs\/1803 05880"},{"key":"ref27","article-title":"Revisiting Distributed Synchronous SGD","author":"chen","year":"2016","journal-title":"CoRR vol abs\/1604 00981"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"journal-title":"Parallel programming with global asynchronous memory Models C++ APIs and implementations","year":"2017","author":"drocco","key":"ref66"},{"key":"ref29","article-title":"Theano: Deep Learning on GPUs with Python","author":"bergstra","year":"2011","journal-title":"Big Learn Workshop NIPS'11"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-36949-0_7"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2016.97"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1002\/9781119332015.ch13"},{"key":"ref2","first-page":"1223","article-title":"Large Scale Distributed Deep Networks","author":"dean","year":"2012","journal-title":"Proceedings of the 25th International Conference on Neural Information Processing Systems - Volume 1 ser NIPS'12 USA Curran Associates Inc"},{"key":"ref1","article-title":"Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis","author":"ben-nun","year":"2018","journal-title":"CoRR vol abs\/1802 09941"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(98)00116-6"},{"key":"ref22","first-page":"192","article-title":"The Loss Surfaces of Multilayer Networks","volume":"38","author":"choromanska","year":"2015","journal-title":"Proceedings of the Eighteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref21","article-title":"Adam: A Method for Stochastic Optimization","author":"kingma","year":"2014","journal-title":"CoRR vol abs\/1412 6980"},{"key":"ref24","article-title":"Optimization Methods for Large-Scale Machine Learning","author":"bottou","year":"2016","journal-title":"CoRR vol abs\/1606 04838"},{"key":"ref23","article-title":"On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima","author":"keskar","year":"2016","journal-title":"CoRR vol abs\/1609 04836"},{"key":"ref26","article-title":"Don't Decay the Learning Rate, Increase the Batch Size","author":"smith","year":"2017","journal-title":"CoRR vol abs\/1711 00489"},{"key":"ref25","article-title":"Three Factors Influencing Minima in SGD","author":"jastrzebski","year":"2017","journal-title":"CoRR vol abs\/1711 04623"},{"key":"ref50","first-page":"685","article-title":"Deep Learning with Elastic Averaging SGD","author":"zhang","year":"2015","journal-title":"Proceedings of the 28th International Conference on Neural Information Processing Systems - Volume 1 ser NIPS'15"},{"key":"ref51","article-title":"Parallel training of Deep Neural Networks with Natural Gradient and Parameter Averaging","author":"povey","year":"2014","journal-title":"CoRR vol abs\/1410 7455"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/GAMENETS.2009.5137386"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00091"},{"key":"ref57","article-title":"Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions","author":"vasilache","year":"2018","journal-title":"CoRR vol abs\/1802 04730"},{"key":"ref56","article-title":"An Introduction to Computational Networks and the Computational Network Toolkit","author":"yu","year":"2014","journal-title":"Microsoft Research"},{"key":"ref55","article-title":"1-Bit Stochastic Gradient Descent and Application to Data-Parallel Distributed Training of Speech DNNs","author":"seide","year":"2014","journal-title":"Microsoft Research"},{"key":"ref54","article-title":"Deep Gradient Compression: Reducing the Communication Bandwidth for Distributed Training","author":"lin","year":"2017","journal-title":"CoRR vol abs\/1712 01887"},{"key":"ref53","article-title":"Distilling the Knowledge in a Neural Network","author":"hinton","year":"2015","journal-title":"CoRR vol abs\/1503 02531"},{"key":"ref52","article-title":"Why M Heads are Better than One: Training a Diverse Ensemble of Deep Networks","author":"lee","year":"2015","journal-title":"CoRR vol abs\/1511 06314"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-49430-8_2"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35289-8_26"},{"key":"ref40","article-title":"Staleness-aware Async-SGD for Distributed Deep Learning","author":"zhang","year":"2015","journal-title":"CoRR vol abs\/1511 05950"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553486"},{"key":"ref13","first-page":"232","article-title":"Removing Noise in On-Line Search using Adaptive Batch Sizes","author":"orr","year":"1997","journal-title":"Advances in Neural Information Processing Systems 9"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/NNSP.1992.253705"},{"key":"ref15","first-page":"161","article-title":"The Tradeoffs of Large Scale Learning","volume":"20","author":"bottou","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(03)00138-2"},{"key":"ref17","first-page":"217","article-title":"Large Scale Online Learning","author":"bottou","year":"2004","journal-title":"Advances in Neural Information Processing Systems 16"},{"journal-title":"Deep Learning","year":"2016","author":"goodfellow","key":"ref18"},{"key":"ref19","article-title":"An overview of gradient descent optimization algorithms","author":"ruder","year":"2016","journal-title":"CoRR vol abs\/1609 04747"},{"key":"ref4","article-title":"Pipelined Back-Propagation for Context-Dependent Deep Neural Networks","author":"chen","year":"2012","journal-title":"Microsoft Research"},{"key":"ref3","first-page":"1279","article-title":"Tiled convolutional neural networks","author":"ngiam","year":"2010","journal-title":"Advances in Neural Information Processing Systems 23"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288333"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0006203"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1137\/0330046"},{"key":"ref9","first-page":"21","article-title":"A theoretical framework for back-propagation","author":"lecun","year":"1988","journal-title":"Proceedings of the 1988 Connectionist Models Summer School CMU Pittsburg PA"},{"journal-title":"On Scalable Deep Learning and Parallelizing Gradient Descent","year":"0","author":"hermans","key":"ref46"},{"journal-title":"DAWNBench An End-to-End Deep Learning Benchmark and Competition","year":"2017","author":"coleman","key":"ref45"},{"key":"ref48","volume":"1710","author":"lian","year":"2017","journal-title":"Asynchronous Decentralized Parallel Stochastic Gradient Descent"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2016.7852343"},{"key":"ref42","article-title":"Asynchronous Parallel Stochastic Gradient Descent - A Numeric Core for Scalable Distributed Machine Learning Algorithms","author":"keuper","year":"2015","journal-title":"CoRR vol abs\/1505 04956"},{"key":"ref41","article-title":"Asynchronous Stochastic Gradient Descent with Delay Compensation for Distributed Deep Learning","author":"zheng","year":"2016","journal-title":"CoRR vol abs\/1609 08326"},{"key":"ref44","article-title":"Can Decentralized Algorithms Outperform Centralized Algorithms? A Case Study for Decentralized Parallel Stochastic Gradient Descent","author":"lian","year":"2017","journal-title":"CoRR vol abs\/1705 09056"},{"key":"ref43","article-title":"Accumulated Gradient Normalization","author":"hermans","year":"2017","journal-title":"CoRR vol abs\/1710 02368"}],"event":{"name":"2019 27th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP)","start":{"date-parts":[[2019,2,13]]},"location":"Pavia, Italy","end":{"date-parts":[[2019,2,15]]}},"container-title":["2019 27th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8663972\/8671541\/08671552.pdf?arnumber=8671552","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T20:24:59Z","timestamp":1658262299000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8671552\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2]]},"references-count":72,"URL":"https:\/\/doi.org\/10.1109\/empdp.2019.8671552","relation":{},"subject":[],"published":{"date-parts":[[2019,2]]}}}