{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:06:10Z","timestamp":1766268370568,"version":"3.28.0"},"reference-count":41,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,3]]},"DOI":"10.1109\/icassp.2016.7472805","type":"proceedings-article","created":{"date-parts":[[2016,6,24]],"date-time":"2016-06-24T01:58:30Z","timestamp":1466733510000},"page":"5880-5884","source":"Crossref","is-referenced-by-count":68,"title":["Scalable training of deep learning machines by incremental block training with intra-block parallel optimization and blockwise model-update filtering"],"prefix":"10.1109","author":[{"given":"Kai","family":"Chen","sequence":"first","affiliation":[]},{"given":"Qiang","family":"Huo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Designing an MPI-based parallel and distributed machine learning platform on large-scale HPC clusters","author":"yan","year":"2012","journal-title":"2012 International Workshop on Statistical Machine Learning for Speech Processing"},{"key":"ref38","first-page":"372","article-title":"A method of solving a convex programming problem with convergence rate O(l\/sqr(k))","volume":"27","author":"nesterov","year":"1983","journal-title":"Soviet Mathematics Doklady"},{"key":"ref33","first-page":"2635","article-title":"Rectified linear neural networks with tied-scalar regularization for LVCSR","author":"zhang","year":"0","journal-title":"Proceedings of Interspeech 2015"},{"key":"ref32","first-page":"1488","article-title":"Scalable distributed DNN training using commodity GPU cloud computing","author":"strom","year":"0","journal-title":"Proceedings of Interspeech 2015"},{"key":"ref31","article-title":"Increasing deep neural network acoustic model size for large vocabulary continuous speech recognition","author":"maas","year":"2014","journal-title":"arXiv 1406 7806"},{"key":"ref30","article-title":"Parallel training of DNNs with natural gradient and parameter averaging","author":"povey","year":"0","journal-title":"Proc ICLR-2015"},{"key":"ref37","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","author":"sutskever","year":"0","journal-title":"Proc of ICML'2013"},{"key":"ref36","article-title":"Deep learning with elastic averaging SGD","author":"zhang","year":"0","journal-title":"Proc NIPS-2015"},{"article-title":"Deep learning using alternating direction method of multipliers","year":"2014","author":"huo","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1561\/2200000016"},{"key":"ref10","first-page":"1468","article-title":"Fast and accurate recurrent neural network acoustic models for speech recognition","author":"sak","year":"0","journal-title":"Proceedings of Interspeech 2015"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2539499"},{"key":"ref12","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"0","journal-title":"Proc of NIPS 2012"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICFHR.2014.57"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333730"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333794"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/323533a0"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/0041-5553(64)90137-5"},{"key":"ref19","first-page":"1232","article-title":"Large scale distributed deep networks","author":"dean","year":"0","journal-title":"Proc of NIPS 2012"},{"key":"ref28","first-page":"456","article-title":"Distributed training strategies for the structured perceptron","author":"mcdonald","year":"2010","journal-title":"Proc Conf North Amer Chapt Assoc Comput Linguist (NAACL)"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-5779-3","author":"yu","year":"2015","journal-title":"Automatic Speech Recognition?A Deep Learning Approach"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638950"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"ref29","first-page":"2595","article-title":"Parallelized Stochastic Gradient Descent","author":"zinkevich","year":"0","journal-title":"Proceedings of NIPS'2010"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref8","first-page":"338","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"sak","year":"0","journal-title":"Proc INTERSPEECH-2014"},{"key":"ref7","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"0","journal-title":"Proc ICML-2014"},{"key":"ref2","first-page":"437","article-title":"Conversational speech transcription using context-depedent deep neural networks","author":"seide","year":"0","journal-title":"Proc Interspeech-2011"},{"key":"ref9","first-page":"1209","article-title":"Sequence discriminative distributed training of long short-term memory recurrent neural networks","author":"sak","year":"0","journal-title":"Proc INTERSPEECH-2014"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref20","first-page":"1337","article-title":"Deep learning with COTS HPC systems","author":"coates","year":"0","journal-title":"Proc of ICML'2013"},{"key":"ref22","first-page":"1048","article-title":"Parallel deep neural network training for LVCSR tasks using Blue Gene\/Q","author":"sainath","year":"0","journal-title":"Proc INTERSPEECH-2014"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853589"},{"key":"ref24","first-page":"693","article-title":"Hogwild!: A lock-free approach to parallelizing stochastic gradient descent","author":"niu","year":"0","journal-title":"Proc NIPS 2011"},{"key":"ref41","first-page":"69","article-title":"The Fisher corpus: A resource for the next generation of speech-to-text","author":"cieri","year":"0","journal-title":"Proc ICL 2004"},{"key":"ref23","first-page":"1058","article-title":"1-Bit stochastic gradient descent and its application to data-parallel distributed training of speech DNNs","author":"seide","year":"0","journal-title":"Proc INTERSPEECH-2014"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854672"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639348"}],"event":{"name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2016,3,20]]},"location":"Shanghai","end":{"date-parts":[[2016,3,25]]}},"container-title":["2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7465907\/7471614\/07472805.pdf?arnumber=7472805","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,17]],"date-time":"2024-06-17T21:26:44Z","timestamp":1718659604000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7472805\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/icassp.2016.7472805","relation":{},"subject":[],"published":{"date-parts":[[2016,3]]}}}