{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:01:43Z","timestamp":1773964903470,"version":"3.50.1"},"reference-count":58,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,5]]},"DOI":"10.1109\/icassp.2013.6639345","type":"proceedings-article","created":{"date-parts":[[2013,10,29]],"date-time":"2013-10-29T23:19:46Z","timestamp":1383088786000},"page":"8604-8608","source":"Crossref","is-referenced-by-count":493,"title":["Recent advances in deep learning for speech research at Microsoft"],"prefix":"10.1109","author":[{"given":"Li","family":"Deng","sequence":"first","affiliation":[]},{"given":"Jinyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jui-Ting","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Kaisheng","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Dong","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Frank","family":"Seide","sequence":"additional","affiliation":[]},{"given":"Michael","family":"Seltzer","sequence":"additional","affiliation":[]},{"given":"Geoff","family":"Zweig","sequence":"additional","affiliation":[]},{"given":"Xiaodong","family":"He","sequence":"additional","affiliation":[]},{"given":"Jason","family":"Williams","sequence":"additional","affiliation":[]},{"given":"Yifan","family":"Gong","sequence":"additional","affiliation":[]},{"given":"Alex","family":"Acero","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960588"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639187"},{"key":"33","article-title":"Regularized adaptation of discriminative classifiers","author":"li","year":"2006","journal-title":"ICASSP"},{"key":"34","article-title":"Improving wideband speech recognition using mixed-bandwidth training data in CD-DNNHMM","author":"li","year":"2012","journal-title":"IEEE SLT"},{"key":"39","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-304","article-title":"Investigation of full-sequence training of deep belief networks for speech recognition","author":"mohamed","year":"2010","journal-title":"InterSpeech"},{"key":"37","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424228"},{"key":"38","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-343","article-title":"Recurrent neural network based language model","author":"mikolov","year":"2010","journal-title":"InterSpeech"},{"key":"43","article-title":"Multilingual and cross-lingual speech recognition","author":"schultz","year":"1998","journal-title":"Proc DARPA Broadcast News Transcription and Understanding Workshop"},{"key":"42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"41","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163916"},{"key":"40","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"ICML"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639292"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639348"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"25","author":"hinton","year":"2012","journal-title":"Improving Neural Networks by Preventing Coadaptation of Feature Detectors"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1007\/s12046-011-0044-2"},{"key":"27","article-title":"Cross-language knowledge transfer using multilingual deep neural network with shared hidden layers","author":"huang","year":"2013","journal-title":"ICASSP"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639103"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638237"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"2","doi-asserted-by":"crossref","DOI":"10.21437\/Eurospeech.1995-414","article-title":"Connectionist speaker normalization and adaptation","author":"abrash","year":"1995","journal-title":"Eurospeech"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"7","article-title":"Large vocabulary continuous speech recognition with context-dependent DBN-HMMs","author":"dahl","year":"2011","journal-title":"ICASSP"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.268"},{"key":"6","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-7","article-title":"Pipelined backpropagation for context-dependent deep neural networks","author":"chen","year":"2012","journal-title":"InterSpeech"},{"key":"5","article-title":"Spoken dialog challenge 2010: Comparison of live and control test results","author":"black","year":"2011","journal-title":"Proceedings of the SigDial Workshop"},{"key":"32","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-3","article-title":"Scalable minimum Bayes risk training of DNN acoustic models using distributed Hessian-free optimization","author":"kingsbury","year":"2012","journal-title":"InterSpeech"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639349"},{"key":"31","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2040522"},{"key":"9","article-title":"Improving DNNs for LVCSR using RELU and dropout","author":"dahl","year":"2013","journal-title":"ICASSP"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"58","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947631"},{"key":"57","article-title":"Cache neural network language models based on longdistance dependencies for a spoken dialog system","author":"zamora-martinez","year":"2012","journal-title":"ICASSP"},{"key":"56","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2227738"},{"key":"19","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-487","article-title":"Binary coding of speech spectrograms using a deep auto-encoder","author":"deng","year":"2010","journal-title":"InterSpeech"},{"key":"55","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639201"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638952"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638239"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424224"},{"key":"16","article-title":"Integrated-multilingual speech recognition using universal phonological features in a functional speech production model","author":"deng","year":"1997","journal-title":"ICASSP"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288333"},{"key":"14","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-607","article-title":"Deep convex net: A scalable architecture for speech pattern classification","author":"deng","year":"2011","journal-title":"InterSpeech"},{"key":"11","doi-asserted-by":"crossref","DOI":"10.1109\/TASL.2013.2244083","article-title":"Machine learning paradigms for speech recognition: An overview","volume":"21","author":"deng","year":"2013","journal-title":"IEEE Trans Audio Speech and Lang Proc"},{"key":"12","article-title":"Large-vocabulary speech recognition under adverse acoustic environments","author":"deng","year":"2000","journal-title":"Proc ICSLP"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947427"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947516"},{"key":"49","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424230"},{"key":"48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638951"},{"key":"45","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"44","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-169","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"seide","year":"2011","journal-title":"InterSpeech"},{"key":"47","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-456","article-title":"Towards recurrent neural network language models with linguistic and contextual features","author":"shi","year":"2012","journal-title":"InterSpeech"},{"key":"46","doi-asserted-by":"publisher","DOI":"10.1109\/89.260337"},{"key":"10","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"NIPS"},{"key":"51","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-12","article-title":"An investigation on initialization schemes for multilayer perceptron training using multilingual data and their effect on ASR performance","author":"vu","year":"2012","journal-title":"InterSpeech"},{"key":"52","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2012.6424251"},{"key":"53","article-title":"Roles of pre-training and fine-tuning in context-dependent DBN-HMMs for real-world speech recognition","author":"yu","year":"2010","journal-title":"Deep Learning Workshop"},{"key":"54","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288897"},{"key":"50","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289054"}],"event":{"name":"ICASSP 2013 - 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2013,5,26]]},"end":{"date-parts":[[2013,5,31]]}},"container-title":["2013 IEEE International Conference on Acoustics, Speech and Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6619549\/6637585\/06639345.pdf?arnumber=6639345","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,5]],"date-time":"2023-07-05T19:26:00Z","timestamp":1688585160000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6639345\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,5]]},"references-count":58,"URL":"https:\/\/doi.org\/10.1109\/icassp.2013.6639345","relation":{},"subject":[],"published":{"date-parts":[[2013,5]]}}}