{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T18:08:56Z","timestamp":1761588536210,"version":"build-2065373602"},"reference-count":44,"publisher":"IBM","issue":"4\/5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IBM J. Res. &amp; Dev."],"published-print":{"date-parts":[[2017,7,1]]},"DOI":"10.1147\/jrd.2017.2701178","type":"journal-article","created":{"date-parts":[[2017,9,8]],"date-time":"2017-09-08T14:20:05Z","timestamp":1504880405000},"page":"1:1-1:10","source":"Crossref","is-referenced-by-count":18,"title":["Recent advances in conversational speech recognition using convolutional and recurrent neural networks"],"prefix":"10.1147","volume":"61","author":[{"given":"G.","family":"Saon","sequence":"first","affiliation":[]},{"given":"M.","family":"Picheny","sequence":"additional","affiliation":[]}],"member":"3082","reference":[{"key":"ref39","article-title":"The IBM 2015 English conversational telephone speech recognition system","author":"saon","year":"0","journal-title":"Proceedings of Interspeech 2015"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-595"},{"key":"ref33","first-page":"270","article-title":"Entropy-based pruning of backoff language models","author":"stolcke","year":"0","journal-title":"Proc DARPA Broadcast News Transcription Understanding Workshop"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1999.0128"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854669"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707705"},{"article-title":"Deep speech: Scaling up end-to-end speech recognition","year":"2014","author":"hannun","key":"ref37"},{"key":"ref36","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"0","journal-title":"NIPS 2014 Deep Learning Workshop"},{"key":"ref35","first-page":"147","article-title":"Empirical study of neural network language models for arabic speech recognition","author":"mangu","year":"0","journal-title":"Proc IEEE Workshop Autom Speech Recognit Understanding"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.3115\/1620754.1620822"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.940766"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-473"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2005.1415086"},{"key":"ref12","first-page":"2345","article-title":"Sequence-discriminative training of deep neural networks","author":"vesely","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854680"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/2.993770"},{"key":"ref15","first-page":"315","article-title":"Speech processing in the auditory system","author":"morgan","year":"2004","journal-title":"Speech Processing in the Auditory System"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.859163"},{"key":"ref17","first-page":"82?97,","article-title":"Deep neural networks for acoustic modeling in speech recognition","volume":"12","author":"hinton","year":"2012","journal-title":"IEEE Signal Process Mag"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960445"},{"key":"ref19","first-page":"10","article-title":"Scalable minimum Bayes risk training of deep neural network acoustic models using distributed hessian-free optimization","author":"kingsbury","year":"0","journal-title":"Proc Interspeech 2012"},{"key":"ref28","first-page":"115","article-title":"Learning precise timing with LSTM recurrent networks","volume":"3","author":"gers","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref4","first-page":"1609","article-title":"From switchboard to Fisher: Telephone collection protocols, their uses and yields","author":"cieri","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"schmidhuber","year":"1997","journal-title":"Neural Comput"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1998.0043"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref5","first-page":"69","article-title":"The Fisher corpus: A resource for the next generations of speech-to-text","volume":"4","author":"cieri","year":"0","journal-title":"Proc 4th Int Conf Lang Eval"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1993.319343"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015015"},{"key":"ref2","first-page":"437","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"seide","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.540418"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2007.09.004"},{"key":"ref20","first-page":"1672","article-title":"Large scale hierarchical neural network language models","author":"kuo","year":"0","journal-title":"Proc Interspeech 2012"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472620"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707749"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref24","first-page":"343","article-title":"Unfolded recurrent neural networks for speech recognition","author":"saon","year":"0","journal-title":"Proc INTERSPEECH 2014"},{"article-title":"Achieving human parity in conversational speech recognition","year":"2016","author":"xiong","key":"ref41"},{"key":"ref23","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-405"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404777"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-40"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178778"}],"container-title":["IBM Journal of Research and Development"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5288520\/8030196\/08030294.pdf?arnumber=8030294","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T18:03:23Z","timestamp":1761588203000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8030294\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,1]]},"references-count":44,"journal-issue":{"issue":"4\/5"},"URL":"https:\/\/doi.org\/10.1147\/jrd.2017.2701178","relation":{},"ISSN":["0018-8646","0018-8646"],"issn-type":[{"type":"print","value":"0018-8646"},{"type":"electronic","value":"0018-8646"}],"subject":[],"published":{"date-parts":[[2017,7,1]]}}}