{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T18:44:42Z","timestamp":1757616282789,"version":"3.44.0"},"reference-count":69,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/asru46091.2019.9003837","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T02:01:33Z","timestamp":1582250493000},"page":"39-46","source":"Crossref","is-referenced-by-count":11,"title":["Integrating Source-Channel and Attention-Based Sequence-to-Sequence Models for Speech Recognition"],"prefix":"10.1109","author":[{"given":"Qiujia","family":"Li","sequence":"first","affiliation":[{"name":"Cambridge University,Engineering Dept.,Cambridge,U.K.,CB2 1PZ"}]},{"given":"Chao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Cambridge University,Engineering Dept.,Cambridge,U.K.,CB2 1PZ"}]},{"given":"Philip C.","family":"Woodland","sequence":"additional","affiliation":[{"name":"Cambridge University,Engineering Dept.,Cambridge,U.K.,CB2 1PZ"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(90)90049-F"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638951"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1073"},{"key":"ref32","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"Proc ICML Workshop on Representation Learning"},{"key":"ref31","article-title":"Con-nectionist temporal classification: Labelling unsegmented sequence data with recurrent neural networks","author":"graves","year":"2006","journal-title":"Proc ICML"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1780"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2763455"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1423"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472084"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-275"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-595"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1162\/106365601750190398"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682490"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1107"},{"key":"ref27","article-title":"A comparison of techniques for language model integration in encoder-decoder speech recognition","author":"toshniwal","year":"2018","journal-title":"Proc SLT"},{"key":"ref65","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-129","article-title":"An exploration of dropout with LSTMs","author":"cheng","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2558826"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1998.0043"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707705"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639644"},{"journal-title":"Statistical Methods for Speech Recognition","year":"1997","author":"jelinek","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472641"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462161"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1438"},{"key":"ref24","article-title":"Latent sequence decompositions","author":"chan","year":"2017","journal-title":"Proc ICLR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953069"},{"key":"ref26","article-title":"End-to-end speech recognition with word-based RNN language models","author":"hori","year":"2018","journal-title":"Proc SLT"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682494"},{"key":"ref50","article-title":"Monotonic Chunkwise Attention","author":"chiu","year":"2018","journal-title":"Proc ICLR"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462380"},{"journal-title":"The HTK book (for HTK version 3 5)","year":"2015","author":"young","key":"ref59"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683851"},{"journal-title":"ADADELTA An Adaptive Learning Rate Method","year":"2012","author":"zeiler","key":"ref57"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref54","article-title":"The AMI meeting corpus: A preannouncement","author":"carletta","year":"2005","journal-title":"Proc MLMI"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.1997.659110"},{"key":"ref52","article-title":"Posterior probability decoding, confidence estimation and system combination","author":"evermann","year":"2000","journal-title":"Proc Speech Transcription Workshop"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2702"},{"journal-title":"Off-Line Cursive Handwriting Recognition Using Recurrent Neural Networks","year":"1994","author":"senior","key":"ref40"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461809"},{"key":"ref13","article-title":"Optimal completion distillation for sequence learning","author":"sabour","year":"2019","journal-title":"Proc ICLR"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639587"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639619"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462245"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461705"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462361"},{"key":"ref4","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"Proc NIPS"},{"key":"ref3","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"Proc ICLR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref5","article-title":"A study of the recurrent neural network encoder-decoder for large vocabulary speech recognition","author":"lu","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1446"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682224"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953077"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2756440"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2002.1005687"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178778"},{"key":"ref47","article-title":"Fast and accurate recurrent neural network acoustic models for speech recognition","author":"sak","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462523"},{"key":"ref41","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0182"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.3115\/1075812.1075885"}],"event":{"name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2019,12,14]]},"location":"Singapore","end":{"date-parts":[[2019,12,18]]}},"container-title":["2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8985378\/9003727\/09003837.pdf?arnumber=9003837","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T18:17:06Z","timestamp":1757096226000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9003837\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":69,"URL":"https:\/\/doi.org\/10.1109\/asru46091.2019.9003837","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}