{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:37:27Z","timestamp":1775839047954,"version":"3.50.1"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/asru46091.2019.9003906","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T02:01:33Z","timestamp":1582250493000},"page":"114-121","source":"Crossref","is-referenced-by-count":106,"title":["Improving RNN Transducer Modeling for End-to-End Speech Recognition"],"prefix":"10.1109","author":[{"given":"Jinyu","family":"Li","sequence":"first","affiliation":[]},{"given":"Rui","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Hu","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Yifan","family":"Gong","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Ex-ploring layer trajectory LSTM with depth processing units and attention","author":"li","year":"2018","journal-title":"Proc IEEE SLT"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1485"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472780"},{"key":"ref32","author":"kim","year":"2017","journal-title":"Residual LSTM Design of a Deep Recurrent Architecture for Distant Speech Recognition"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-677"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846305"},{"key":"ref37","author":"ba","year":"2016","journal-title":"Layer normalization"},{"key":"ref36","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in English and Mandarin","author":"amodei","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref35","first-page":"807","article-title":"Rectified linear units improve restricted boltzmann machines","author":"nair","year":"2010","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-429"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462017"},{"key":"ref40","article-title":"Im-proving layer trajectory LSTM with future context frames","author":"li","year":"2019","journal-title":"Proc ICASSP"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref12","first-page":"1764","article-title":"Towards End-to-End Speech Recognition with Recurrent Neural Networks","author":"graves","year":"2014","journal-title":"PMLR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref14","article-title":"Neural Machine Translation by Jointly Learning to Align and Translate","author":"bahdanau","year":"2015","journal-title":"ICLRE"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref16","article-title":"Attention-Based Models for Speech Recognition","author":"chorowski","year":"2015","journal-title":"NIPS"},{"key":"ref17","article-title":"Sequence Transduction with Recurrent Neural Networks","volume":"abs 1211 3711","author":"graves","year":"2012","journal-title":"CoRR"},{"key":"ref18","author":"soltau","year":"2016","journal-title":"Neural speech recognizer Acoustic-to-word lstm model for large vocabulary speech recognition"},{"key":"ref19","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with RNN-transducer","author":"rao","year":"2017","journal-title":"Proc ASRU"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472617"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"939","DOI":"10.21437\/Interspeech.2017-233","article-title":"A Comparison of Sequence-to-Sequence Models for Speech Recognition","author":"prabhavalkar","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404793"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1705","article-title":"Recurrent neural aligner: An encoder-decoder neural network model for sequence to sequence mapping","author":"sak","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-84"},{"key":"ref5","article-title":"Exploring Neural Transducers for End-to-End Speech Recognition","author":"battenberg","year":"2017","journal-title":"Proc ASRU"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref7","article-title":"Towards Discriminatively-trained HMM-based End-to-end models for Automatic Speech Recognition","author":"hadian","year":"2018","journal-title":"Proc ICASSP"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462366"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178778"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683510"},{"key":"ref21","author":"chiu","year":"2017","journal-title":"Monotonic chunkwise attention"},{"key":"ref42","author":"sennrich","year":"2015","journal-title":"Neural machine translation of rare words with subword units"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2933325"},{"key":"ref41","author":"chung","year":"2014","journal-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461558"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472084"},{"key":"ref25","first-page":"506","article-title":"Efficient implementation of recurrent neural network transducer in tensor-flow","author":"bagby","year":"2018","journal-title":"Proc SLT"}],"event":{"name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"SG, Singapore","start":{"date-parts":[[2019,12,14]]},"end":{"date-parts":[[2019,12,18]]}},"container-title":["2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8985378\/9003727\/09003906.pdf?arnumber=9003906","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T10:51:19Z","timestamp":1658141479000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9003906\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/asru46091.2019.9003906","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}