{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:20:09Z","timestamp":1753600809322,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383606","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T16:46:54Z","timestamp":1616690814000},"page":"52-59","source":"Crossref","is-referenced-by-count":30,"title":["Alignment Restricted Streaming Recurrent Neural Network Transducer"],"prefix":"10.1109","author":[{"given":"Jay","family":"Mahadeokar","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Shangguan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Duc","family":"Le","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gil","family":"Keren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hang","family":"Su","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thong","family":"Le","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ching-Feng","family":"Yeh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Fuegen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael L.","family":"Seltzer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2012"},{"key":"ref32","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"Sixteenth Annual Conference of the International Speech Communication Association"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472780"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-496"},{"key":"ref10","article-title":"Sequence transduction with recurrent neural networks","volume":"abs 1211 3711","author":"graves","year":"2012","journal-title":"CoRR"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"6645","DOI":"10.1109\/ICASSP.2013.6638947","article-title":"Speech recognition with deep recurrent neural networks","author":"graves","year":"2013","journal-title":"2013 IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003822"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3016"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"1298","DOI":"10.21437\/Interspeech.2017-1705","article-title":"Recurrent neural aligner: An encoder-decoder neural network model for sequence to sequence mapping","volume":"8","author":"sak","year":"2017","journal-title":"InterSpeech"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053600"},{"key":"ref16","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2020-1855","article-title":"A new training pipeline for an improved neural transducer","author":"zeyer","year":"2020"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683109"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054715"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054663"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2079"},{"key":"ref27","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1109\/ASRU.2017.8268935","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with rnn-transducer","author":"rao","year":"2017","journal-title":"IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2763455"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054188"},{"article-title":"Optimizing speech recognition for the edge","year":"2019","author":"shangguan","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003854"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004027"},{"article-title":"Streaming end-to-end bilingual asr systems with joint language identification","year":"2020","author":"punjabi","key":"ref1"},{"key":"ref20","first-page":"604","article-title":"Acoustic modelling with cd-ctc-smbr lstm rnns","author":"senior","year":"2015","journal-title":"IEEE 2015 Automatic Speech Recognition and Understanding Workshop (ASRU)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2277"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-595"},{"article-title":"Sequence transduction with recurrent neural networks","year":"2012","author":"graves","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003906"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003972"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2021,1,19]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383606.pdf?arnumber=9383606","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T20:21:53Z","timestamp":1698092513000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383606\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383606","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}