{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T21:46:05Z","timestamp":1772487965236,"version":"3.50.1"},"reference-count":34,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383624","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T20:46:54Z","timestamp":1616705214000},"page":"223-228","source":"Crossref","is-referenced-by-count":32,"title":["Dual Application of Speech Enhancement for Automatic Speech Recognition"],"prefix":"10.1109","author":[{"given":"Ashutosh","family":"Pandey","sequence":"first","affiliation":[]},{"given":"Chunxi","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yun","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yatharth","family":"Saraf","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"626","article-title":"SDR&#x2013; half-baked or well done?","author":"le roux","year":"2019","journal-title":"ICASSP"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"ref31","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"ICLRE"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2012"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3016487"},{"key":"ref10","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with rnn-transducer","author":"rao","year":"2017","journal-title":"ASRU"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054663"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003906"},{"key":"ref13","article-title":"Minimum bayes risk training of RNN-transducer for end-to-end speech recognition","author":"weng","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2019.2918706"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953173"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2998279"},{"key":"ref18","first-page":"12","article-title":"Wide residual BLSTM network with discriminative speaker adaptation for robust speech recognition","author":"jahn heymann","year":"2016","journal-title":"Workshop on Speech Processing in Everyday Environments (CHiME&#x2019;16)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2946789"},{"key":"ref28","first-page":"776","article-title":"Audio Set: An ontology and human-labeled dataset for audio events","author":"gemmeke","year":"2017","journal-title":"ICASSP"},{"key":"ref4","article-title":"Multilingual graphemic hybrid ASR with massive data augmentation","author":"liu","year":"2020","journal-title":"Workshop on Spoken Language Technologies for Under-resourced languages and Collaboration and Computing for Under-Resourced Languages"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054536"},{"key":"ref3","article-title":"A comparison of end-to-end models for long-form speech recog-nition","author":"chiu","year":"2019","journal-title":"ASRU"},{"key":"ref6","article-title":"Sequence transduction with recurrent neural net-works","author":"graves","year":"2012"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1344"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1705","article-title":"Recurrent neural aligner: An encoder-decoder neural network model for sequence to sequence mapping","author":"sak","year":"2017","journal-title":"InterSpeech"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1566","article-title":"Neural speech recognizer: Acoustic-to-word LSTM model for large vocabulary speech recognition","author":"soltau","year":"2017","journal-title":"InterSpeech"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2846"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707758"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053266"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2512042"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053831"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2561"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682169"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2955276"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2017.8168119"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","location":"Shenzhen, China","start":{"date-parts":[[2021,1,19]]},"end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383624.pdf?arnumber=9383624","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,3]],"date-time":"2021-05-03T21:40:21Z","timestamp":1620078021000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383624\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383624","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}