{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:37:49Z","timestamp":1774539469633,"version":"3.50.1"},"reference-count":40,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639575","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:36:34Z","timestamp":1550187394000},"page":"56-63","source":"Crossref","is-referenced-by-count":25,"title":["Learning Noise-Invariant Representations for Robust Speech Recognition"],"prefix":"10.1109","author":[{"given":"Davis","family":"Liang","sequence":"first","affiliation":[]},{"given":"Zhiheng","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Zachary C.","family":"Lipton","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Csr-i (wsj0) complete ldc93s6a","author":"garofolo","year":"1993","journal-title":"Web Download"},{"key":"ref38","first-page":"2096","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"ganin","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref33","article-title":"An overview of noise-robust automatic speech recognition","volume":"22","author":"li","year":"2014","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"key":"ref32","article-title":"Recurrent neural networks for noise reduction in robust asr","author":"maas","year":"2012","journal-title":"Conference of the International Speech Communication Association"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2112350"},{"key":"ref30","article-title":"Audio-visual deep learning for noise robust speech recognition","author":"huang","year":"2013","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref37","year":"2000","journal-title":"Quicknet"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607807"},{"key":"ref35","article-title":"Adaptive training with joint uncertainty decoding for robust recognition of noisy data","volume":"4","author":"liao","year":"2007","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref34","article-title":"A recursive feature vector normalization approach for robust speech recognition in noise","volume":"2","author":"viikki","year":"1998","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref10","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"arXiv preprint arXiv 1211 3711"},{"key":"ref40","article-title":"Trace norm regularization and faster inference for embedded speech recognition rnns","author":"kliegl","year":"2017","journal-title":"arXiv preprint arXiv 1710 09026"},{"key":"ref11","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref12","article-title":"A comparison of modeling units in sequence-to-sequence speech recognition with the transformer on mandarin chinese","author":"zhou","year":"2018","journal-title":"arXiv preprint arXiv 1805 06239"},{"key":"ref13","article-title":"Wav2letter: An end-to-end convnet-based speech recognition system","author":"collobert","year":"2016","journal-title":"arXiv preprint arXiv 1609 03193"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1980.1163420"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRAIE.2016.7939548"},{"key":"ref17","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1996.8.3.643"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/21.370200"},{"key":"ref28","article-title":"Adversarial logit pairing","author":"kannan","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref4","first-page":"4945","article-title":"End-to-end attention-based large vocabulary speech recognition","author":"bahdanau","year":"2016","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref27","article-title":"Invariant representations for noisy speech recognition","author":"serdyuk","year":"2016","journal-title":"Computing Research Repository"},{"key":"ref3","first-page":"167","author":"miao","year":"2015","journal-title":"Eesen End-to-end speech recognition using deep rnn models and wfst-based decoding"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1107"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1616"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref7","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref2","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","author":"amodei","year":"2015","journal-title":"arXiv 1512 02595"},{"key":"ref9","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref1","article-title":"Deep speech: Scaling up end-to-end speech recognition","author":"hannun","year":"2014","journal-title":"arXiv 1412 5567"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1995.7.1.108"},{"key":"ref22","article-title":"Musan: A music, speech, and noise corpus","author":"snyder","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.5.1093"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/SPLIM.2016.7528399"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-014-0047-0"},{"key":"ref26","article-title":"Librispeech: an asr corpus based on public domain audio books","author":"panayotov","year":"2015","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.14311\/1105","article-title":"Multi-condition training for unknown environment adaptation in robust asr under real conditions","volume":"49","author":"rajnoha","year":"2009","journal-title":"Acta Polytechnica"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","location":"Athens, Greece","start":{"date-parts":[[2018,12,18]]},"end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639575.pdf?arnumber=8639575","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T05:37:40Z","timestamp":1643261860000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639575\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639575","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}