{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T21:09:35Z","timestamp":1767992975459,"version":"3.49.0"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/R012067\/1"],"award-info":[{"award-number":["EP\/R012067\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/taslp.2022.3172632","type":"journal-article","created":{"date-parts":[[2022,5,5]],"date-time":"2022-05-05T20:07:10Z","timestamp":1651781230000},"page":"1977-1992","source":"Crossref","is-referenced-by-count":5,"title":["Towards Robust Waveform-Based Acoustic Models"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4728-9644","authenticated-orcid":false,"given":"Dino","family":"Oglic","sequence":"first","affiliation":[{"name":"Applied Analytics and AI, Data Sciences and AI, BioPharmaceuticals R&amp;D, AstraZeneca, Cambridge, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5128-5099","authenticated-orcid":false,"given":"Zoran","family":"Cvetkovic","sequence":"additional","affiliation":[{"name":"Department of Engineering, King&#x0027;s College London, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0169-7893","authenticated-orcid":false,"given":"Peter","family":"Sollich","sequence":"additional","affiliation":[{"name":"Department of Mathematics, King&#x0027;s College London, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8790-3389","authenticated-orcid":false,"given":"Steve","family":"Renals","sequence":"additional","affiliation":[{"name":"Center for Speech Technology Research, University of Edinburgh, Edinburgh, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Yu","sequence":"additional","affiliation":[{"name":"Departments of Statistics and Electrical Engineering and Computer Sciences, UC Berkeley, Berkeley, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1","article-title":"Feature learning in deep neural networksstudies on speech recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yu","year":"2013"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2016.11.005"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2904"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639585"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1257"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1870"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2005.10.005"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2007-430"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1999.758138"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2011.6034260"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2090657"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205029"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2438544"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2304637"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2656"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177704472"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1137\/1114019"},{"key":"ref19","article-title":"Layer normalization","author":"Ba","year":"2016"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.106.620"},{"key":"ref21","first-page":"2","article-title":"Exponential families for conditional random fields","volume-title":"Proc. 20th Conf. Uncertainty Artif. Intell.","author":"Altun","year":"2004"},{"key":"ref22","first-page":"1","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Vaswani","year":"2017"},{"key":"ref23","article-title":"Vicinal risk minimization","volume-title":"Advances in Neural Information Processing Systems","author":"Chapelle","year":"2001"},{"key":"ref24","first-page":"1310","article-title":"Certified adversarial robustness via randomized smoothing","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Cohen","year":"2019"},{"key":"ref25","first-page":"11292","article-title":"Provably robust deep learning via adversarially trained smoothed classifiers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Salman","year":"2019"},{"key":"ref26","first-page":"1528","article-title":"A kernel theory of modern data augmentation","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Dao","year":"2019"},{"key":"ref27","article-title":"Aurora working group: DSR front end LVCSR evaluation AU\/384\/02","author":"Parihar","year":"2002"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.2991\/isccca.2013.144"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461310"},{"key":"ref31","article-title":"SMS-WSJ: Database, performance measures, and baseline recipe for multi-channel source separation and recognition","author":"Drude","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1987.1169544"},{"key":"ref33","article-title":"Deep speech: Scaling up end-to-end speech recognition","author":"Hannun","year":"2014"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-711"},{"key":"ref35","first-page":"1","article-title":"Vocal tract length perturbation (VTLP) improves speech recognition","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jaitly","year":"2013"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3104193"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CISP-BMEI51763.2020.9263564"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2456"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-207"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"ref41","first-page":"1","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Baevski","year":"2020"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.366922"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2003.1318454"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2007.4430116"},{"key":"ref45","first-page":"1","article-title":"The Kaldi speech recognition toolkit","volume-title":"Proc. IEEE Autom. Speech Recognit. Understanding Workshop","author":"Povey","year":"2011"},{"key":"ref46","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Glorot","year":"2010"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-1"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2602884"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053703"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383626"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178830"},{"key":"ref52","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-1949-9","volume-title":"Linear Algebra","author":"Lang","year":"1987"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9657755\/09769982.pdf?arnumber=9769982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T23:17:38Z","timestamp":1705965458000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9769982\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/taslp.2022.3172632","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}