{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T15:21:43Z","timestamp":1777130503429,"version":"3.51.4"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,21]],"date-time":"2020-09-21T00:00:00Z","timestamp":1600646400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,21]],"date-time":"2020-09-21T00:00:00Z","timestamp":1600646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,21]],"date-time":"2020-09-21T00:00:00Z","timestamp":1600646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,21]]},"DOI":"10.1109\/mmsp48831.2020.9287127","type":"proceedings-article","created":{"date-parts":[[2020,12,16]],"date-time":"2020-12-16T23:57:58Z","timestamp":1608163078000},"page":"1-5","source":"Crossref","is-referenced-by-count":10,"title":["Improving Automatic Speech Recognition Utilizing Audio-codecs for Data Augmentation"],"prefix":"10.1109","author":[{"given":"Nirayo","family":"Hailu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ingo","family":"Siegert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andreas","family":"Nurnberger","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","first-page":"215","article-title":"Emotion intelligibility within codec-compressed and reduced bandwith speech","author":"siegert","year":"2016","journal-title":"ITG-Fb 267 Speech Communication 12 ITG-Fachtagung Sprachkommunikation"},{"key":"ref32","article-title":"High-quality, low-delay music coding in the Opus codec","author":"valin","year":"2013","journal-title":"Proceedings of the 135th Audio Engineering Society Convention p s p Audio Engineering Society"},{"key":"ref31","article-title":"Summary of Opus listening test results draft-valin-codec-results-03","author":"hoene","year":"2013","journal-title":"Internet-Draft IETF"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-23132-7_43"},{"key":"ref35","article-title":"Improved regularization techniques for end-to-end speech recognition","author":"zhou","year":"2017"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11937","article-title":"Addressee and response selection in multi-party conversations with speaker interaction rnns","author":"zhang","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref11","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","volume":"37","author":"ioffe","year":"2015","journal-title":"ICML&#x2019;15 Proceedings of the 32nd International Conference on International Conference on Machine Learning"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1080\/00401706.1991.10484833"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2018.8553611"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2013.58"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-0327-9"},{"key":"ref16","first-page":"625","article-title":"Vocal tract length perturbation (vtlp) improve speech recognition","author":"jaitly","year":"2013","journal-title":"ICML Workshop"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref18","article-title":"End-to-end deep neural network for automatic speech recognition","author":"song","year":"2015","journal-title":"Standford CS224D Reports"},{"key":"ref19","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"key":"ref28","first-page":"229","article-title":"Measuring the impact of audio compression on the spectral quality of speech data","volume":"81","author":"siegert","year":"2016","journal-title":"Elektronische Sprachsignalverarbeitung 2016 Tagungsband der 27 Konferenz"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref27","article-title":"Impact of the GSM AMR Speech Codec on Formant Information Important to Forensic Speaker Identification","author":"guillemin","year":"2009","journal-title":"Proc of IEEE ICASSP"},{"key":"ref3","article-title":"Common Voice: A Massively-Multilingual Speech Corpus","author":"ardila","year":"2019"},{"key":"ref6","article-title":"New paradigm in speech recognition: deep neural networks","author":"fohr","year":"2017","journal-title":"IEEE International Conference on Information Intelligence and Systems"},{"key":"ref29","article-title":"Definition of the Opus audio codec","author":"valin","year":"0","journal-title":"RFC 6716"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref8","article-title":"Deep learning","volume":"1","author":"goodfellow","year":"2016"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-31372-2_15"},{"key":"ref2","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in English and mandarin","author":"amodei","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2019.8902524"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953069"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953077"},{"key":"ref24","article-title":"MP3 and AAC Explained","author":"brandenburg","year":"1999","journal-title":"AES 17th Int Conf High-Quality Audio Coding"},{"key":"ref23","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"Sixteenth Annual Conference of the International Speech Communication Association"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1558\/sll.2004.11.1.83"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-43958-7_69"}],"event":{"name":"2020 IEEE 22nd International Workshop on Multimedia Signal Processing (MMSP)","location":"Tampere, Finland","start":{"date-parts":[[2020,9,21]]},"end":{"date-parts":[[2020,9,24]]}},"container-title":["2020 IEEE 22nd International Workshop on Multimedia Signal Processing (MMSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9287028\/9287048\/09287127.pdf?arnumber=9287127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T19:14:31Z","timestamp":1670267671000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9287127\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,21]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/mmsp48831.2020.9287127","relation":{},"subject":[],"published":{"date-parts":[[2020,9,21]]}}}