{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T04:12:07Z","timestamp":1774411927189,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icassp40776.2020.9053008","type":"proceedings-article","created":{"date-parts":[[2020,4,9]],"date-time":"2020-04-09T16:21:13Z","timestamp":1586449273000},"page":"7069-7073","source":"Crossref","is-referenced-by-count":46,"title":["Generating Synthetic Audio Data for Attention-Based Speech Recognition Systems"],"prefix":"10.1109","author":[{"given":"Nick","family":"Rossenbach","sequence":"first","affiliation":[{"name":"RWTH Aachen University,Human Language Technology and Pattern Recognition,Germany"}]},{"given":"Albert","family":"Zeyer","sequence":"additional","affiliation":[{"name":"RWTH Aachen University,Human Language Technology and Pattern Recognition,Germany"}]},{"given":"Ralf","family":"Schluter","sequence":"additional","affiliation":[{"name":"RWTH Aachen University,Human Language Technology and Pattern Recognition,Germany"}]},{"given":"Hermann","family":"Ney","sequence":"additional","affiliation":[{"name":"RWTH Aachen University,Human Language Technology and Pattern Recognition,Germany"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1109\/SLT.2018.8639589","article-title":"Leveraging sequence-to-sequence speech synthesis for enhancing acoustic-to-word speech recognition","author":"mimura","year":"2018","journal-title":"2018 IEEE Spoken Language Technology Workshop SLT 2018"},{"key":"ref11","article-title":"Training neural speech recognition systems with synthetic speech augmentation","author":"li","year":"2018","journal-title":"CoRR"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"4006","DOI":"10.21437\/Interspeech.2017-1452","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref13","article-title":"Natural TTS synthesis by conditioning wavenet on mel spectrogram predictions","author":"shen","year":"2017","journal-title":"CoRR"},{"key":"ref14","article-title":"Deep voice 3: 2000-speaker neural text-to-speech","author":"ping","year":"2017","journal-title":"CoRR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1984.1172423"},{"key":"ref16","first-page":"5167","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","author":"wang","year":"0","journal-title":"Proceedings of the 35th International Conference on Machine Learning ICML 2018"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2162"},{"key":"ref18","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"InterSpeech"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2015"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"426","DOI":"10.1109\/SLT.2018.8639619","article-title":"Back-translation-style data augmentation for end-to-end ASR","author":"hayashi","year":"2018","journal-title":"2018 IEEE Spoken Language Technology Workshop SLT 2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1616"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268950"},{"key":"ref5","article-title":"Cycleconsistency training for end-to-end speech recognition","author":"hori","year":"2018","journal-title":"CoRR"},{"key":"ref8","first-page":"6281","article-title":"End-toend feedback loss in speech chain framework via straight-through estimator","author":"tjandra","year":"2019","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing ICASSP 2019"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1558"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref9","article-title":"Self-supervised sequence-to-sequence ASR using unpaired speech and text","author":"baskar","year":"2019","journal-title":"CoRR"},{"key":"ref1","article-title":"RWTH ASR systems for librispeech: Hybrid vs attention - w\/o data augmentation","author":"l\u00fcscher","year":"2019","journal-title":"CoRR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953177"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref24","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"CoRR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2225"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2441"}],"event":{"name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Barcelona, Spain","start":{"date-parts":[[2020,5,4]]},"end":{"date-parts":[[2020,5,8]]}},"container-title":["ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9040208\/9052899\/09053008.pdf?arnumber=9053008","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T19:57:19Z","timestamp":1659470239000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9053008\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/icassp40776.2020.9053008","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}