{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T10:21:59Z","timestamp":1743070919396,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602758"},{"type":"electronic","value":"9783030602765"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_26","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"255-266","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Audio Adversarial Examples for Robust Hybrid CTC\/Attention Speech Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5312-3870","authenticated-orcid":false,"given":"Ludwig","family":"K\u00fcrzinger","sequence":"first","affiliation":[]},{"given":"Edgar Ricardo","family":"Chavez Rosas","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0641-3178","authenticated-orcid":false,"given":"Lujun","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3552-3325","authenticated-orcid":false,"given":"Tobias","family":"Watzel","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1096-1596","authenticated-orcid":false,"given":"Gerhard","family":"Rigoll","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"26_CR1","unstructured":"Abdoli, S., Hafemann, L.G., Rony, J., Ayed, I.B., Cardinal, P., Koerich, A.L.: Universal adversarial audio perturbations. ArXiv abs\/1908.03173 (2019)"},{"key":"26_CR2","unstructured":"Andronic, I.: MP3 Compression as a means to improve robustness against adversarial noise targeting attention-based end-to-end speech recognition. Master\u2019s thesis, Technical University of Munich, Germany (2020)"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Andronic, I., K\u00fcrzinger, L., Rosas, E.R.C., Rigoll, G., Seeber, B.U.: MP3 compression to diminish adversarial noise in end-to-end speech recognition. arXiv preprint arXiv:2007.12892 , To be published at SPECOM 2020 (2020)","DOI":"10.1007\/978-3-030-60276-5_3"},{"key":"26_CR4","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"26_CR5","unstructured":"Carlini, N., et al.: Hidden voice commands. In: 25th $$\\{$$USENIX$$\\}$$ Security Symposium ($$\\{$$USENIX$$\\}$$ Security 16), pp. 513\u2013530 (2016)"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Carlini, N., Wagner, D.: Audio adversarial examples: Targeted attacks on speech-to-text. In: 2018 IEEE Security and Privacy Workshops (SPW), pp. 1\u20137. IEEE (2018)","DOI":"10.1109\/SPW.2018.00009"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4960\u20134964. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"26_CR8","unstructured":"Chavez Rosas, E.R.: Improving robustness of sequence-to-sequence automatic speech recognition by means of adversarial training. Master\u2019s thesis, Technical University of Munich, Germany (2020)"},{"key":"26_CR9","unstructured":"Chorowski, J.K., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: Neural Information Processing Systems, pp. 577\u2013585 (2015)"},{"key":"26_CR10","unstructured":"Cisse, M., Adi, Y., Neverova, N., Keshet, J.: Houdini: fooling deep structured prediction models. ArXiv abs\/1707.05373 (2017)"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Dong, Y., et al.: Boosting adversarial attacks with momentum. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9185\u20139193 (2018)","DOI":"10.1109\/CVPR.2018.00957"},{"key":"26_CR12","unstructured":"Goodfellow, I.J., Shlens, J., Szegedy, C.: Explaining and harnessing adversarial examples. CoRR abs\/1412.6572 (2014)"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ACM (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"26_CR14","unstructured":"Gulcehre, C., et al.: On using monolingual corpora in neural machine translation. arXiv preprint arXiv:1503.03535 (2015)"},{"key":"26_CR15","unstructured":"Hannun, A.Y., et al.: Deep speech: scaling up end-to-end speech recognition. arXiv abs\/1412.5567 (2014)"},{"issue":"10","key":"26_CR16","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1109\/MCOM.2019.1900006","volume":"57","author":"S Hu","year":"2019","unstructured":"Hu, S., Shang, X., Qin, Z., Li, M., Wang, Q., Wang, C.: Adversarial examples for automatic speech recognition: attacks and countermeasures. IEEE Commun. Mag. 57(10), 120\u2013126 (2019)","journal-title":"IEEE Commun. Mag."},{"key":"26_CR17","doi-asserted-by":"publisher","unstructured":"Kudo, T.: Subword regularization: improving neural network translation models with multiple subword candidates. ArXiv abs\/1804.10959 (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1007","DOI":"10.18653\/v1\/P18-1007"},{"key":"26_CR18","unstructured":"Kurakin, A., Goodfellow, I., Bengio, S.: Adversarial examples in the physical world. CoRR abs\/1607.02533 (2016). http:\/\/arxiv.org\/abs\/1607.02533"},{"key":"26_CR19","unstructured":"Kurakin, A., Goodfellow, I., Bengio, S.: Adversarial machine learning at scale. CoRR abs\/1611.01236 (2016). http:\/\/arxiv.org\/abs\/1611.01236"},{"key":"26_CR20","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1007\/978-3-030-26061-3_27","volume-title":"Speech and Computer","author":"L K\u00fcrzinger","year":"2019","unstructured":"K\u00fcrzinger, L., Watzel, T., Li, L., Baumgartner, R., Rigoll, G.: Exploring hybrid CTC\/Attention end-to-end speech recognition with Gaussian processes. In: Salah, A.A., Karpov, A., Potapova, R. (eds.) SPECOM 2019. LNCS (LNAI), vol. 11658, pp. 258\u2013269. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-26061-3_27"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Lu, L., Kong, L., Dyer, C., Smith, N.A.: Multitask learning with CTC and segmental CRF for speech recognition (2017). 10\/gf3hs6","DOI":"10.21437\/Interspeech.2017-71"},{"key":"26_CR22","doi-asserted-by":"publisher","unstructured":"Neekhara, P., Hussain, S., Pandey, P., Dubnov, S., McAuley, J., Koushanfar, F.: Universal adversarial perturbations for speech recognition systems. ArXiv abs\/1905.03828 (2019). https:\/\/doi.org\/10.21437\/interspeech.2019-1353","DOI":"10.21437\/interspeech.2019-1353"},{"key":"26_CR23","unstructured":"Qin, Y., Carlini, N., Goodfellow, I., Cottrell, G., Raffel, C.: Imperceptible, robust, and targeted adversarial examples for automatic speech recognition. ArXiv abs\/1903.10346 (2019)"},{"key":"26_CR24","unstructured":"Rousseau, A., Del\u00e9glise, P., Esteve, Y.: Enhancing the TED-LIUM corpus with selected data for language modeling and more ted talks. In: LREC, pp. 3935\u20133939 (2014)"},{"key":"26_CR25","doi-asserted-by":"publisher","unstructured":"Sch\u00f6nherr, L., Kohls, K., Zeiler, S., Holz, T., Kolossa, D.: Adversarial attacks against automatic speech recognition systems via psychoacoustic hiding. ArXiv abs\/1808.05665 (2018). https:\/\/doi.org\/10.14722\/ndss.2019.23288","DOI":"10.14722\/ndss.2019.23288"},{"key":"26_CR26","doi-asserted-by":"crossref","unstructured":"Shen, J., et al.: Natural TTS synthesis by conditioning WaveNet on Mel spectrogram predictions. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4779\u20134783. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461368"},{"issue":"11","key":"26_CR27","doi-asserted-by":"publisher","first-page":"1826","DOI":"10.1109\/TASLP.2019.2933146","volume":"27","author":"S Sun","year":"2019","unstructured":"Sun, S., Guo, P., Xie, L., Hwang, M.Y.: Adversarial regularization for attention based end-to-end robust speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(11), 1826\u20131838 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"26_CR28","unstructured":"Szegedy, C., et al.: Intriguing properties of neural networks. CoRR abs\/1312.6199 (2013)"},{"key":"26_CR29","unstructured":"Vadillo, J., Santana, R.: Universal adversarial examples in speech command classification. ArXiv abs\/1911.10182 (2019)"},{"key":"26_CR30","doi-asserted-by":"publisher","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: Interspeech, pp. 2207\u20132211 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1456","DOI":"10.21437\/Interspeech.2018-1456"},{"issue":"8","key":"26_CR31","doi-asserted-by":"publisher","first-page":"1240","DOI":"10.1109\/JSTSP.2017.2763455","volume":"11","author":"S Watanabe","year":"2017","unstructured":"Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T.: Hybrid CTC\/Attention architecture for end-to-end speech recognition. IEEE J. Sel. Top. Sig. Process. 11(8), 1240\u20131253 (2017)","journal-title":"IEEE J. Sel. Top. Sig. Process."}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,6]],"date-time":"2021-04-06T02:07:59Z","timestamp":1617674879000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}