{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:31:31Z","timestamp":1780417891769,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"A*STAR Centre for Frontier AI Research"},{"name":"NTU Data Science and Artificial Intelligence Center"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539268","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:12Z","timestamp":1660331172000},"page":"1430-1440","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Synthesising Audio Adversarial Examples for Automatic Speech Recognition"],"prefix":"10.1145","author":[{"given":"Xinghua","family":"Qu","sequence":"first","affiliation":[{"name":"Bytedance AI Lab, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pengfei","family":"Wei","sequence":"additional","affiliation":[{"name":"Bytedance AI Lab, Sinagpore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingyong","family":"Gao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhu","family":"Sun","sequence":"additional","affiliation":[{"name":"Institute of High Performance Computing and Centre for Frontier AI Research &amp; A*STAR, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yew Soon","family":"Ong","sequence":"additional","affiliation":[{"name":"Nanyang Technological University &amp; A*STAR Centre for Frontier AI Research, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zejun","family":"Ma","sequence":"additional","affiliation":[{"name":"Bytedance AI Lab, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Proceedings of the 33th International Conference on Machine Learning (ICML). PMLR, 173--182","author":"Amodei Dario","year":"2016","unstructured":"Dario Amodei, Sundaram Ananthanarayanan, Rishita Anubhai, Jingliang Bai, Eric Battenberg, Carl Case, Jared Casper, Bryan Catanzaro, Qiang Cheng, Guoliang Chen, et al. 2016. Deep speech 2: End-to-end speech recognition in english and mandarin. In Proceedings of the 33th International Conference on Machine Learning (ICML). PMLR, 173--182."},{"key":"e_1_3_2_2_2_1","volume-title":"Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670","author":"Ardila Rosana","year":"2019","unstructured":"Rosana Ardila, Megan Branson, Kelly Davis, Michael Henretty, Michael Kohler, Josh Meyer, Reuben Morais, Lindsay Saunders, Francis M Tyers, and Gregor Weber. 2019. Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670 (2019)."},{"key":"e_1_3_2_2_3_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning (ICML). PMLR, 404--413","author":"Balles Lukas","year":"2018","unstructured":"Lukas Balles and Philipp Hennig. 2018. Dissecting adam: The sign, magnitude and variance of stochastic gradients. In Proceedings of the 35th International Conference on Machine Learning (ICML). PMLR, 404--413."},{"key":"e_1_3_2_2_4_1","volume-title":"Interpreting and explaining deep neural networks for classification of audio signals. arXiv preprint arXiv:1807.03418","author":"Becker S\u00f6ren","year":"2018","unstructured":"S\u00f6ren Becker, Marcel Ackermann, Sebastian Lapuschkin, Klaus-Robert M\u00fcller, and Wojciech Samek. 2018. Interpreting and explaining deep neural networks for classification of audio signals. arXiv preprint arXiv:1807.03418 (2018)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1214\/ss\/1177011077"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2018.00009"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2020.23055"},{"key":"e_1_3_2_2_9_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 2206--2216","author":"Croce Francesco","year":"2020","unstructured":"Francesco Croce and Matthias Hein. 2020. Reliable evaluation of adversarial robustness with an ensemble of diverse parameter-free attacks. In International Conference on Machine Learning (ICML). PMLR, 2206--2216."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_11_1","volume-title":"Towards robust speech-to-text adversarial attack. arXiv preprint arXiv:2103.08095","author":"Esmaeilpour Mohammad","year":"2021","unstructured":"Mohammad Esmaeilpour, Patrick Cardinal, and Alessandro Lameiras Koerich. 2021. Towards robust speech-to-text adversarial attack. arXiv preprint arXiv:2103.08095 (2021)."},{"key":"e_1_3_2_2_12_1","volume-title":"Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572","author":"Goodfellow Ian J","year":"2015","unstructured":"Ian J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2015. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572 (2015)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2420"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (ICML)","author":"Kim Jaehyeon","year":"2021","unstructured":"Jaehyeon Kim, Jungil Kong, and Juhee Son. 2021. Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. Proceedings of the 38th International Conference on Machine Learning (ICML) (2021)."},{"key":"e_1_3_2_2_16_1","volume-title":"HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis. Advances in Neural Information Processing Systems (NeurIPS) 33","author":"Kong Jungil","year":"2020","unstructured":"Jungil Kong, Jaehyeon Kim, and Jaekyoung Bae. 2020. HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis. Advances in Neural Information Processing Systems (NeurIPS) 33 (2020)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5928"},{"key":"e_1_3_2_2_18_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Madry Aleksander","year":"2018","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2018. Towards Deep Learning Models Resistant to Adversarial Attacks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_19_1","volume-title":"An introduction to genetic algorithms","author":"Mitchell Melanie","unstructured":"Melanie Mitchell. 1998. An introduction to genetic algorithms. MIT Press."},{"key":"e_1_3_2_2_20_1","volume-title":"Beat Buesser, Ambrish Rawat, Martin Wistuba, Valentina Zantedeschi, Nathalie Baracaldo, Bryant Chen, Heiko Ludwig, Ian Molloy, and Ben Edwards.","author":"Nicolae Maria-Irina","year":"2018","unstructured":"Maria-Irina Nicolae, Mathieu Sinn, Minh Ngoc Tran, Beat Buesser, Ambrish Rawat, Martin Wistuba, Valentina Zantedeschi, Nathalie Baracaldo, Bryant Chen, Heiko Ludwig, Ian Molloy, and Ben Edwards. 2018. Adversarial Robustness Toolbox v1.2.0. CoRR 1807.01069 (2018). https:\/\/arxiv.org\/pdf\/1807.01069"},{"key":"e_1_3_2_2_21_1","volume-title":"Explainable AI: Interpreting, Explaining and Visualizing Deep Learning","author":"Oh Seong Joon","unstructured":"Seong Joon Oh, Bernt Schiele, and Mario Fritz. 2019. Towards reverse-engineering black-box neural networks. In Explainable AI: Interpreting, Explaining and Visualizing Deep Learning. Springer, 121--144."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_2_23_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 5231--5240","author":"Qin Yao","year":"2019","unstructured":"Yao Qin, Nicholas Carlini, Garrison Cottrell, Ian Goodfellow, and Colin Raffel. 2019. Imperceptible, robust, and targeted adversarial examples for automatic speech recognition. In International Conference on Machine Learning (ICML). PMLR, 5231--5240."},{"key":"e_1_3_2_2_24_1","volume-title":"15th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} . 547--560.","author":"Roy Nirupam","unstructured":"Nirupam Roy, Sheng Shen, Haitham Hassanieh, and Romit Roy Choudhury. 2018. Inaudible voice commands: The long-range attack and defense. In 15th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} . 547--560."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107309"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"crossref","unstructured":"David Silver Julian Schrittwieser Karen Simonyan Ioannis Antonoglou Aja Huang Arthur Guez Thomas Hubert Lucas Baker Matthew Lai Adrian Bolton et al. 2017. Mastering the game of go without human knowledge. Nature 550 7676 (2017) 354--359.","DOI":"10.1038\/nature24270"},{"key":"e_1_3_2_2_28_1","first-page":"8312","article-title":"Constructing Unrestricted Adversarial Examples with Generative Models","volume":"31","author":"Song Yang","year":"2018","unstructured":"Yang Song, Rui Shu, Nate Kushman, and Stefano Ermon. 2018. Constructing Unrestricted Adversarial Examples with Generative Models. Advances in Neural Information Processing Systems (NeurIPS) 31 (2018), 8312--8323.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_2_2_29_1","volume-title":"A Survey on Neural Speech Synthesis. arXiv e-prints","author":"Tan Xu","year":"2021","unstructured":"Xu Tan, Tao Qin, Frank Soong, and Tie-Yan Liu. 2021. A Survey on Neural Speech Synthesis. arXiv e-prints (2021), arXiv--2106."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2019.00016"},{"key":"e_1_3_2_2_31_1","unstructured":"Christophe Veaux Junichi Yamagishi and Kirsten MacDonald. 2017. CSTR VCTK Corpus: English Multi-speaker Corpus for CSTR Voice Cloning Toolkit."},{"key":"e_1_3_2_2_32_1","volume-title":"At-gan: An adversarial generator model for non-constrained adversarial examples. arXiv preprint arXiv:1904.07793","author":"Wang Xiaosen","year":"2019","unstructured":"Xiaosen Wang, Kun He, Chuanbiao Song, Liwei Wang, and John E Hopcroft. 2019. At-gan: An adversarial generator model for non-constrained adversarial examples. arXiv preprint arXiv:1904.07793 (2019)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"e_1_3_2_2_34_1","volume-title":"Merlin: An Open Source Neural Network Speech Synthesis System.. In SSW. 202--207.","author":"Wu Zhizheng","year":"2016","unstructured":"Zhizheng Wu, Oliver Watts, and Simon King. 2016. Merlin: An Open Source Neural Network Speech Synthesis System.. In SSW. 202--207."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17663"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/741"},{"key":"e_1_3_2_2_37_1","volume-title":"7th International Conference on Learning Representations (ICLR).","author":"Yang Zhuolin","year":"2019","unstructured":"Zhuolin Yang, Pin Yu Chen, Bo Li, and Dawn Song. 2019. Characterizing audio adversarial examples using temporal dependency. In 7th International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_38_1","volume-title":"Commandersong: A systematic approach for practical adversarial voice recognition. In 27th {USENIX} Security Symposium ({USENIX} Security 18). 49--64.","author":"Yuan Xuejing","year":"2018","unstructured":"Xuejing Yuan, Yuxuan Chen, Yue Zhao, Yunhui Long, Xiaokang Liu, Kai Chen, Shengzhi Zhang, Heqing Huang, Xiaofeng Wang, and Carl A Gunter. 2018. Commandersong: A systematic approach for practical adversarial voice recognition. In 27th {USENIX} Security Symposium ({USENIX} Security 18). 49--64."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.1078"},{"key":"e_1_3_2_2_40_1","volume-title":"Grey-box Extraction of Natural Language Models. In International Conference on Machine Learning (ICML). PMLR, 12278--12286","author":"Zanella-Beguelin Santiago","year":"2021","unstructured":"Santiago Zanella-Beguelin, Shruti Tople, Andrew Paverd, and Boris K\u00f6pf. 2021. Grey-box Extraction of Natural Language Models. In International Conference on Machine Learning (ICML). PMLR, 12278--12286."},{"key":"e_1_3_2_2_41_1","volume-title":"compact, and high quality LSTM-RNN based statistical parametric speech synthesizers for mobile devices. arXiv preprint arXiv:1606.06061","author":"Zen Heiga","year":"2016","unstructured":"Heiga Zen, Yannis Agiomyrgiannakis, Niels Egberts, Fergus Henderson, and Przemyslaw Szczepaniak. 2016. Fast, compact, and high quality LSTM-RNN based statistical parametric speech synthesizers for mobile devices. arXiv preprint arXiv:1606.06061 (2016)."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133956.3134052"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3374217","article-title":"Adversarial attacks on deep-learning models in natural language processing: A survey","volume":"11","author":"Zhang Wei Emma","year":"2020","unstructured":"Wei Emma Zhang, Quan Z Sheng, Ahoud Alhazmi, and Chenliang Li. 2020. Adversarial attacks on deep-learning models in natural language processing: A survey. ACM Transactions on Intelligent Systems and Technology (TIST) 11, 3 (2020), 1--41.","journal-title":"ACM Transactions on Intelligent Systems and Technology (TIST)"}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Washington DC USA","acronym":"KDD '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539268","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:59:59Z","timestamp":1750186799000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539268"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":43,"alternative-id":["10.1145\/3534678.3539268","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539268","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}