{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T14:15:48Z","timestamp":1775225748340,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF-2007159"],"award-info":[{"award-number":["CCF-2007159"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2310207"],"award-info":[{"award-number":["CNS-2310207"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,4]]},"DOI":"10.1145\/3636534.3690692","type":"proceedings-article","created":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T23:13:18Z","timestamp":1733353998000},"page":"1268-1282","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["WavePurifier: Purifying Audio Adversarial Examples via Hierarchical Diffusion Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3779-4679","authenticated-orcid":false,"given":"Hanqing","family":"Guo","sequence":"first","affiliation":[{"name":"Michigan State University, East Lansing, US"},{"name":"University of Hawaii at Manoa, Honolulu, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9353-9042","authenticated-orcid":false,"given":"Guangjing","family":"Wang","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, US"},{"name":"University of South Florida, Tampa, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0471-7063","authenticated-orcid":false,"given":"Bocheng","family":"Chen","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2062-9013","authenticated-orcid":false,"given":"Yuanda","family":"Wang","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7392-3477","authenticated-orcid":false,"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Duke University, Durham, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5208-7775","authenticated-orcid":false,"given":"Xun","family":"Chen","sequence":"additional","affiliation":[{"name":"Samsung Research America, Mountain View, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6272-7668","authenticated-orcid":false,"given":"Qiben","family":"Yan","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2861-8438","authenticated-orcid":false,"given":"Li","family":"Xiao","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. RapidFuzz. https:\/\/pypi.org\/project\/rapidfuzz\/."},{"key":"e_1_3_2_1_2_1","volume-title":"31st Conference on Neural Information Processing Systems (NIPS).","author":"Alzantot Moustafa","year":"2017","unstructured":"Moustafa Alzantot, Bharathan Balaji, and Mani Srivastava. 2017. Did you hear that? adversarial examples against automatic speech recognition. In 31st Conference on Neural Information Processing Systems (NIPS)."},{"key":"e_1_3_2_1_3_1","unstructured":"Amazon. 2021. Amazon Echo. https:\/\/www.amazon.com\/All-New-Echo-4th-Gen\/dp\/B07XKF5RM3."},{"key":"e_1_3_2_1_4_1","unstructured":"Fran\u00e7oise Beaufays. 2022. Ask a Techspert: How does Google Assistant understand your questions? https:\/\/blog.google\/products\/assistant\/ask-a-techspert-assistant-questions\/."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2018.00009"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2020.23055"},{"key":"e_1_3_2_1_7_1","volume-title":"29th USENIX Security Symposium (USENIX Security 20)","author":"Chen Yuxuan","year":"2020","unstructured":"Yuxuan Chen, Xuejing Yuan, Jiangshan Zhang, Yue Zhao, Shengzhi Zhang, Kai Chen, and XiaoFeng Wang. 2020. Devil's whisper: A general approach for physical adversarial attacks against commercial black-box speech recognition devices. In 29th USENIX Security Symposium (USENIX Security 20). 2667--2684."},{"key":"e_1_3_2_1_8_1","volume-title":"Houdini: Fooling deep structured visual and speech recognition models with adversarial examples. Advances in neural information processing systems 30","author":"Cisse Moustapha M","year":"2017","unstructured":"Moustapha M Cisse, Yossi Adi, Natalia Neverova, and Joseph Keshet. 2017. Houdini: Fooling deep structured visual and speech recognition models with adversarial examples. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_9_1","unstructured":"cleverhans. 2019. Lingvo. https:\/\/github.com\/cleverhans-lab\/cleverhans."},{"key":"e_1_3_2_1_10_1","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in Neural Information Processing Systems 34 (2021), 8780--8794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","volume-title":"On the Limitations of Stochastic Pre-processing Defenses. arXiv preprint arXiv:2206.09491","author":"Gao Yue","year":"2022","unstructured":"Yue Gao, Ilia Shumailov, Kassem Fawaz, and Nicolas Papernot. 2022. On the Limitations of Stochastic Pre-processing Defenses. arXiv preprint arXiv:2206.09491 (2022)."},{"key":"e_1_3_2_1_12_1","unstructured":"Wolf Garbe. 2022. SymSpell: 1 million times faster through Symmetric Delete spelling correction algorithm. https:\/\/github.com\/wolfgarbe\/SymSpell."},{"key":"e_1_3_2_1_13_1","volume-title":"Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572","author":"Goodfellow Ian J","year":"2014","unstructured":"Ian J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2014. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572 (2014)."},{"key":"e_1_3_2_1_14_1","unstructured":"Google. 2021. Google Assistant. https:\/\/assistant.google.com\/."},{"key":"e_1_3_2_1_15_1","unstructured":"Google. 2021. Google Home\/Nest. https:\/\/store.google.com\/product."},{"key":"e_1_3_2_1_16_1","first-page":"4218","article-title":"Improving robustness using generated data","volume":"34","author":"Gowal Sven","year":"2021","unstructured":"Sven Gowal, Sylvestre-Alvise Rebuffi, Olivia Wiles, Florian Stimberg, Dan Andrei Calian, and Timothy A Mann. 2021. Improving robustness using generated data. Advances in Neural Information Processing Systems 34 (2021), 4218--4233.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","volume-title":"Countering adversarial images using input transformations. arXiv preprint arXiv:1711.00117","author":"Guo Chuan","year":"2017","unstructured":"Chuan Guo, Mayank Rana, Moustapha Cisse, and Laurens Van Der Maaten. 2017. Countering adversarial images using input transformations. arXiv preprint arXiv:1711.00117 (2017)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3613261"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3607199.3607240"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3548606.3560660"},{"key":"e_1_3_2_1_21_1","unstructured":"Awni Hannun Carl Case Jared Casper Bryan Catanzaro Greg Diamos Erich Elsen Ryan Prenger Sanjeev Satheesh Shubho Sengupta Adam Coates et al. 2014. Deep speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)."},{"key":"e_1_3_2_1_22_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems 33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","volume-title":"30th USENIX Security Symposium (USENIX Security 21)","author":"Hussain Shehzeen","year":"2021","unstructured":"Shehzeen Hussain, Paarth Neekhara, Shlomo Dubnov, Julian McAuley, and Farinaz Koushanfar. 2021. {WaveGuard}: Understanding and Mitigating Audio Adversarial Examples. In 30th USENIX Security Symposium (USENIX Security 21). 2273--2290."},{"key":"e_1_3_2_1_24_1","volume-title":"Diffwave: A versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761","author":"Kong Zhifeng","year":"2020","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2020. Diffwave: A versatile diffusion model for audio synthesis. arXiv preprint arXiv:2009.09761 (2020)."},{"key":"e_1_3_2_1_25_1","unstructured":"Leibniz. 1700. Leibniz integral rule. https:\/\/en.wikipedia.org\/wiki\/Leibniz_integral_rule."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i11.26531"},{"key":"e_1_3_2_1_27_1","volume-title":"Symposium on Advances in Approximate Bayesian Inference. PMLR, 1--28","author":"Li Xuechen","year":"2020","unstructured":"Xuechen Li, Ting-Kam Leonard Wong, Ricky TQ Chen, and David K Duvenaud. 2020. Scalable gradients and variational inference for stochastic differential equations. In Symposium on Advances in Approximate Bayesian Inference. PMLR, 1--28."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3423348"},{"key":"e_1_3_2_1_29_1","volume-title":"Defensive quantization: When efficiency meets robustness. arXiv preprint arXiv:1904.08444","author":"Lin Ji","year":"2019","unstructured":"Ji Lin, Chuang Gan, and Song Han. 2019. Defensive quantization: When efficiency meets robustness. arXiv preprint arXiv:1904.08444 (2019)."},{"key":"e_1_3_2_1_30_1","volume-title":"Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083","author":"Madry Aleksander","year":"2017","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2017. Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083 (2017)."},{"key":"e_1_3_2_1_31_1","volume-title":"Diffusion Models for Adversarial Purification. arXiv preprint arXiv:2205.07460","author":"Nie Weili","year":"2022","unstructured":"Weili Nie, Brandon Guo, Yujia Huang, Chaowei Xiao, Arash Vahdat, and Anima Anandkumar. 2022. Diffusion Models for Adversarial Purification. arXiv preprint arXiv:2205.07460 (2022)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Peter Norvig. 2007. How to Write a Spelling Corrector. https:\/\/norvig.com\/spell-correct.html.","DOI":"10.1016\/S0262-4079(06)61118-4"},{"key":"e_1_3_2_1_33_1","unstructured":"OpenAI. 2022. dall-e-2. https:\/\/openai.com\/dall-e-2\/."},{"key":"e_1_3_2_1_34_1","unstructured":"opne ai. 2019. guided-diffusion. https:\/\/github.com\/openai\/guided-diffusion."},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PMLR, 5231--5240","author":"Qin Yao","year":"2019","unstructured":"Yao Qin, Nicholas Carlini, Garrison Cottrell, Ian Goodfellow, and Colin Raffel. 2019. Imperceptible, robust, and targeted adversarial examples for automatic speech recognition. In International conference on machine learning. PMLR, 5231--5240."},{"key":"e_1_3_2_1_36_1","unstructured":"Jonathan Shen Patrick Nguyen Yonghui Wu Zhifeng Chen Mia X Chen Ye Jia Anjuli Kannan Tara Sainath Yuan Cao Chung-Cheng Chiu et al. 2019. Lingvo: a modular and scalable framework for sequence-to-sequence modeling. arXiv preprint arXiv:1902.08295 (2019)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560531"},{"key":"e_1_3_2_1_38_1","volume-title":"Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456","author":"Song Yang","year":"2020","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2020. Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)."},{"key":"e_1_3_2_1_39_1","unstructured":"Speechly. 2023. Analyzing OpenAI's Whisper ASR Model's Word Error Rates Across Languages. https:\/\/www.speechly.com\/blog\/analyzing-open-ais-whisper-asr-models-word-error-rates-across-languages. Accessed: 2024-08-14."},{"key":"e_1_3_2_1_40_1","unstructured":"stability.ai. 2022. stable diffusion. https:\/\/stability.ai\/blog\/stable-diffusion-public-release."},{"key":"e_1_3_2_1_41_1","volume-title":"Intriguing properties of neural networks. arXiv preprint arXiv:1312.6199","author":"Szegedy Christian","year":"2013","unstructured":"Christian Szegedy, Wojciech Zaremba, Ilya Sutskever, Joan Bruna, Dumitru Erhan, Ian Goodfellow, and Rob Fergus. 2013. Intriguing properties of neural networks. arXiv preprint arXiv:1312.6199 (2013)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2019.00016"},{"key":"e_1_3_2_1_43_1","article-title":"Hey siri: An on-device dnn-powered voice trigger for apple's personal assistant","volume":"1","author":"Team Siri","year":"2017","unstructured":"Siri Team. 2017. Hey siri: An on-device dnn-powered voice trigger for apple's personal assistant. Apple Machine Learning Journal 1, 6 (2017).","journal-title":"Apple Machine Learning Journal"},{"key":"e_1_3_2_1_44_1","first-page":"11287","article-title":"Score-based generative modeling in latent space","volume":"34","author":"Vahdat Arash","year":"2021","unstructured":"Arash Vahdat, Karsten Kreis, and Jan Kautz. 2021. Score-based generative modeling in latent space. Advances in Neural Information Processing Systems 34 (2021), 11287--11302.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"e_1_3_2_1_46_1","volume-title":"A Practical Survey on Emerging Threats from AI-driven Voice Attacks: How Vulnerable are Commercial Voice Control Systems? arXiv preprint arXiv:2312.06010","author":"Wang Yuanda","year":"2023","unstructured":"Yuanda Wang, Qiben Yan, Nikolay Ivanov, and Xun Chen. 2023. A Practical Survey on Emerging Threats from AI-driven Voice Attacks: How Vulnerable are Commercial Voice Control Systems? arXiv preprint arXiv:2312.06010 (2023)."},{"key":"e_1_3_2_1_47_1","volume-title":"Defending against adversarial audio via diffusion model. arXiv preprint arXiv:2303.01507","author":"Wu Shutong","year":"2023","unstructured":"Shutong Wu, Jiongxiao Wang, Wei Ping, Weili Nie, and Chaowei Xiao. 2023. Defending against adversarial audio via diffusion model. arXiv preprint arXiv:2303.01507 (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"Robust audio adversarial example for a physical attack. arXiv preprint arXiv:1810.11793","author":"Yakura Hiromu","year":"2018","unstructured":"Hiromu Yakura and Jun Sakuma. 2018. Robust audio adversarial example for a physical attack. arXiv preprint arXiv:1810.11793 (2018)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2020.24068"},{"key":"e_1_3_2_1_50_1","volume-title":"27th USENIX Security Symposium (USENIX Security 18)","author":"Yuan Xuejing","year":"2018","unstructured":"Xuejing Yuan, Yuxuan Chen, Yue Zhao, Yunhui Long, Xiaokang Liu, Kai Chen, Shengzhi Zhang, Heqing Huang, Xiaofeng Wang, and Carl A Gunter. 2018. Commandersong: A systematic approach for practical adversarial voice recognition. In 27th USENIX Security Symposium (USENIX Security 18). 49--64."}],"event":{"name":"ACM MobiCom '24: 30th Annual International Conference on Mobile Computing and Networking","location":"Washington D.C. DC USA","acronym":"ACM MobiCom '24","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing"]},"container-title":["Proceedings of the 30th Annual International Conference on Mobile Computing and Networking"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3636534.3690692","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3636534.3690692","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3636534.3690692","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:36Z","timestamp":1750295856000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3636534.3690692"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"references-count":50,"alternative-id":["10.1145\/3636534.3690692","10.1145\/3636534"],"URL":"https:\/\/doi.org\/10.1145\/3636534.3690692","relation":{},"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"2024-12-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}