{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:46:07Z","timestamp":1778085967006,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T00:00:00Z","timestamp":1755993600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2310207"],"award-info":[{"award-number":["2310207"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,25]]},"DOI":"10.1145\/3708821.3733881","type":"proceedings-article","created":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T06:30:56Z","timestamp":1755066656000},"page":"696-709","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["ClearMask: Noise-Free and Naturalness-Preserving Protection Against Voice Deepfake Attacks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2062-9013","authenticated-orcid":false,"given":"Yuanda","family":"Wang","sequence":"first","affiliation":[{"name":"Michigan State University, East Lansing, Michigan, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0471-7063","authenticated-orcid":false,"given":"Bocheng","family":"Chen","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, Michigan, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3779-4679","authenticated-orcid":false,"given":"Hanqing","family":"Guo","sequence":"additional","affiliation":[{"name":"University of Hawaii at M?noa, Honolulu, Hawaii, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9353-9042","authenticated-orcid":false,"given":"Guangjing","family":"Wang","sequence":"additional","affiliation":[{"name":"University of South Florida, Tampa, Florida, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7149-3344","authenticated-orcid":false,"given":"Weikang","family":"Ding","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, Michigan, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6272-7668","authenticated-orcid":false,"given":"Qiben","family":"Yan","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, Michigan, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,24]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"2010. Apple FaceTime. https:\/\/apps.apple.com\/us\/app\/facetime\/."},{"key":"e_1_3_3_2_3_2","unstructured":"2019. Fraudsters Used AI to Mimic CEO\u2019s Voice in Unusual Cybercrime Case. https:\/\/www.wsj.com\/articles\/fraudsters-use-ai-to-mimic-ceos-voice-in-unusual-cybercrime-case-11567157402."},{"key":"e_1_3_3_2_4_2","unstructured":"2023. ElevenLabs. https:\/\/www.elevenlabs.io\/."},{"key":"e_1_3_3_2_5_2","unstructured":"2023. How I Broke Into a Bank Account With an AI-Generated Voice. https:\/\/www.vice.com\/en\/article\/dy7axa\/how-i-broke-into-a-bank-account-with-an-ai-generated-voice."},{"key":"e_1_3_3_2_6_2","unstructured":"2023. plat.ht. https:\/\/play.ht\/."},{"key":"e_1_3_3_2_7_2","unstructured":"2023. \u2018Mom these bad men have me\u2019: She believes scammers cloned her daughter\u2019s voice in a fake kidnapping. https:\/\/www.cnn.com\/2023\/04\/29\/us\/ai-scam-calls-kidnapping-cec\/index.html."},{"key":"e_1_3_3_2_8_2","unstructured":"2024. ChatGPT 4.0. https:\/\/chatgpt.com\/."},{"key":"e_1_3_3_2_9_2","unstructured":"2024. Google Text-to-Speech. https:\/\/cloud.google.com\/text-to-speech."},{"key":"e_1_3_3_2_10_2","unstructured":"2024. Soniox: Introducing AudioMind. https:\/\/soniox.com\/."},{"key":"e_1_3_3_2_11_2","first-page":"2685","volume-title":"29th USENIX Security Symposium (USENIX Security 20)","author":"Ahmed Muhammad\u00a0Ejaz","year":"2020","unstructured":"Muhammad\u00a0Ejaz Ahmed, Il-Youp Kwak, Jun\u00a0Ho Huh, Iljoo Kim, Taekkyung Oh, and Hyoungshick Kim. 2020. Void: A fast and light voice liveness detection system. In 29th USENIX Security Symposium (USENIX Security 20). 2685\u20132702."},{"key":"e_1_3_3_2_12_2","unstructured":"Rosana Ardila Megan Branson Kelly Davis Michael Henretty Michael Kohler Josh Meyer Reuben Morais Lindsay Saunders Francis\u00a0M Tyers and Gregor Weber. 2019. Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1912.06670 (2019)."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP46215.2023.10179383"},{"key":"e_1_3_3_2_14_2","first-page":"2709","volume-title":"International Conference on Machine Learning","author":"Casanova Edresson","year":"2022","unstructured":"Edresson Casanova, Julian Weber, Christopher\u00a0D Shulby, Arnaldo\u00a0Candido Junior, Eren G\u00f6lge, and Moacir\u00a0A Ponti. 2022. Yourtts: Towards zero-shot multi-speaker tts and zero-shot voice conversion for everyone. In International Conference on Machine Learning. PMLR, 2709\u20132720."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Meng Chen Li Lu Jiadi Yu Zhongjie Ba Feng Lin and Kui Ren. 2023. AdvReverb: Rethinking the Stealthiness of Audio Adversarial Examples to Human Perception. IEEE Transactions on Information Forensics and Security (2023).","DOI":"10.1109\/TIFS.2023.3345639"},{"key":"e_1_3_3_2_16_2","volume-title":"33th USENIX security symposium (USENIX Security 24)","author":"Chen Meng","year":"2024","unstructured":"Meng Chen, Xiangyu Xu, Li Lu, Zhongjie Ba, Feng Lin, and Kui Ren. 2024. Devil in the Room: Triggering Audio Backdoors in the Physical World. In 33th USENIX security symposium (USENIX Security 24)."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2020.23055"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.2003-664"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414257"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Ju-chieh Chou Cheng-chieh Yeh and Hung-yi Lee. 2019. One-shot voice conversion by separating speaker and content representations with instance normalization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.05742 (2019).","DOI":"10.21437\/Interspeech.2019-2663"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Brecht Desplanques Jenthe Thienpondt and Kris Demuynck. 2020. Ecapa-tdnn: Emphasized channel attention propagation and aggregation in tdnn based speaker verification. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.07143 (2020).","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.265"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461711"},{"key":"e_1_3_3_2_24_2","unstructured":"Helmut Haas. 1972. The influence of a single echo on the audibility of speech. Journal of the audio engineering society 20 2 (1972) 146\u2013159."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383529"},{"key":"e_1_3_3_2_26_2","unstructured":"Rongjie Huang Yi Ren Jinglin Liu Chenye Cui and Zhou Zhao. 2022. Generspeech: Towards style transfer for generalizable out-of-domain text-to-speech. Advances in Neural Information Processing Systems 35 (2022) 10970\u201310983."},{"key":"e_1_3_3_2_27_2","first-page":"2273","volume-title":"30th USENIX security symposium (USENIX Security 21)","author":"Hussain Shehzeen","year":"2021","unstructured":"Shehzeen Hussain, Paarth Neekhara, Shlomo Dubnov, Julian McAuley, and Farinaz Koushanfar. 2021. { WaveGuard} : Understanding and mitigating audio adversarial examples. In 30th USENIX security symposium (USENIX Security 21). 2273\u20132290."},{"key":"e_1_3_3_2_28_2","unstructured":"Ye Jia Yu Zhang Ron Weiss Quan Wang Jonathan Shen Fei Ren Patrick Nguyen Ruoming Pang Ignacio Lopez\u00a0Moreno Yonghui Wu et\u00a0al. 2018. Transfer learning from speaker verification to multispeaker text-to-speech synthesis. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3665451.3665532"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"e_1_3_3_2_31_2","unstructured":"Jungil Kong Jaehyeon Kim and Jaekyoung Bae. 2020. Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis. Advances in neural information processing systems 33 (2020) 17022\u201317033."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746956"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627106.3627183"},{"key":"e_1_3_3_2_34_2","volume-title":"International Conference on Learning Representations","author":"Madry Aleksander","year":"2018","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2018. Towards Deep Learning Models Resistant to Adversarial Attacks. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_3_2_36_2","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et\u00a0al. 2019. Pytorch: An imperative style high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_2_37_2","unstructured":"Vadim Popov Ivan Vovk Vladimir Gogoryan Tasnima Sadekova Mikhail Kudinov and Jiansheng Wei. 2021. Diffusion-based voice conversion with fast maximum likelihood sampling scheme. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.13821 (2021)."},{"key":"e_1_3_3_2_38_2","first-page":"7836","volume-title":"International Conference on Machine Learning","author":"Qian Kaizhi","year":"2020","unstructured":"Kaizhi Qian, Yang Zhang, Shiyu Chang, Mark Hasegawa-Johnson, and David Cox. 2020. Unsupervised speech decomposition via triple information bottleneck. In International Conference on Machine Learning. PMLR, 7836\u20137846."},{"key":"e_1_3_3_2_39_2","first-page":"5210","volume-title":"International Conference on Machine Learning","author":"Qian Kaizhi","year":"2019","unstructured":"Kaizhi Qian, Yang Zhang, Shiyu Chang, Xuesong Yang, and Mark Hasegawa-Johnson. 2019. Autovc: Zero-shot voice style transfer with only autoencoder loss. In International Conference on Machine Learning. PMLR, 5210\u20135219."},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9415103"},{"key":"e_1_3_3_2_41_2","unstructured":"Mirco Ravanelli Titouan Parcollet Peter Plantinga Aku Rouhe Samuele Cornell Loren Lugosch Cem Subakan Nauman Dawalatabad Abdelwahab Heba Jianyuan Zhong Ju-Chieh Chou Sung-Lin Yeh Szu-Wei Fu Chien-Feng Liao Elena Rastorgueva Fran\u00e7ois Grondin William Aris Hwidong Na Yan Gao Renato\u00a0De Mori and Yoshua Bengio. 2021. SpeechBrain: A General-Purpose Speech Toolkit. arxiv:https:\/\/arXiv.org\/abs\/2106.04624\u00a0[eess.AS] arXiv:https:\/\/arXiv.org\/abs\/2106.04624."},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3427228.3427276"},{"key":"e_1_3_3_2_43_2","unstructured":"Christian\u00a0J Steinmetz Nicholas\u00a0J Bryan and Joshua\u00a0D Reiss. 2022. Style transfer of audio effects with differentiable signal processing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.08759 (2022)."},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"crossref","unstructured":"James Traer and Josh\u00a0H McDermott. 2016. Statistics of natural reverberation enable perceptual separation of sound and space. Proceedings of the National Academy of Sciences 113 48 (2016) E7856\u2013E7865.","DOI":"10.1073\/pnas.1612524113"},{"key":"e_1_3_3_2_45_2","unstructured":"Christophe Veaux Junichi Yamagishi Kirsten MacDonald et\u00a0al. 2016. Superseded-cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit. (2016)."},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462665"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3558482.3590189"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2022.24254"},{"key":"e_1_3_3_2_49_2","first-page":"5180","volume-title":"International conference on machine learning","author":"Wang Yuxuan","year":"2018","unstructured":"Yuxuan Wang, Daisy Stanton, Yu Zhang, RJ-Skerry Ryan, Eric Battenberg, Joel Shor, Ying Xiao, Ye Jia, Fei Ren, and Rif\u00a0A Saurous. 2018. Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis. In International conference on machine learning. PMLR, 5180\u20135189."},{"key":"e_1_3_3_2_50_2","unstructured":"Yuanda Wang Qiben Yan Nikolay Ivanov and Xun Chen. 2023. A Practical Survey on Emerging Threats from AI-driven Voice Attacks: How Vulnerable are Commercial Voice Control Systems? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.06010 (2023)."},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3460120.3484742"},{"key":"e_1_3_3_2_52_2","first-page":"3799","volume-title":"32nd USENIX Security Symposium (USENIX Security 23)","author":"Yu Zhiyuan","year":"2023","unstructured":"Zhiyuan Yu, Yuanhaur Chang, Ning Zhang, and Chaowei Xiao. 2023. { SMACK} : Semantically Meaningful Adversarial Audio Attack. In 32nd USENIX Security Symposium (USENIX Security 23). 3799\u20133816."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3576915.3623209"},{"key":"e_1_3_3_2_54_2","unstructured":"Anna Zhadan. 2023. Emma Watson reads Mein Kampf while Biden announces invasion of Russia in latest AI voice clone abuse. https:\/\/cybernews.com\/news\/ai-voice-clone-misuse\/."}],"event":{"name":"ASIA CCS '25: 20th ACM Asia Conference on Computer and Communications Security","location":"Hanoi Vietnam","acronym":"ASIA CCS '25","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 20th ACM Asia Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3708821.3733881","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708821.3733881","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T07:26:57Z","timestamp":1755070017000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708821.3733881"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,24]]},"references-count":53,"alternative-id":["10.1145\/3708821.3733881","10.1145\/3708821"],"URL":"https:\/\/doi.org\/10.1145\/3708821.3733881","relation":{},"subject":[],"published":{"date-parts":[[2025,8,24]]},"assertion":[{"value":"2025-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}