{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:03:22Z","timestamp":1765357402982,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["CNS-1950171"],"award-info":[{"award-number":["CNS-1950171"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1145\/3558482.3590189","type":"proceedings-article","created":{"date-parts":[[2023,6,28]],"date-time":"2023-06-28T16:10:08Z","timestamp":1687968608000},"page":"239-250","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["VSMask: Defending Against Voice Synthesis Attack via Real-Time Predictive Perturbation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2062-9013","authenticated-orcid":false,"given":"Yuanda","family":"Wang","sequence":"first","affiliation":[{"name":"Michigan State University, East Lansing, MI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3779-4679","authenticated-orcid":false,"given":"Hanqing","family":"Guo","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, MI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9353-9042","authenticated-orcid":false,"given":"Guangjing","family":"Wang","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, MI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0471-7063","authenticated-orcid":false,"given":"Bocheng","family":"Chen","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, MI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6272-7668","authenticated-orcid":false,"given":"Qiben","family":"Yan","sequence":"additional","affiliation":[{"name":"Michigan State University, East Lansing, MI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,6,28]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Deep insights of deepfake technology: A review,\" arXiv preprint arXiv:2105.00192","author":"Mahmud B. U.","year":"2021","unstructured":"B. U. Mahmud and A. Sharmin , \" Deep insights of deepfake technology: A review,\" arXiv preprint arXiv:2105.00192 , 2021 . B. U. Mahmud and A. Sharmin, \"Deep insights of deepfake technology: A review,\" arXiv preprint arXiv:2105.00192, 2021."},{"key":"e_1_3_2_2_2_1","volume-title":"June 23","author":"Amazon's","year":"2022","unstructured":"\" Amazon's alexa impersonates grandma , creeping out internet,\" https:\/\/www.bloomberg.com\/news\/articles\/2022-06--23\/amazon-s-alexaimpersonates- grandma-creeping-out-the-internet , June 23 , 2022 . \"Amazon's alexa impersonates grandma, creeping out internet,\" https:\/\/www.bloomberg.com\/news\/articles\/2022-06--23\/amazon-s-alexaimpersonates- grandma-creeping-out-the-internet, June 23, 2022."},{"key":"e_1_3_2_2_3_1","volume-title":"Accessed on","author":"Voice","year":"2022","unstructured":"\" Voice guidance & navigation,\" https:\/\/www.readspeaker.ai\/applications\/voiceguidance- and-navigation\/ , Accessed on August 15, 2022 . \"Voice guidance & navigation,\" https:\/\/www.readspeaker.ai\/applications\/voiceguidance- and-navigation\/, Accessed on August 15, 2022."},{"volume-title":"May 21","year":"2015","key":"e_1_3_2_2_4_1","unstructured":"\"Voiceprint : The new wechat password,\" https:\/\/blog.wechat.com\/2015\/05\/21\/vo iceprint-the-new-wechat-password\/ , May 21 , 2015 . \"Voiceprint: The new wechat password,\" https:\/\/blog.wechat.com\/2015\/05\/21\/vo iceprint-the-new-wechat-password\/, May 21, 2015."},{"key":"e_1_3_2_2_5_1","volume-title":"Ghosttalk: Interactive attack on smartphone voice system through power line,\" in Network and Distributed Systems Security (NDSS) Symposium","author":"Wang Y.","year":"2022","unstructured":"Y. Wang , H. Guo , and Q. Yan , \" Ghosttalk: Interactive attack on smartphone voice system through power line,\" in Network and Distributed Systems Security (NDSS) Symposium , 2022 . Y. Wang, H. Guo, and Q. Yan, \"Ghosttalk: Interactive attack on smartphone voice system through power line,\" in Network and Distributed Systems Security (NDSS) Symposium, 2022."},{"key":"e_1_3_2_2_6_1","first-page":"235","volume-title":"hello, it's me\": Deep learning-based speech synthesis attacks in the real world,\" in Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","author":"Wenger E.","year":"2021","unstructured":"E. Wenger , M. Bronckers , C. Cianfarani , J. Cryan , A. Sha , H. Zheng , and B. Y. Zhao , \"\" hello, it's me\": Deep learning-based speech synthesis attacks in the real world,\" in Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security , 2021 , pp. 235 -- 251 . E. Wenger, M. Bronckers, C. Cianfarani, J. Cryan, A. Sha, H. Zheng, and B. Y. Zhao, \"\" hello, it's me\": Deep learning-based speech synthesis attacks in the real world,\" in Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security, 2021, pp. 235--251."},{"key":"e_1_3_2_2_7_1","first-page":"2685","volume-title":"Void: A fast and light voice liveness detection system,\" in 29th USENIX Security Symposium (USENIX Security 20)","author":"Ahmed M. E.","year":"2020","unstructured":"M. E. Ahmed , I.-Y. Kwak , J. H. Huh , I. Kim , T. Oh , and H. Kim , \" Void: A fast and light voice liveness detection system,\" in 29th USENIX Security Symposium (USENIX Security 20) , 2020 , pp. 2685 -- 2702 . M. E. Ahmed, I.-Y. Kwak, J. H. Huh, I. Kim, T. Oh, and H. Kim, \"Void: A fast and light voice liveness detection system,\" in 29th USENIX Security Symposium (USENIX Security 20), 2020, pp. 2685--2702."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3076358"},{"key":"e_1_3_2_2_9_1","volume-title":"Channel-wise gated res2net: Towards robust detection of synthetic speech attacks,\" arXiv preprint arXiv:2107.08803","author":"Li X.","year":"2021","unstructured":"X. Li , X. Wu , H. Lu , X. Liu , and H. Meng , \" Channel-wise gated res2net: Towards robust detection of synthetic speech attacks,\" arXiv preprint arXiv:2107.08803 , 2021 . X. Li, X. Wu, H. Lu, X. Liu, and H. Meng, \"Channel-wise gated res2net: Towards robust detection of synthetic speech attacks,\" arXiv preprint arXiv:2107.08803, 2021."},{"key":"e_1_3_2_2_10_1","first-page":"552","volume-title":"IEEE","author":"Huang C.-y.","year":"2021","unstructured":"C.-y. Huang , Y. Y. Lin , H.-y. Lee , and L.-s. Lee , \" Defending your voice : Adversarial attack on voice conversion,\" in 2021 IEEE Spoken Language Technology Workshop (SLT) . IEEE , 2021 , pp. 552 -- 559 . C.-y. Huang, Y. Y. Lin, H.-y. Lee, and L.-s. Lee, \"Defending your voice: Adversarial attack on voice conversion,\" in 2021 IEEE Spoken Language Technology Workshop (SLT). IEEE, 2021, pp. 552--559."},{"key":"e_1_3_2_2_11_1","volume-title":"Real-time neural voice camouflage,\" in International Conference on Learning Representations","author":"Chiquier M.","year":"2021","unstructured":"M. Chiquier , C. Mao , and C. Vondrick , \" Real-time neural voice camouflage,\" in International Conference on Learning Representations , 2021 . M. Chiquier, C. Mao, and C. Vondrick, \"Real-time neural voice camouflage,\" in International Conference on Learning Representations, 2021."},{"volume-title":"November, 2022.","year":"2022","key":"e_1_3_2_2_12_1","unstructured":"\"71 instagram statistics 2022 ,\" https:\/\/https:\/\/www.demandsage.com\/instagramstatistics\/ , November, 2022. \"71 instagram statistics 2022,\" https:\/\/https:\/\/www.demandsage.com\/instagramstatistics\/, November, 2022."},{"key":"e_1_3_2_2_13_1","volume-title":"Adversarial attacks against automatic speech recognition systems via psychoacoustic hiding,\" arXiv preprint arXiv:1808.05665","author":"Sch\u00f6nherr L.","year":"2018","unstructured":"L. Sch\u00f6nherr , K. Kohls , S. Zeiler , T. Holz , and D. Kolossa , \" Adversarial attacks against automatic speech recognition systems via psychoacoustic hiding,\" arXiv preprint arXiv:1808.05665 , 2018 . L. Sch\u00f6nherr, K. Kohls, S. Zeiler, T. Holz, and D. Kolossa, \"Adversarial attacks against automatic speech recognition systems via psychoacoustic hiding,\" arXiv preprint arXiv:1808.05665, 2018."},{"key":"e_1_3_2_2_14_1","volume-title":"Exploring targeted universal adversarial perturbations to end-to-end asr models,\" arXiv preprint arXiv:2104.02757","author":"Lu Z.","year":"2021","unstructured":"Z. Lu , W. Han , Y. Zhang , and L. Cao , \" Exploring targeted universal adversarial perturbations to end-to-end asr models,\" arXiv preprint arXiv:2104.02757 , 2021 . Z. Lu, W. Han, Y. Zhang, and L. Cao, \"Exploring targeted universal adversarial perturbations to end-to-end asr models,\" arXiv preprint arXiv:2104.02757, 2021."},{"key":"e_1_3_2_2_15_1","volume-title":"Accessed on","author":"Equal","year":"2022","unstructured":"\" Equal loudness contours (iso 226--2003),\" https:\/\/chart-studio.plotly.com\/ ~mrlyu le\/16\/equal-loudness-contours-iso-226--2003\/#\/ , Accessed on August 10, 2022 . \"Equal loudness contours (iso 226--2003),\" https:\/\/chart-studio.plotly.com\/~mrlyu le\/16\/equal-loudness-contours-iso-226--2003\/#\/, Accessed on August 10, 2022."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"J.-c. Chou C.-c. Yeh and H.-y. Lee \"One-shot voice conversion by separating speaker and content representations with instance normalization \" arXiv preprint arXiv:1904.05742 2019.  J.-c. Chou C.-c. Yeh and H.-y. Lee \"One-shot voice conversion by separating speaker and content representations with instance normalization \" arXiv preprint arXiv:1904.05742 2019.","DOI":"10.21437\/Interspeech.2019-2663"},{"key":"e_1_3_2_2_17_1","first-page":"5210","volume-title":"PMLR","author":"Qian K.","year":"2019","unstructured":"K. Qian , Y. Zhang , S. Chang , X. Yang , and M. Hasegawa-Johnson , \" Autovc: Zeroshot voice style transfer with only autoencoder loss,\" in International Conference on Machine Learning . PMLR , 2019 , pp. 5210 -- 5219 . K. Qian, Y. Zhang, S. Chang, X. Yang, and M. Hasegawa-Johnson, \"Autovc: Zeroshot voice style transfer with only autoencoder loss,\" in International Conference on Machine Learning. PMLR, 2019, pp. 5210--5219."},{"key":"e_1_3_2_2_18_1","volume-title":"Wu et al., \"Transfer learning from speaker verification to multispeaker text-to-speech synthesis,\" Advances in neural information processing systems","author":"Jia Y.","year":"2018","unstructured":"Y. Jia , Y. Zhang , R. Weiss , Q. Wang , J. Shen , F. Ren , P. Nguyen , R. Pang , I. Lopez Moreno , Y. Wu et al., \"Transfer learning from speaker verification to multispeaker text-to-speech synthesis,\" Advances in neural information processing systems , vol. 31 , 2018 . Y. Jia, Y. Zhang, R. Weiss, Q. Wang, J. Shen, F. Ren, P. Nguyen, R. Pang, I. Lopez Moreno, Y. Wu et al., \"Transfer learning from speaker verification to multispeaker text-to-speech synthesis,\" Advances in neural information processing systems, vol. 31, 2018."},{"key":"e_1_3_2_2_19_1","volume-title":"MacDonald et al., \"Cstr vctk corpus: English multispeaker corpus for cstr voice cloning toolkit,\" University of Edinburgh","author":"Veaux C.","year":"2017","unstructured":"C. Veaux , J. Yamagishi , K. MacDonald et al., \"Cstr vctk corpus: English multispeaker corpus for cstr voice cloning toolkit,\" University of Edinburgh . The Centre for Speech Technology Research (CSTR) , 2017 . C. Veaux, J. Yamagishi, K. MacDonald et al., \"Cstr vctk corpus: English multispeaker corpus for cstr voice cloning toolkit,\" University of Edinburgh. The Centre for Speech Technology Research (CSTR), 2017."},{"key":"e_1_3_2_2_20_1","first-page":"5206","volume-title":"IEEE","author":"Panayotov V.","year":"2015","unstructured":"V. Panayotov , G. Chen , D. Povey , and S. Khudanpur , \" Librispeech: an asr corpus based on public domain audio books,\" in 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP) . IEEE , 2015 , pp. 5206 -- 5210 . V. Panayotov, G. Chen, D. Povey, and S. Khudanpur, \"Librispeech: an asr corpus based on public domain audio books,\" in 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, 2015, pp. 5206--5210."},{"key":"e_1_3_2_2_21_1","volume-title":"SpeechBrain: A general-purpose speech toolkit","author":"Ravanelli M.","year":"2021","unstructured":"M. Ravanelli , T. Parcollet , P. Plantinga , A. Rouhe , S. Cornell , L. Lugosch , C. Subakan , N. Dawalatabad , A. Heba , J. Zhong , J.-C. Chou , S.-L. Yeh , S.-W. Fu , C.-F. Liao , E. Rastorgueva , F. Grondin , W. Aris , H. Na , Y. Gao , R. D. Mori , and Y. Bengio , \" SpeechBrain: A general-purpose speech toolkit ,\" 2021 , arXiv: 2106.04624. M. Ravanelli, T. Parcollet, P. Plantinga, A. Rouhe, S. Cornell, L. Lugosch, C. Subakan, N. Dawalatabad, A. Heba, J. Zhong, J.-C. Chou, S.-L. Yeh, S.-W. Fu, C.-F. Liao, E. Rastorgueva, F. Grondin,W. Aris, H. Na, Y. Gao, R. D. Mori, and Y. Bengio, \"SpeechBrain: A general-purpose speech toolkit,\" 2021, arXiv:2106.04624."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/35.46671"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"e_1_3_2_2_24_1","first-page":"18","volume-title":"Adabelief optimizer: Adapting stepsizes by the belief in observed gradients,\" Advances in neural information processing systems","author":"Zhuang J.","year":"2020","unstructured":"J. Zhuang , T. Tang , Y. Ding , S. C. Tatikonda , N. Dvornek , X. Papademetris , and J. Duncan , \" Adabelief optimizer: Adapting stepsizes by the belief in observed gradients,\" Advances in neural information processing systems , vol. 33 , pp. 18 795-- 18 806, 2020 . J. Zhuang, T. Tang, Y. Ding, S. C. Tatikonda, N. Dvornek, X. Papademetris, and J. Duncan, \"Adabelief optimizer: Adapting stepsizes by the belief in observed gradients,\" Advances in neural information processing systems, vol. 33, pp. 18 795-- 18 806, 2020."},{"key":"e_1_3_2_2_25_1","first-page":"2273","volume-title":"USENIX Association","author":"Hussain S.","year":"2021","unstructured":"S. Hussain , P. Neekhara , S. Dubnov , J. McAuley , and F. Koushanfar , \" WaveGuard: Understanding and mitigating audio adversarial examples,\" in 30th USENIX Security Symposium (USENIX Security 21) . USENIX Association , Aug. 2021 , pp. 2273 -- 2290 . [Online]. Available : https:\/\/www.usenix.org\/conference\/usenixsecu rity21\/presentation\/hussain S. Hussain, P. Neekhara, S. Dubnov, J. McAuley, and F. Koushanfar, \"WaveGuard: Understanding and mitigating audio adversarial examples,\" in 30th USENIX Security Symposium (USENIX Security 21). USENIX Association, Aug. 2021, pp. 2273--2290. [Online]. Available: https:\/\/www.usenix.org\/conference\/usenixsecu rity21\/presentation\/hussain"},{"key":"e_1_3_2_2_26_1","first-page":"1310","volume-title":"PMLR","author":"Cohen J.","year":"2019","unstructured":"J. Cohen , E. Rosenfeld , and Z. Kolter , \" Certified adversarial robustness via randomized smoothing,\" in international conference on machine learning . PMLR , 2019 , pp. 1310 -- 1320 . J. Cohen, E. Rosenfeld, and Z. Kolter, \"Certified adversarial robustness via randomized smoothing,\" in international conference on machine learning. PMLR, 2019, pp. 1310--1320."},{"key":"e_1_3_2_2_27_1","volume-title":"Towards deep learning models resistant to adversarial attacks,\" in International Conference on Learning Representations","author":"Madry A.","year":"2018","unstructured":"A. Madry , A. Makelov , L. Schmidt , D. Tsipras , and A. Vladu , \" Towards deep learning models resistant to adversarial attacks,\" in International Conference on Learning Representations , 2018 . A. Madry, A. Makelov, L. Schmidt, D. Tsipras, and A. Vladu, \"Towards deep learning models resistant to adversarial attacks,\" in International Conference on Learning Representations, 2018."},{"key":"e_1_3_2_2_28_1","volume-title":"Defending against adversarial audio via diffusion model,\" arXiv preprint arXiv:2303.01507","author":"Wu S.","year":"2023","unstructured":"S. Wu , J. Wang , W. Ping , W. Nie , and C. Xiao , \" Defending against adversarial audio via diffusion model,\" arXiv preprint arXiv:2303.01507 , 2023 . S. Wu, J. Wang, W. Ping, W. Nie, and C. Xiao, \"Defending against adversarial audio via diffusion model,\" arXiv preprint arXiv:2303.01507, 2023."},{"key":"e_1_3_2_2_29_1","volume-title":"Robust audio adversarial example for a physical attack,\" arXiv preprint arXiv:1810.11793","author":"Yakura H.","year":"2018","unstructured":"H. Yakura and J. Sakuma , \" Robust audio adversarial example for a physical attack,\" arXiv preprint arXiv:1810.11793 , 2018 . H. Yakura and J. Sakuma, \"Robust audio adversarial example for a physical attack,\" arXiv preprint arXiv:1810.11793, 2018."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1109\/ICASSP.1997.599604","volume-title":"Noise cancelling for microphone arrays,\" in 1997 IEEE International Conference on Acoustics, Speech, and Signal Processing","author":"Meyer J.","year":"1997","unstructured":"J. Meyer and C. Sydow , \" Noise cancelling for microphone arrays,\" in 1997 IEEE International Conference on Acoustics, Speech, and Signal Processing , vol. 1 , 1997 , pp. 211 -- 213 vol.1. J. Meyer and C. Sydow, \"Noise cancelling for microphone arrays,\" in 1997 IEEE International Conference on Acoustics, Speech, and Signal Processing, vol. 1, 1997, pp. 211--213 vol.1."},{"key":"e_1_3_2_2_31_1","first-page":"86","volume-title":"Black-box adversarial attacks on commercial speech platforms with minimal information,\" in Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security","author":"Zheng B.","year":"2021","unstructured":"B. Zheng , P. Jiang , Q. Wang , Q. Li , C. Shen , C. Wang , Y. Ge , Q. Teng , and S. Zhang , \" Black-box adversarial attacks on commercial speech platforms with minimal information,\" in Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security , 2021 , pp. 86 -- 107 . B. Zheng, P. Jiang, Q.Wang, Q. Li, C. Shen, C.Wang, Y. Ge, Q. Teng, and S. Zhang, \"Black-box adversarial attacks on commercial speech platforms with minimal information,\" in Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security, 2021, pp. 86--107."},{"key":"e_1_3_2_2_32_1","volume-title":"Voice conversion with smoothed gmm and map adaptation,\" in Eighth European Conference on Speech Communication and Technology","author":"Chen Y.","year":"2003","unstructured":"Y. Chen , M. Chu , E. Chang , J. Liu , and R. Liu , \" Voice conversion with smoothed gmm and map adaptation,\" in Eighth European Conference on Speech Communication and Technology , 2003 . Y. Chen, M. Chu, E. Chang, J. Liu, and R. Liu, \"Voice conversion with smoothed gmm and map adaptation,\" in Eighth European Conference on Speech Communication and Technology, 2003."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2038663"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2041699"},{"key":"e_1_3_2_2_35_1","first-page":"19","volume-title":"IEEE","author":"Mohammadi S. H.","year":"2014","unstructured":"S. H. Mohammadi and A. Kain , \" Voice conversion using deep neural networks with speaker-independent pre-training,\" in 2014 IEEE Spoken Language Technology Workshop (SLT) . IEEE , 2014 , pp. 19 -- 23 . S. H. Mohammadi and A. Kain, \"Voice conversion using deep neural networks with speaker-independent pre-training,\" in 2014 IEEE Spoken Language Technology Workshop (SLT). IEEE, 2014, pp. 19--23."},{"key":"e_1_3_2_2_36_1","first-page":"290","volume-title":"Speech and Signal Processing (ICASSP). IEEE","author":"Takamichi S.","year":"2014","unstructured":"S. Takamichi , T. Toda , G. Neubig , S. Sakti , and S. Nakamura , \" A postfilter to modify the modulation spectrum in hmm-based speech synthesis,\" in 2014 IEEE International Conference on Acoustics , Speech and Signal Processing (ICASSP). IEEE , 2014 , pp. 290 -- 294 . S. Takamichi, T. Toda, G. Neubig, S. Sakti, and S. Nakamura, \"A postfilter to modify the modulation spectrum in hmm-based speech synthesis,\" in 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2014, pp. 290--294."},{"key":"e_1_3_2_2_37_1","first-page":"1","volume-title":"IEEE","author":"Hsu C.-C.","year":"2016","unstructured":"C.-C. Hsu , H.-T. Hwang , Y.-C. Wu , Y. Tsao , and H.-M. Wang , \"Voice conversion from non-parallel corpora using variational auto-encoder,\" in 2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA) . IEEE , 2016 , pp. 1 -- 6 . C.-C. Hsu, H.-T. Hwang, Y.-C. Wu, Y. Tsao, and H.-M. Wang, \"Voice conversion from non-parallel corpora using variational auto-encoder,\" in 2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA). IEEE, 2016, pp. 1--6."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"C. C. Hsu H.-T. Hwang Y.-C. Wu Y. Tsao and H.-M. Wang \"Voice conversion from unaligned corpora using variational autoencoding wasserstein generative adversarial networks \" 2017.  C. C. Hsu H.-T. Hwang Y.-C. Wu Y. Tsao and H.-M. Wang \"Voice conversion from unaligned corpora using variational autoencoding wasserstein generative adversarial networks \" 2017.","DOI":"10.21437\/Interspeech.2017-63"},{"key":"e_1_3_2_2_39_1","volume-title":"Parallel-data-free voice conversion using cycleconsistent adversarial networks,\" arXiv preprint arXiv:1711.11293","author":"Kaneko T.","year":"2017","unstructured":"T. Kaneko and H. Kameoka , \" Parallel-data-free voice conversion using cycleconsistent adversarial networks,\" arXiv preprint arXiv:1711.11293 , 2017 . T. Kaneko and H. Kameoka, \"Parallel-data-free voice conversion using cycleconsistent adversarial networks,\" arXiv preprint arXiv:1711.11293, 2017."},{"key":"e_1_3_2_2_40_1","first-page":"266","volume-title":"IEEE","author":"Kameoka H.","year":"2018","unstructured":"H. Kameoka , T. Kaneko , K. Tanaka , and N. Hojo , \" Stargan-vc: Non-parallel manyto- many voice conversion using star generative adversarial networks,\" in 2018 IEEE Spoken Language Technology Workshop (SLT) . IEEE , 2018 , pp. 266 -- 273 . H. Kameoka, T. Kaneko, K. Tanaka, and N. Hojo, \"Stargan-vc: Non-parallel manyto- many voice conversion using star generative adversarial networks,\" in 2018 IEEE Spoken Language Technology Workshop (SLT). IEEE, 2018, pp. 266--273."},{"key":"e_1_3_2_2_41_1","first-page":"2506","volume-title":"Speech and Signal Processing (ICASSP). IEEE","author":"Gao Y.","year":"2018","unstructured":"Y. Gao , R. Singh , and B. Raj , \" Voice impersonation using generative adversarial networks,\" in 2018 IEEE International Conference on Acoustics , Speech and Signal Processing (ICASSP). IEEE , 2018 , pp. 2506 -- 2510 . Y. Gao, R. Singh, and B. Raj, \"Voice impersonation using generative adversarial networks,\" in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2018, pp. 2506--2510."},{"key":"e_1_3_2_2_42_1","volume-title":"S. Dieleman, H. Zen, K. Simonyan, O. Vinyals, A. Graves, N. Kalchbrenner, A. Senior, and K. Kavukcuoglu, \"Wavenet: A generative model for raw audio,\" arXiv preprint arXiv:1609.03499","author":"A.","year":"2016","unstructured":"A. v. d. Oord , S. Dieleman, H. Zen, K. Simonyan, O. Vinyals, A. Graves, N. Kalchbrenner, A. Senior, and K. Kavukcuoglu, \"Wavenet: A generative model for raw audio,\" arXiv preprint arXiv:1609.03499 , 2016 . A. v. d. Oord, S. Dieleman, H. Zen, K. Simonyan, O. Vinyals, A. Graves, N. Kalchbrenner, A. Senior, and K. Kavukcuoglu, \"Wavenet: A generative model for raw audio,\" arXiv preprint arXiv:1609.03499, 2016."},{"key":"e_1_3_2_2_43_1","first-page":"4779","volume-title":"IEEE","author":"Shen J.","year":"2018","unstructured":"J. Shen , R. Pang , R. J. Weiss , M. Schuster , N. Jaitly , Z. Yang , Z. Chen , Y. Zhang , Y. Wang , R. Skerrv-Ryan et al., \"Natural tts synthesis by conditioning wavenet on mel spectrogram predictions,\" in 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP) . IEEE , 2018 , pp. 4779 -- 4783 . J. Shen, R. Pang, R. J. Weiss, M. Schuster, N. Jaitly, Z. Yang, Z. Chen, Y. Zhang, Y.Wang, R. Skerrv-Ryan et al., \"Natural tts synthesis by conditioning wavenet on mel spectrogram predictions,\" in 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, 2018, pp. 4779--4783."},{"key":"e_1_3_2_2_44_1","volume-title":"Deep voice 2: Multi-speaker neural text-to-speech,\" Advances in neural information processing systems","author":"Gibiansky A.","year":"2017","unstructured":"A. Gibiansky , S. Arik , G. Diamos , J. Miller , K. Peng , W. Ping , J. Raiman , and Y. Zhou , \" Deep voice 2: Multi-speaker neural text-to-speech,\" Advances in neural information processing systems , vol. 30 , 2017 . A. Gibiansky, S. Arik, G. Diamos, J. Miller, K. Peng, W. Ping, J. Raiman, and Y. Zhou, \"Deep voice 2: Multi-speaker neural text-to-speech,\" Advances in neural information processing systems, vol. 30, 2017."},{"key":"e_1_3_2_2_45_1","volume-title":"Deep voice 3: Scaling text-to-speech with convolutional sequence learning,\" arXiv preprint arXiv:1710.07654","author":"Ping W.","year":"2017","unstructured":"W. Ping , K. Peng , A. Gibiansky , S. O. Arik , A. Kannan , S. Narang , J. Raiman , and J. Miller , \" Deep voice 3: Scaling text-to-speech with convolutional sequence learning,\" arXiv preprint arXiv:1710.07654 , 2017 . W. Ping, K. Peng, A. Gibiansky, S. O. Arik, A. Kannan, S. Narang, J. Raiman, and J. Miller, \"Deep voice 3: Scaling text-to-speech with convolutional sequence learning,\" arXiv preprint arXiv:1710.07654, 2017."},{"key":"e_1_3_2_2_46_1","first-page":"1","volume-title":"IEEE","author":"Carlini N.","year":"2018","unstructured":"N. Carlini and D. Wagner , \" Audio adversarial examples: Targeted attacks on speech-to-text,\" in 2018 IEEE security and privacy workshops (SPW) . IEEE , 2018 , pp. 1 -- 7 . N. Carlini and D. Wagner, \"Audio adversarial examples: Targeted attacks on speech-to-text,\" in 2018 IEEE security and privacy workshops (SPW). IEEE, 2018, pp. 1--7."},{"key":"e_1_3_2_2_47_1","first-page":"513","volume-title":"Hidden voice commands,\" in 25th USENIX Security Symposium (USENIX Security 16)","author":"Carlini N.","year":"2016","unstructured":"N. Carlini , P. Mishra , T. Vaidya , Y. Zhang , M. Sherr , C. Shields , D. Wagner , and W. Zhou , \" Hidden voice commands,\" in 25th USENIX Security Symposium (USENIX Security 16) . Austin, TX : USENIX Association , Aug. 2016 , pp. 513 -- 530 . N. Carlini, P. Mishra, T. Vaidya, Y. Zhang, M. Sherr, C. Shields, D. Wagner, and W. Zhou, \"Hidden voice commands,\" in 25th USENIX Security Symposium (USENIX Security 16). Austin, TX: USENIX Association, Aug. 2016, pp. 513--530."},{"key":"e_1_3_2_2_48_1","unstructured":". Available: https:\/\/www.usenix.org\/conference\/usenixsecurity16\/tech nical-sessions\/presentation\/carlini  . Available: https:\/\/www.usenix.org\/conference\/usenixsecurity16\/tech nical-sessions\/presentation\/carlini"},{"key":"e_1_3_2_2_49_1","first-page":"49","volume-title":"{CommanderSong}: A systematic approach for practical adversarial voice recognition,\" in 27th USENIX security symposium (USENIX security","author":"Yuan X.","year":"2018","unstructured":"X. Yuan , Y. Chen , Y. Zhao , Y. Long , X. Liu , K. Chen , S. Zhang , H. Huang , X. Wang , and C. A. Gunter , \" {CommanderSong}: A systematic approach for practical adversarial voice recognition,\" in 27th USENIX security symposium (USENIX security , 2018 , pp. 49 -- 64 . X. Yuan, Y. Chen, Y. Zhao, Y. Long, X. Liu, K. Chen, S. Zhang, H. Huang, X.Wang, and C. A. Gunter, \"{CommanderSong}: A systematic approach for practical adversarial voice recognition,\" in 27th USENIX security symposium (USENIX security , 2018, pp. 49--64."},{"key":"e_1_3_2_2_50_1","first-page":"5231","volume-title":"PMLR","author":"Qin Y.","year":"2019","unstructured":"Y. Qin , N. Carlini , G. Cottrell , I. Goodfellow , and C. Raffel , \" Imperceptible, robust, and targeted adversarial examples for automatic speech recognition,\" in International conference on machine learning . PMLR , 2019 , pp. 5231 -- 5240 . Y. Qin, N. Carlini, G. Cottrell, I. Goodfellow, and C. Raffel, \"Imperceptible, robust, and targeted adversarial examples for automatic speech recognition,\" in International conference on machine learning. PMLR, 2019, pp. 5231--5240."},{"key":"e_1_3_2_2_51_1","first-page":"1962","volume-title":"IEEE","author":"Kreuk F.","year":"2018","unstructured":"F. Kreuk , Y. Adi , M. Cisse , and J. Keshet , \" Fooling end-to-end speaker verification with adversarial examples,\" in 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP) . IEEE , 2018 , pp. 1962 -- 1966 . F. Kreuk, Y. Adi, M. Cisse, and J. Keshet, \"Fooling end-to-end speaker verification with adversarial examples,\" in 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, 2018, pp. 1962--1966."},{"key":"e_1_3_2_2_52_1","volume-title":"Crafting adversarial examples for speech paralinguistics applications,\" arXiv preprint arXiv:1711.03280","author":"Gong Y.","year":"2017","unstructured":"Y. Gong and C. Poellabauer , \" Crafting adversarial examples for speech paralinguistics applications,\" arXiv preprint arXiv:1711.03280 , 2017 . Y. Gong and C. Poellabauer, \"Crafting adversarial examples for speech paralinguistics applications,\" arXiv preprint arXiv:1711.03280, 2017."},{"key":"e_1_3_2_2_53_1","first-page":"694","volume-title":"IEEE","author":"Chen G.","year":"2021","unstructured":"G. Chen , S. Chenb , L. Fan , X. Du , Z. Zhao , F. Song , and Y. Liu , \" Who is real bob\" adversarial attacks on speaker recognition systems,\" in 2021 IEEE Symposium on Security and Privacy (SP) . IEEE , 2021 , pp. 694 -- 711 . G. Chen, S. Chenb, L. Fan, X. Du, Z. Zhao, F. Song, and Y. Liu, \"Who is real bob\" adversarial attacks on speaker recognition systems,\" in 2021 IEEE Symposium on Security and Privacy (SP). IEEE, 2021, pp. 694--711."},{"key":"e_1_3_2_2_54_1","first-page":"377","volume-title":"IEEE","author":"Zhang L.","year":"2020","unstructured":"L. Zhang , Y. Meng , J. Yu , C. Xiang , B. Falk , and H. Zhu , \" Voiceprint mimicry attack towards speaker verification system in smart home,\" in IEEE INFOCOM 2020-IEEE Conference on Computer Communications . IEEE , 2020 , pp. 377 -- 386 . L. Zhang, Y. Meng, J. Yu, C. Xiang, B. Falk, and H. Zhu, \"Voiceprint mimicry attack towards speaker verification system in smart home,\" in IEEE INFOCOM 2020-IEEE Conference on Computer Communications. IEEE, 2020, pp. 377--386."},{"key":"e_1_3_2_2_55_1","first-page":"1121","volume-title":"Advpulse: Universal, synchronizationfree, and targeted audio adversarial attacks via subsecond perturbations,\" in Proceedings of the 2020 ACMSIGSAC Conference on Computer and Communications Security","author":"Li Z.","year":"2020","unstructured":"Z. Li , Y. Wu , J. Liu , Y. Chen , and B. Yuan , \" Advpulse: Universal, synchronizationfree, and targeted audio adversarial attacks via subsecond perturbations,\" in Proceedings of the 2020 ACMSIGSAC Conference on Computer and Communications Security , 2020 , pp. 1121 -- 1134 . Z. Li, Y. Wu, J. Liu, Y. Chen, and B. Yuan, \"Advpulse: Universal, synchronizationfree, and targeted audio adversarial attacks via subsecond perturbations,\" in Proceedings of the 2020 ACMSIGSAC Conference on Computer and Communications Security, 2020, pp. 1121--1134."},{"key":"e_1_3_2_2_56_1","volume-title":"Real-time adversarial attacks,\" arXiv preprint arXiv:1905.13399","author":"Gong Y.","year":"2019","unstructured":"Y. Gong , B. Li , C. Poellabauer , and Y. Shi , \" Real-time adversarial attacks,\" arXiv preprint arXiv:1905.13399 , 2019 . Y. Gong, B. Li, C. Poellabauer, and Y. Shi, \"Real-time adversarial attacks,\" arXiv preprint arXiv:1905.13399, 2019."},{"key":"e_1_3_2_2_57_1","first-page":"1353","volume-title":"Specpatch: Human-in-the-loop adversarial audio spectrogram patch attack on speech recognition,\" in Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security","author":"Guo H.","year":"2022","unstructured":"H. Guo , Y. Wang , N. Ivanov , L. Xiao , and Q. Yan , \" Specpatch: Human-in-the-loop adversarial audio spectrogram patch attack on speech recognition,\" in Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security , 2022 , pp. 1353 -- 1366 . H. Guo, Y. Wang, N. Ivanov, L. Xiao, and Q. Yan, \"Specpatch: Human-in-the-loop adversarial audio spectrogram patch attack on speech recognition,\" in Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security, 2022, pp. 1353--1366."},{"key":"e_1_3_2_2_58_1","first-page":"82","volume-title":"Enjoy voice input with voiceprint unclonability and anonymity,\" in Proceedings of the 16th ACM Conference on Embedded Networked Sensor Systems","author":"Qian J.","year":"2018","unstructured":"J. Qian , H. Du , J. Hou , L. Chen , T. Jung , and X.-Y. Li , \"Hidebehind : Enjoy voice input with voiceprint unclonability and anonymity,\" in Proceedings of the 16th ACM Conference on Embedded Networked Sensor Systems , 2018 , pp. 82 -- 94 . J. Qian, H. Du, J. Hou, L. Chen, T. Jung, and X.-Y. Li, \"Hidebehind: Enjoy voice input with voiceprint unclonability and anonymity,\" in Proceedings of the 16th ACM Conference on Embedded Networked Sensor Systems, 2018, pp. 82--94."},{"key":"e_1_3_2_2_59_1","volume-title":"Speaker anonymisation using the mcadams coefficient,\" arXiv preprint arXiv:2011.01130","author":"Patino J.","year":"2020","unstructured":"J. Patino , N. Tomashenko , M. Todisco , A. Nautsch , and N. Evans , \" Speaker anonymisation using the mcadams coefficient,\" arXiv preprint arXiv:2011.01130 , 2020 . J. Patino, N. Tomashenko, M. Todisco, A. Nautsch, and N. Evans, \"Speaker anonymisation using the mcadams coefficient,\" arXiv preprint arXiv:2011.01130, 2020."},{"key":"e_1_3_2_2_60_1","first-page":"84","volume-title":"IEEE","author":"Vaidya T.","year":"2019","unstructured":"T. Vaidya and M. Sherr , \" You talk too much: Limiting privacy exposure via voice input,\" in 2019 IEEE Security and Privacy Workshops (SPW) . IEEE , 2019 , pp. 84 -- 91 . T. Vaidya and M. Sherr, \"You talk too much: Limiting privacy exposure via voice input,\" in 2019 IEEE Security and Privacy Workshops (SPW). IEEE, 2019, pp. 84--91."},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3420025"},{"key":"e_1_3_2_2_62_1","volume-title":"Emotionless: Privacy-preserving speech analysis for voice assistants,\" arXiv preprint arXiv:1908.03632","author":"Aloufi R.","year":"2019","unstructured":"R. Aloufi , H. Haddadi , and D. Boyle , \" Emotionless: Privacy-preserving speech analysis for voice assistants,\" arXiv preprint arXiv:1908.03632 , 2019 . R. Aloufi, H. Haddadi, and D. Boyle, \"Emotionless: Privacy-preserving speech analysis for voice assistants,\" arXiv preprint arXiv:1908.03632, 2019."},{"key":"e_1_3_2_2_63_1","volume-title":"V-cloak: Intelligibility-, naturalness-& timbre-preserving real-time voice anonymization,\" arXiv preprint arXiv:2210.15140","author":"Deng J.","year":"2022","unstructured":"J. Deng , F. Teng , Y. Chen , X. Chen , Z. Wang , and W. Xu , \" V-cloak: Intelligibility-, naturalness-& timbre-preserving real-time voice anonymization,\" arXiv preprint arXiv:2210.15140 , 2022 . J. Deng, F. Teng, Y. Chen, X. Chen, Z. Wang, and W. Xu, \"V-cloak: Intelligibility-, naturalness-& timbre-preserving real-time voice anonymization,\" arXiv preprint arXiv:2210.15140, 2022."}],"event":{"name":"WiSec '23: 16th ACM Conference on Security and Privacy in Wireless and Mobile Networks","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control","SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing"],"location":"Guildford United Kingdom","acronym":"WiSec '23"},"container-title":["Proceedings of the 16th ACM Conference on Security and Privacy in Wireless and Mobile Networks"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3558482.3590189","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3558482.3590189","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3558482.3590189","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:07Z","timestamp":1750178827000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3558482.3590189"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":63,"alternative-id":["10.1145\/3558482.3590189","10.1145\/3558482"],"URL":"https:\/\/doi.org\/10.1145\/3558482.3590189","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]},"assertion":[{"value":"2023-06-28","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}