{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T05:03:08Z","timestamp":1769749388893,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,4,25]],"date-time":"2022-04-25T00:00:00Z","timestamp":1650844800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,4,25]]},"DOI":"10.1145\/3477314.3507013","type":"proceedings-article","created":{"date-parts":[[2022,5,7]],"date-time":"2022-05-07T00:37:36Z","timestamp":1651883856000},"page":"1646-1655","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["The dawn of a text-dependent society"],"prefix":"10.1145","author":[{"given":"Anton","family":"Firc","sequence":"first","affiliation":[{"name":"Brno University of Technology, Brno, Czech Republic"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kamil","family":"Malinka","sequence":"additional","affiliation":[{"name":"Brno University of Technology, Brno, Czech Republic"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,5,6]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 12th Language Resources and Evaluation Conference. European Language Resources Association","author":"Ardila Rosana","year":"2020","unstructured":"Rosana Ardila, Megan Branson, Kelly Davis, Michael Kohler, Josh Meyer, Michael Henretty, Reuben Morais, Lindsay Saunders, Francis Tyers, and Gregor Weber. 2020. Common Voice: A Massively-Multilingual Speech Corpus. In Proceedings of the 12th Language Resources and Evaluation Conference. European Language Resources Association, Marseille, France, 4218--4222. https:\/\/www.aclweb.org\/anthology\/2020.lrec-1.520"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1186\/s13635-021-00116-3"},{"key":"e_1_3_2_1_4_1","unstructured":"Jemine Corentin. 2019. Real-time Voice Cloning. Master thesis. Universit\u00e9 de Li\u00e8ge Li\u00e8ge Belgique. https:\/\/matheo.uliege.be\/handle\/2268.2\/6801?locale=en"},{"key":"e_1_3_2_1_5_1","unstructured":"Descript. 2020. Descript webpage. online. https:\/\/www.descript.com"},{"key":"e_1_3_2_1_6_1","unstructured":"Descript. 2021. Overdub. online. https:\/\/www.descript.com\/overdub"},{"key":"e_1_3_2_1_7_1","unstructured":"Joel Frank and Lea Sch\u00f6nherr. 2021. WaveFake: A Data Set to Facilitate Audio Deepfake Detection. arXiv:cs.LG\/2111.02813"},{"key":"e_1_3_2_1_8_1","unstructured":"Sudipto Ghosh. 2019. Are You Confident About Distinguishing Between a Computer-Generated Voice and Human Voice? online. https:\/\/aithority.com\/ait-featured-posts\/are-you-confident-about-distinguishing-between-a-computer-generated-voice-and-human-voice\/"},{"key":"e_1_3_2_1_9_1","unstructured":"ID R&D. 2021. Combat Voice Spoofing Attacks. online. https:\/\/www.idrnd.ai\/voice-anti-spoofing\/"},{"key":"e_1_3_2_1_10_1","unstructured":"Keith Ito and Linda Johnson. 2017. The LJ Speech Dataset. online. https:\/\/keithito.com\/LJ-Speech-Dataset\/"},{"key":"e_1_3_2_1_11_1","unstructured":"Ed Jefferson. 2020. Are voice biometrics the new passwords? online. https:\/\/www.raconteur.net\/technology\/cybersecurity\/voice-biometrics\/"},{"key":"e_1_3_2_1_12_1","volume-title":"Ignacio Lopez Moreno, and Yonghui Wu","author":"Jia Ye","year":"2019","unstructured":"Ye Jia, Yu Zhang, Ron J. Weiss, Quan Wang, Jonathan Shen, Fei Ren, Zhifeng Chen, Patrick Nguyen, Ruoming Pang, Ignacio Lopez Moreno, and Yonghui Wu. 2019. Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis. arXiv:cs.CL\/1806.04558"},{"key":"e_1_3_2_1_13_1","unstructured":"Rupert Jones. 2018. Voice recognition: is it really as secure as it sounds? online. https:\/\/www.theguardian.com\/money\/2018\/sep\/22\/voice-recognition-is-it-really-as-secure-as-it-sounds"},{"key":"e_1_3_2_1_14_1","volume-title":"Artificial Intelligence Enabled - Deepfake technology The Emerge of a New Threat. Master thesis","author":"Jones Valencia A.","unstructured":"Valencia A. Jones. 2020. Artificial Intelligence Enabled - Deepfake technology The Emerge of a New Threat. Master thesis. Utica College."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4020-2676-8_7"},{"key":"e_1_3_2_1_16_1","unstructured":"Microsoft. 2020. About the Speech SDK. https:\/\/docs.microsoft.com\/en-us\/azure\/cognitive-services\/speech-service\/speaker-recognition-overview#speaker-verification."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2019.101027"},{"key":"e_1_3_2_1_18_1","volume-title":"Cuong M. Nguyen, Dung Nguyen, Duc Thanh Nguyen, and Saeid Nahavandi.","author":"Nguyen Thanh Thi","year":"2021","unstructured":"Thanh Thi Nguyen, Quoc Viet Hung Nguyen, Cuong M. Nguyen, Dung Nguyen, Duc Thanh Nguyen, and Saeid Nahavandi. 2021. Deep Learning for Deepfakes Creation and Detection: A Survey. arXiv:cs.CV\/1909.11573"},{"key":"e_1_3_2_1_19_1","unstructured":"Recommendation P.85. 1994. Telephone transmission quality subjective opinion tests. A method for subjective performance assessment of the quality of speech voice output devices."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_1_21_1","unstructured":"Jon Petersen. 2019. Combating deepfakes with voice biometric technology. online. https:\/\/www.techradar.com\/news\/combating-deepfakes-with-voice-biometric-technology"},{"key":"e_1_3_2_1_22_1","unstructured":"Phonexia. 2021. Phonexia Voice Verify. https:\/\/www.phonexia.com\/en\/product\/voice-verify\/."},{"key":"e_1_3_2_1_23_1","unstructured":"Precise Biometrics AB. 2014. Understanding biometric performance evaluation. https:\/\/precisebiometrics.com\/wp-content\/uploads\/2014\/11\/White-Paper-Understanding-Biometric-Performance-Evaluation-QR.pdf."},{"key":"e_1_3_2_1_24_1","unstructured":"Real-Time-Voice-Cloning 2020. [Online]. Single speaker fine-tuning process and results. Real-Time-Voice-Cloning GitHub. https:\/\/github.com\/CorentinJ\/Real-Time-Voice-Cloning\/issues\/437"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPED.2019.8906599"},{"key":"e_1_3_2_1_26_1","unstructured":"Resmble AI. 2020. Resemble AI webpage. online. https:\/\/www.resemble.ai"},{"key":"e_1_3_2_1_27_1","unstructured":"Eric Hal Schwartz. 2019. Deepfake Security Concerns Are Limiting Voice ID Adoption: Survey. online. https:\/\/voicebot.ai\/2019\/12\/19\/deepfake-security-concerns-are-limiting-voice-id-adoption-survey\/"},{"key":"e_1_3_2_1_28_1","unstructured":"John Seymour and Azeem Aqil. 2018. Your Voice is My Passport. https:\/\/www.blackhat.com\/us-18\/briefings\/schedule\/#your-voice-is-my-passport-11395"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Jonathan Shen Ruoming Pang Ron J. Weiss Mike Schuster Navdeep Jaitly Zongheng Yang Zhifeng Chen Yu Zhang Yuxuan Wang RJ Skerry-Ryan Rif A. Saurous Yannis Agiomyrgiannakis and Yonghui Wu. 2018. Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions. arXiv:cs.CL\/1712.05884","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"e_1_3_2_1_30_1","unstructured":"Dan Simmonss. 2017. BBC fools HSBC voice recognition security system. online. https:\/\/www.bbc.com\/news\/technology-39965545"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383459"},{"key":"e_1_3_2_1_32_1","unstructured":"Aaron van den Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alex Graves Nal Kalchbrenner Andrew Senior and Koray Kavukcuoglu. 2016. WaveNet: A Generative Model for Raw Audio. arXiv:cs.SD\/1609.03499"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Run Wang Felix Juefei-Xu Yihao Huang Qing Guo Xiaofei Xie Lei Ma and Yang Liu. 2020. DeepSonar: Towards Effective and Robust Detection of AI-Synthesized Fake Voices. arXiv:eess.AS\/2005.13770","DOI":"10.1145\/3394171.3413716"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.7488\/ds\/2555"},{"key":"e_1_3_2_1_35_1","volume-title":"Tomi Kinnunen, Nicholas Evans, and H\u00e9ctor Delgado.","author":"Yamagishi Junichi","year":"2021","unstructured":"Junichi Yamagishi, Xin Wang, Massimiliano Todisco, Md Sahidullah, Jose Patino, Andreas Nautsch, Xuechen Liu, Kong Aik Lee, Tomi Kinnunen, Nicholas Evans, and H\u00e9ctor Delgado. 2021. ASVspoof 2021: accelerating progress in spoofed and deepfake speech detection. arXiv:eess.AS\/2109.00537"},{"key":"e_1_3_2_1_36_1","volume-title":"Tomi Kinnunen, Zhenhua Ling, and Tomoki Toda.","author":"Yi Zhao","year":"2020","unstructured":"Zhao Yi, Wen-Chin Huang, Xiaohai Tian, Junichi Yamagishi, Rohan Kumar Das, Tomi Kinnunen, Zhenhua Ling, and Tomoki Toda. 2020. Voice Conversion Challenge 2020 --- Intra-lingual semi-parallel and cross-lingual voice conversion. https:\/\/www.isca-speech.org\/archive\/VCC_BC_2020\/pdfs\/VCC2020_paper_13.pdf"},{"key":"e_1_3_2_1_37_1","volume-title":"LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech. CoRR abs\/1904.02882","author":"Zen Heiga","year":"2019","unstructured":"Heiga Zen, Viet Dang, Rob Clark, Yu Zhang, Ron J. Weiss, Ye Jia, Zhifeng Chen, and Yonghui Wu. 2019. LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech. CoRR abs\/1904.02882 (2019). arXiv:1904.02882 http:\/\/arxiv.org\/abs\/1904.02882"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","unstructured":"Zhenyu Zhang Yewei Gu Xiaowei Yi and Xianfeng Zhao. 2020. SynSpeechDDB: a new synthetic speech detection database. 10.21227\/ta8z-mx73","DOI":"10.21227\/ta8z-mx73"},{"key":"e_1_3_2_1_39_1","volume-title":"FMFCCA: A Challenging Mandarin Dataset for Synthetic Speech Detection. arXiv:cs.SD\/2110.09441","author":"Zhang Zhenyu","year":"2021","unstructured":"Zhenyu Zhang, Yewei Gu, Xiaowei Yi, and Xianfeng Zhao. 2021. FMFCCA: A Challenging Mandarin Dataset for Synthetic Speech Detection. arXiv:cs.SD\/2110.09441"},{"key":"e_1_3_2_1_40_1","volume-title":"The Blizzard Challenge","author":"Zhou Xiao","year":"2020","unstructured":"Xiao Zhou, Zhen-Hua Ling, and Simon King. 2020. The Blizzard Challenge 2020. online. http:\/\/www.festvox.org\/blizzard\/bc2020\/BC20_zhou_ling_king.pdf"}],"event":{"name":"SAC '22: The 37th ACM\/SIGAPP Symposium on Applied Computing","location":"Virtual Event","acronym":"SAC '22","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 37th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3477314.3507013","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3477314.3507013","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:33Z","timestamp":1750191513000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3477314.3507013"}},"subtitle":["deepfakes as a threat to speech verification systems"],"short-title":[],"issued":{"date-parts":[[2022,4,25]]},"references-count":39,"alternative-id":["10.1145\/3477314.3507013","10.1145\/3477314"],"URL":"https:\/\/doi.org\/10.1145\/3477314.3507013","relation":{},"subject":[],"published":{"date-parts":[[2022,4,25]]},"assertion":[{"value":"2022-05-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}