{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:41:40Z","timestamp":1775230900534,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T00:00:00Z","timestamp":1719187200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"AFRL and DARPA","award":["FA8750-20-2-1004"],"award-info":[{"award-number":["FA8750-20-2-1004"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,24]]},"DOI":"10.1145\/3658664.3659658","type":"proceedings-article","created":{"date-parts":[[2024,6,12]],"date-time":"2024-06-12T15:09:27Z","timestamp":1718204967000},"page":"277-282","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Are Recent Deepfake Speech Generators Detectable?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7399-4876","authenticated-orcid":false,"given":"Kratika","family":"Bhagtani","sequence":"first","affiliation":[{"name":"School of Electrical and Computer Engineering, Purdue University, West Lafayette, IN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6464-7688","authenticated-orcid":false,"given":"Amit Kumar Singh","family":"Yadav","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, Purdue University, West Lafayette, IN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0406-0222","authenticated-orcid":false,"given":"Paolo","family":"Bestagini","sequence":"additional","affiliation":[{"name":"Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2909-7323","authenticated-orcid":false,"given":"Edward J.","family":"Delp","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, Purdue University, West Lafayette, IN, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2022. Deepfake presidents used in Russia-Ukraine war. https:\/\/www.bbc.com\/ news\/technology-60780142"},{"key":"e_1_3_2_1_2_1","unstructured":"2022. Send in the clones: Using artificial intelligence to digitally replicate human voices. https:\/\/www.npr.org\/2022\/01\/17\/1073031858\/artificial-intelligence-voicecloning"},{"key":"e_1_3_2_1_3_1","unstructured":"2023. Speech Synthesis ElevenLabs. https:\/\/elevenlabs.io\/"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASYU52992.2021.9598977"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3174"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF59524.2023.10477041"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIPR54900.2022.00064"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the International Conference of Audio Engineering Society: High-Quality Audio Coding (September","author":"Brandenburg Karlheinz","year":"1999","unstructured":"Karlheinz Brandenburg. 1999. MP3 and AAC Explained. Proceedings of the International Conference of Audio Engineering Society: High-Quality Audio Coding (September 1999). Signa, Italy."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the International Conference on Learning Representations (May","author":"Brock Andrew","year":"2019","unstructured":"Andrew Brock, Jeff Donahue, and Karen Simonyan. 2019. Large Scale GAN Training for High Fidelity Natural Image Synthesis. Proceedings of the International Conference on Learning Representations (May 2019), 35 pages. https: \/\/openreview.net\/pdf?id=B1xsqj09Fm New Orleans, Louisiana, USA."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2020.2999185"},{"key":"e_1_3_2_1_11_1","unstructured":"Coqui. 2023. XTTS. https:\/\/docs.coqui.ai\/en\/latest\/models\/xtts.html"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2307.05782"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-1912-3_21"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAI52203.2021.9445238"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_17_1","volume-title":"Denoising Diffusion Probabilistic Models. Advances in Neural Information Processing Systems 33 (December","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. Advances in Neural Information Processing Systems 33 (December 2020), 6840--6851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/ 4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3089437"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547855"},{"key":"e_1_3_2_1_20_1","unstructured":"Keith Ito and Linda Johnson. 2017. The LJ Speech Dataset. https:\/\/keithito.com\/LJSpeech- Dataset\/"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13369-021-06297-w"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2326"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.05646"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-227"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094745"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413828"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2201.11972"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3285283"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2106.03153"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_1_31_1","volume-title":"Improving Language Understanding by Generative Pre-training. (June","author":"Radford Alec","year":"2018","unstructured":"Alec Radford, Karthik Narasimhan, Tim Salimans, and Ilya Sutskever. 2018. Improving Language Understanding by Generative Pre-training. (June 2018), 12 pages. OpenAI."},{"key":"e_1_3_2_1_32_1","volume-title":"ChatGPT: A comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things and Cyber-Physical Systems 3 (April","author":"Ray Partha Pratim","year":"2023","unstructured":"Partha Pratim Ray. 2023. ChatGPT: A comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things and Cyber-Physical Systems 3 (April 2023), 121--154. https:\/\/doi.org\/10. 1016\/j.iotcps.2023.04.003"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1038\/323533a0"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA58977.2023.00075"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.1915893"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2022-16"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2017.01.001"},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the International Conference on Learning Representations (April","author":"Xiao Zhisheng","year":"2022","unstructured":"Zhisheng Xiao, Karsten Kreis, and Arash Vahdat. 2022. Tackling the Generative Learning Trilemma with Denoising Diffusion GANs. Proceedings of the International Conference on Learning Representations (April 2022), 28 pages. https:\/\/openreview.net\/pdf?id=JprM0p-q0Co"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2404.10989"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2304.03323"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.7488\/ds\/2555"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2305.17739"}],"event":{"name":"IH&MMSEC '24: ACM Workshop on Information Hiding and Multimedia Security","location":"Baiona Spain","acronym":"IH&MMSEC '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2024 ACM Workshop on Information Hiding and Multimedia Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658664.3659658","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3658664.3659658","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T23:43:21Z","timestamp":1755906201000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658664.3659658"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,24]]},"references-count":43,"alternative-id":["10.1145\/3658664.3659658","10.1145\/3658664"],"URL":"https:\/\/doi.org\/10.1145\/3658664.3659658","relation":{},"subject":[],"published":{"date-parts":[[2024,6,24]]},"assertion":[{"value":"2024-06-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}