{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T15:27:06Z","timestamp":1777390026379,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","funder":[{"name":"ULRI DSRI"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,18]]},"DOI":"10.1145\/3733102.3736707","type":"proceedings-article","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T11:14:07Z","timestamp":1750158847000},"page":"174-180","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["SAFE: Synthetic Audio Forensics Evaluation Challenge"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7232-4542","authenticated-orcid":false,"given":"Trapeznikov","family":"Kirill","sequence":"first","affiliation":[{"name":"STR, Woburn, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1621-1238","authenticated-orcid":false,"given":"Paul","family":"Cummer","sequence":"additional","affiliation":[{"name":"STR, Woburn, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6904-261X","authenticated-orcid":false,"given":"Pranay","family":"Pherwani","sequence":"additional","affiliation":[{"name":"STR, Woburn, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6374-6446","authenticated-orcid":false,"given":"Jai","family":"Aslam","sequence":"additional","affiliation":[{"name":"STR, Arlington, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8735-1956","authenticated-orcid":false,"given":"Michael","family":"Davinroy","sequence":"additional","affiliation":[{"name":"Aptima, Woburn, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4809-8951","authenticated-orcid":false,"given":"Peter","family":"Bautista","sequence":"additional","affiliation":[{"name":"Aptima, Woburn, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8095-3707","authenticated-orcid":false,"given":"Laura","family":"Cassani","sequence":"additional","affiliation":[{"name":"Aptima, Woburn, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3986-4039","authenticated-orcid":false,"given":"Matthew","family":"Stamm","sequence":"additional","affiliation":[{"name":"Drexel University, Philadelphia, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,17]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"Sarah Barrington et\u00a0al. 2025. People are poorly equipped to detect AI-powered voice clones. Scientific Reports 15 1 (2025) 11004.","DOI":"10.1038\/s41598-025-94170-3"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889450"},{"key":"e_1_3_3_2_4_2","unstructured":"Cartesia 2025. Cartesia Text-to-Speech API: sonic-english model. https:\/\/cartesia.ai\/product\/text-to-speech-tts. Accessed: 2025-04-30."},{"key":"e_1_3_3_2_5_2","unstructured":"Yushen Chen et\u00a0al. 2024. F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.06885 (2024)."},{"key":"e_1_3_3_2_6_2","unstructured":"Seamless Communication et\u00a0al. 2023. SeamlessM4T\u2014Massively Multilingual & Multimodal Machine Translation. ArXiv (2023)."},{"key":"e_1_3_3_2_7_2","volume-title":"2022 IEEE Spoken Language Technology Workshop","author":"Conneau Alexis","unstructured":"Alexis Conneau et\u00a0al. [n. d.]. Fleurs: Few-shot learning evaluation of universal representations of speech. In 2022 IEEE Spoken Language Technology Workshop."},{"key":"e_1_3_3_2_8_2","unstructured":"William Corvey. 2024. Semantic Forensics (SemaFor). DARPA nd https:\/\/www. darpa. mil\/program\/semantic-forensics (2024)."},{"key":"e_1_3_3_2_9_2","unstructured":"Luca Della\u00a0Libera et\u00a0al. 2025. FocalCodec: Low-Bitrate Speech Coding via Focal Modulation Networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.04465 (2025)."},{"key":"e_1_3_3_2_10_2","unstructured":"Alexandre D\u00e9fossez et\u00a0al. 2022. High Fidelity Neural Audio Compression. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.13438 (2022)."},{"key":"e_1_3_3_2_11_2","unstructured":"ElevenLabs 2025. ElevenLabs Text-to-Speech API. https:\/\/elevenlabs.io\/docs\/api-reference\/text-to-speech Accessed: 2025-04-30."},{"key":"e_1_3_3_2_12_2","unstructured":"Nawar Halabi et\u00a0al. 2016. Arabic speech corpus. Oxford Text Archive Coll. (2016)."},{"key":"e_1_3_3_2_13_2","unstructured":"Hexgrad 2025. Kokoro-TTS. https:\/\/huggingface.co\/spaces\/hexgrad\/Kokoro-TTS. Accessed: 2025-04-30."},{"key":"e_1_3_3_2_14_2","unstructured":"Huggingface 2025. Competitions. https:\/\/huggingface.co\/docs\/competitions."},{"key":"e_1_3_3_2_15_2","unstructured":"Innoai 2025. Edge-TTS-Text-to-Speech. https:\/\/huggingface.co\/spaces\/innoai\/Edge-TTS-Text-to-Speech."},{"key":"e_1_3_3_2_16_2","unstructured":"Yoach Lacombe et\u00a0al. 2024. Parler-TTS. github.com\/huggingface\/parler-tts."},{"key":"e_1_3_3_2_17_2","unstructured":"Sang-Hoon Lee et\u00a0al. 2023. Hierspeech++: Bridging the gap between semantic and acoustic representation of speech by hierarchical variational inference for zero-shot speech synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.12454 (2023)."},{"key":"e_1_3_3_2_18_2","unstructured":"Yinghao\u00a0Aaron Li et\u00a0al. 2023. Styletts 2: Towards human-level text-to-speech through style diffusion and adversarial training with large speech language models. Advances in Neural Information Processing Systems (2023)."},{"key":"e_1_3_3_2_19_2","unstructured":"Shijia Liao et\u00a0al. 2024. Fish-Speech: Leveraging Large Language Models for Advanced Multilingual Text-to-Speech Synthesis. arxiv:https:\/\/arXiv.org\/abs\/2411.01156\u00a0[cs.SD] https:\/\/arxiv.org\/abs\/2411.01156"},{"key":"e_1_3_3_2_20_2","unstructured":"Librivox 2025. LibriVox Free Pub. Domain Audiobooks. https:\/\/librivox.org\/. Access: 2025-05-08."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Haohe Liu et\u00a0al. 2024. Semanticodec: An ultra low bitrate semantic audio codec for general sound. IEEE Journal of Selected Topics in Signal Processing (2024).","DOI":"10.1109\/JSTSP.2024.3506286"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Xuechen Liu et\u00a0al. 2023. ASVspoof 2021: Towards Spoofed and Deepfake Speech Detection in the Wild. IEEE\/ACM Transactions on Audio Speech and Language Processing (2023) 2507\u20132522.","DOI":"10.1109\/TASLP.2023.3285283"},{"key":"e_1_3_3_2_23_2","unstructured":"Metavoice 2025. MetaVoice-1B-v0.1. https:\/\/huggingface.co\/metavoiceio\/metavoice-1B-v0.1. Accessed: 2025-04-30."},{"key":"e_1_3_3_2_24_2","unstructured":"Jack Moffitt. 2001. Ogg Vorbis\u2014open free audio\u2014set your media free. Linux journal 2001 81es (2001) 9\u2013es."},{"key":"e_1_3_3_2_25_2","unstructured":"OpenAI 2025. Text-to-Speech API. https:\/\/docs-dev.ttsopenai.com."},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Davide Salvi et\u00a0al. 2023. Synthetic speech attribution: Highlights from the ieee signal processing cup 2022 student competition [sp competitions]. IEEE Signal Processing Magazine 40 6 (2023) 92\u201398.","DOI":"10.1109\/MSP.2023.3268823"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Davide Salvi et\u00a0al. 2023. TIMIT-TTS: A Text-to-Speech Dataset for Multimodal Synthetic Media Detection. IEEE Access 11 (2023) 50851\u201350866. https:\/\/ieeexplore.ieee.org\/abstract\/document\/10124769","DOI":"10.1109\/ACCESS.2023.3276480"},{"key":"e_1_3_3_2_28_2","unstructured":"Maarten\u00a0Van Segbroeck et\u00a0al. 2019. DiPCo\u2013Dinner Party Corpus. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.13447 (2019)."},{"key":"e_1_3_3_2_29_2","volume-title":"NeurIPS 2024 Workshop AI-Driven Speech, Music, and Sound","author":"Siuzdak Hubert","year":"2024","unstructured":"Hubert Siuzdak et\u00a0al. 2024. SNAC: Multi-Scale Neural Audio Codec. In NeurIPS 2024 Workshop AI-Driven Speech, Music, and Sound."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Massimiliano Todisco et\u00a0al. 2019. ASVspoof 2019: Future horizons in spoofed and fake audio detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.05441 (2019).","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"e_1_3_3_2_31_2","unstructured":"Jean-Marc Valin et\u00a0al. 2012. RFC 6716: Definition of the Opus audio codec."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Junichi Yamagishi et\u00a0al. 2021. ASVspoof 2021: accelerating progress in spoofed and deepfake speech detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.00537 (2021).","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"e_1_3_3_2_33_2","unstructured":"Zonos 2025. https:\/\/huggingface.co\/spaces\/Steveeeeeeen\/Zonos Accessed: 2025-04-30."}],"event":{"name":"IH&MMSEC '25: ACM Workshop on Information Hiding and Multimedia Security","location":"San Jose USA","acronym":"IH&MMSEC '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the ACM Workshop on Information Hiding and Multimedia Security"],"original-title":[],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T11:15:28Z","timestamp":1750158928000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3733102.3736707"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,17]]},"references-count":32,"alternative-id":["10.1145\/3733102.3736707","10.1145\/3733102"],"URL":"https:\/\/doi.org\/10.1145\/3733102.3736707","relation":{},"subject":[],"published":{"date-parts":[[2025,6,17]]},"assertion":[{"value":"2025-06-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}