{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:00:05Z","timestamp":1776888005463,"version":"3.51.2"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434712","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Multilingual Dataset Integration Strategies for Robust Audio Deepfake Detection: A SAFE Challenge System"],"prefix":"10.1109","author":[{"given":"Hashim","family":"Ali","sequence":"first","affiliation":[{"name":"University of Michigan,Electrical and Computer Engineering,Dearborn,USA"}]},{"given":"Surya","family":"Subramani","sequence":"additional","affiliation":[{"name":"University of Michigan,Electrical and Computer Engineering,Dearborn,USA"}]},{"given":"Nithin Sai","family":"Adupa","sequence":"additional","affiliation":[{"name":"University of Michigan,Electrical and Computer Engineering,Dearborn,USA"}]},{"given":"Lekha","family":"Bollinani","sequence":"additional","affiliation":[{"name":"University of Michigan,Electrical and Computer Engineering,Dearborn,USA"}]},{"given":"Sali","family":"El-Loh","sequence":"additional","affiliation":[{"name":"University of Michigan,Electrical and Computer Engineering,Dearborn,USA"}]},{"given":"Hafiz","family":"Malik","sequence":"additional","affiliation":[{"name":"University of Michigan,Electrical and Computer Engineering,Dearborn,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TBIOM.2021.3059479"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101114"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1017\/atsip.2019.21"},{"key":"ref4","article-title":"Does Audio Deepfake Detection Generalize?","author":"M\u00fcller","year":"2022","journal-title":"arXiv:2203.16263 [cs, eess]"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"2507","DOI":"10.1109\/TASLP.2023.3285283","article-title":"ASVspoof 2021: Towards Spoofed and Deepfake Speech Detection in the Wild","volume":"31","author":"Liu","year":"2023","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/ASVspoof.2024-1"},{"issue":"15","key":"ref8","first-page":"3750","article-title":"Asvspoof 2015: Automatic speaker verification spoofing and countermeasures challenge evaluation plan","volume":"10","author":"Wu","year":"2014","journal-title":"Training"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10650962"},{"key":"ref10","article-title":"SpoofCeleb: Speech Deepfake Detection and SASV In The Wild","author":"Jung","year":"2024","journal-title":"arXiv:2409.17285 [cs]"},{"key":"ref11","article-title":"Beijing ZKJ-NPU Speaker Verification System for VoxCeleb Speaker Recognition Challenge 2021","author":"Zhang","year":"2021","journal-title":"arXiv:2109.03568 [cs, eess]"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3658664.3659656"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/taslpro.2025.3525966"},{"key":"ref14","article-title":"Speechtokenizer: Unified speech tokenizer for speech language models","volume-title":"The Twelfth International Conference on Learning Representations","author":"Zhang"},{"key":"ref15","article-title":"High fidelity neural audio compression","author":"D\u00e9fossez","year":"2022","journal-title":"arXiv preprint arXiv:2210.13438"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447523"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832250"},{"key":"ref18","first-page":"8599","article-title":"Gradtts: A diffusion probabilistic model for text-to-speech","volume-title":"International conference on machine learning.","author":"Popov"},{"key":"ref19","article-title":"Naturalspeech 2: Latent diffusion models are natural and zeroshot speech and singing synthesizers","author":"Shen","year":"2023","journal-title":"arXiv preprint arXiv:2304.09116"},{"key":"ref20","article-title":"Styletts 2: Towards human-level text-to-speech through style diffusion and adversarial training with large speech language models","volume":"36","author":"Li","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448291"},{"key":"ref22","article-title":"P-flow: a fast and data-efficient zeroshot tts through speech prompting","volume":"36","author":"Kim","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2025-2418"},{"key":"ref24","article-title":"The Biden Deepfake Robocall Is Only the Beginning","author":"Elliott","year":"2024","journal-title":"Wired"},{"key":"ref25","volume-title":"Sadiq Khan says fake AI audio of him nearly led to serious disorder","author":"Spring","year":"2024"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2016"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.313"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832320"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746213"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1302"},{"key":"ref31","first-page":"72","article-title":"Exploring wavlm backends for speech spoofing and deepfake detection","volume-title":"Proc. ASVspoof 2024","author":"Stourbe"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/ASVspoof.2024-25"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2022-16"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10961"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/jstsp.2022.3188113"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21315"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-349"},{"key":"ref38","volume-title":"Mockingjay: Unsupervised speech representation learning with","author":"Encoders","year":"2020"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-143"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1775"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3389631"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434712.pdf?arnumber=11434712","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:58:39Z","timestamp":1775192319000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434712\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434712","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}