{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T01:58:13Z","timestamp":1771466293548,"version":"3.50.1"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006602","name":"Air Force Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006602","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icassp48485.2024.10446471","type":"proceedings-article","created":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T18:56:31Z","timestamp":1710788191000},"page":"11171-11175","source":"Crossref","is-referenced-by-count":6,"title":["Mdrt: Multi-Domain Synthetic Speech Localization"],"prefix":"10.1109","author":[{"given":"Amit Kumar","family":"Singh Yadav","sequence":"first","affiliation":[{"name":"Purdue University,School of Electrical and Computer Engineering,West Lafayette,IN,USA"}]},{"given":"Kratika","family":"Bhagtani","sequence":"additional","affiliation":[{"name":"Purdue University,School of Electrical and Computer Engineering,West Lafayette,IN,USA"}]},{"given":"Sriram","family":"Baireddy","sequence":"additional","affiliation":[{"name":"Purdue University,School of Electrical and Computer Engineering,West 
Lafayette,IN,USA"}]},{"given":"Paolo","family":"Bestagini","sequence":"additional","affiliation":[{"name":"Politecnico di Milano,Informazione e Bioingegneria,Dipartimento di Elettronica,Milan,Italy"}]},{"given":"Stefano","family":"Tubaro","sequence":"additional","affiliation":[{"name":"Politecnico di Milano,Informazione e Bioingegneria,Dipartimento di Elettronica,Milan,Italy"}]},{"given":"Edward J.","family":"Delp","sequence":"additional","affiliation":[{"name":"Purdue University,School of Electrical and Computer Engineering,West Lafayette,IN,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2249"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/taslp.2023.3285283"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3233236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-930"},{"key":"ref5","article-title":"Add 2023: The second audio deepfake detection challenge","author":"Yi","year":"2023"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-738"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2023.3250266"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462018"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2723721"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053520"},{"key":"ref12","first-page":"8599","article-title":"Grad-TTS: A Diffusion Probabilistic Model for Text-to-Speech","volume-title":"Proceedings of the International Conference on Machine Learning","volume":"139","author":"Popov"},{"key":"ref13","first-page":"7748","article-title":"Meta-stylespeech : Multi-speaker adaptive text-to-speech generation","volume-title":"Proceedings of International Conference on Machine 
Learning","volume":"139","author":"Min"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2326"},{"key":"ref15","volume-title":"WELLSAID: AI Voice Over for Commercials","volume":"2022","year":"2022"},{"key":"ref16","article-title":"Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers","author":"Wang","year":"2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-6"},{"key":"ref18","article-title":"Speechgen: Unlocking the generative power of speech language models with prompts","author":"Wu","year":"2023"},{"key":"ref19","volume-title":"Deepfake Video of Zelenskyy Could be \u2018Tip of the Iceberg\u2019 in Info War, Experts Warn","author":"Allyn","year":"2022"},{"key":"ref20","article-title":"Goldman Sachs, Ozy Media and a $40 Million Conference Call Gone Wrong","volume-title":"The New York Times","author":"Smith","year":"2021"},{"key":"ref21","volume-title":"Scammers use AI to mimic voices of loved ones in distress","author":"Evans","year":"2023"},{"key":"ref22","article-title":"An Overview of Recent Work in Media Forensics: Methods and Threats","author":"Bhagtani","year":"2022"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3089437"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746213"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094704"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096741"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095043"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1214"},{"key":"ref29","first-page":"18","article-title":"Convolutional recurrent neural network and multitask learning for manipulation region location","volume-title":"Proceedings of IJCAI Workshop on Deepfake Audio Detection and Analysis","author":"Li"},{"key":"ref30","article-title":"Transsionadd: A 
multi-frame reinforcement based sequence tagging model for audio deepfake detection","author":"Liu","year":"2023"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1121\/1.1915893"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094774"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3616540"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2017.01.001"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2011.11.004"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/ASVSPOOF.2021-2"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"ref38","author":"Baevski","year":"2020","journal-title":"Wav2vec 2.0: A framework for self-supervised learning of speech representations"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-227"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10097236"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-221"},{"key":"ref42","first-page":"1","article-title":"Masked Spectrogram Modeling using Masked Autoencoders for Learning General-purpose Audio Representation","volume-title":"Proceedings of Machine Learning Research","volume":"166","author":"Niizumi"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.2352\/ei.2023.35.4.mwsf-372"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3577163.3595112"},{"key":"ref45","article-title":"Machine Learning for Speech Forensics and Hypersonic Vehicle Applications","volume-title":"Ph.D. 
dissertation","author":"Bartusiak","year":"2022"},{"key":"ref46","volume-title":"Theory and Applications of Digital Speech Processing","author":"Rabiner","year":"2010"},{"key":"ref47","volume-title":"ASVspoof 2019: The 3rd Automatic Speaker Verification Spoofing and Countermeasures Challenge database","author":"Yamagishi","year":"2019"},{"key":"ref48","article-title":"Decoupled Weight Decay Regularization","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Loshchilov"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"}],"event":{"name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Seoul, Korea, Republic of","start":{"date-parts":[[2024,4,14]]},"end":{"date-parts":[[2024,4,19]]}},"container-title":["ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10445798\/10445803\/10446471.pdf?arnumber=10446471","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,2]],"date-time":"2024-08-02T04:39:10Z","timestamp":1722573550000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10446471\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10446471","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}