{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T16:13:59Z","timestamp":1776183239014,"version":"3.50.1"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,11,7]]},"DOI":"10.1109\/iscslp63861.2024.10800535","type":"proceedings-article","created":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:11:17Z","timestamp":1734981077000},"page":"466-470","source":"Crossref","is-referenced-by-count":4,"title":["MADD: A Multi-Lingual Multi-Speaker Audio Deepfake Detection Dataset"],"prefix":"10.1109","author":[{"given":"Xiaoke","family":"Qi","sequence":"first","affiliation":[{"name":"School of Information Management for Law, China University of Political Science and Law,Beijing"}]},{"given":"Hao","family":"Gu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing"}]},{"given":"Jiangyan","family":"Yi","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing"}]},{"given":"Jianhua","family":"Tao","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation,Beijing"}]},{"given":"Yong","family":"Ren","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing"}]},{"given":"Jiayi","family":"He","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of 
Sciences,Beijing"}]},{"given":"Siding","family":"Zeng","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3285283"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746939"},{"key":"ref4","article-title":"Add 2023: the second audio deepfake detection challenge","author":"Yi","year":"2023","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-108"},{"key":"ref6","article-title":"Wavefake: A data set to facilitate audio deepfake detection","author":"Frank","year":"2021","journal-title":"arXiv preprint"},{"key":"ref7","article-title":"Common voice: A massively-multilingual speech corpus","author":"Ardila","year":"2019","journal-title":"arXiv preprint"},{"key":"ref8","article-title":"Gigaspeech 2: An evolving, large-scale and multi-domain asr corpus for low-resource languages with automated crawling, transcription and refinement","author":"Yang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref9","first-page":"8067","article-title":"Glow-tts: A generative flow for text-to-speech via monotonic alignment search","volume":"33","author":"Kim","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1609.03499"},{"key":"ref11","first-page":"17022","article-title":"Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Advances in neural information processing 
systems","volume":"33","author":"Kong","year":"2020"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref14","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","author":"Ren","year":"2020","journal-title":"arXiv preprint"},{"key":"ref15","first-page":"5530","article-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","volume-title":"International Conference on Machine Learning","author":"Kim"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-1066"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682897"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2017-63"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2917232"},{"key":"ref20","first-page":"16251","article-title":"Neural analysis and synthesis: Reconstructing speech from self-supervised representations","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Choi","year":"2021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095191"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/SPED.2019.8906599"},{"key":"ref23","article-title":"Jsut corpus: free large-scale japanese speech corpus for end-to-end speech synthesis","author":"Sonobe","year":"2017","journal-title":"arXiv preprint"},{"key":"ref24","article-title":"Mel-gan: Generative adversarial networks for conditional waveform synthesis","volume-title":"Advances in neural information processing systems","volume":"32","author":"Kumar","year":"2019"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110468"},{"key":"ref26","article-title":"Fakeavceleb: A novel audio-video multimodal deepfake 
dataset","author":"Khalid","year":"2021","journal-title":"arXiv preprint"},{"key":"ref27","article-title":"The codecfake dataset and countermeasures for the universally detection of deepfake audio","author":"Xie","year":"2024","journal-title":"arXiv preprint"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10650962"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-930"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3233236"},{"key":"ref31","article-title":"Aishell-2: Transforming mandarin asr research into industrial scale","volume-title":"arXiv preprint","author":"Du","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref33","article-title":"Scaling speech technology to 1,000+ languages","author":"Pratap","year":"2023","journal-title":"arXiv"}],"event":{"name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","location":"Beijing, China","start":{"date-parts":[[2024,11,7]]},"end":{"date-parts":[[2024,11,10]]}},"container-title":["2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10799944\/10799969\/10800535.pdf?arnumber=10800535","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,24]],"date-time":"2024-12-24T06:26:34Z","timestamp":1735021594000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10800535\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,7]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/iscslp63861.2024.10800535","relation":{},"subject":[],"published":{"date-parts":[[2024,11,7]]}}}