{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T15:55:16Z","timestamp":1776182116725,"version":"3.50.1"},"reference-count":339,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,6,4]],"date-time":"2022-06-04T00:00:00Z","timestamp":1654300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,4]],"date-time":"2022-06-04T00:00:00Z","timestamp":1654300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1815724"],"award-info":[{"award-number":["1815724"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Punjab Higher Education Commission, Pakistan","award":["PHEC\/ARA\/PIRCA\/20527\/21"],"award-info":[{"award-number":["PHEC\/ARA\/PIRCA\/20527\/21"]}]},{"name":"Michigan Translational Research and Commercialization","award":["292883"],"award-info":[{"award-number":["292883"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s10489-022-03766-z","type":"journal-article","created":{"date-parts":[[2022,6,4]],"date-time":"2022-06-04T02:02:21Z","timestamp":1654308141000},"page":"3974-4026","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":394,"title":["Deepfakes generation and detection: state-of-the-art, open challenges, countermeasures, and way forward"],"prefix":"10.1007","volume":"53","author":[{"given":"Momina","family":"Masood","sequence":"first","affiliation":[]},{"given":"Mariam","family":"Nawaz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7927-3436","authenticated-orcid":false,"given":"Khalid Mahmood","family":"Malik","sequence":"additional","affiliation":[]},{"given":"Ali","family":"Javed","sequence":"additional","affiliation":[]},{"given":"Aun","family":"Irtaza","sequence":"additional","affiliation":[]},{"given":"Hafiz","family":"Malik","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,4]]},"reference":[{"key":"3766_CR1","first-page":"2672","volume":"1","author":"I Goodfellow","year":"2014","unstructured":"Goodfellow I et al (2014) Generative adversarial nets. Adv Neural Inf Proces Syst 1:2672\u20132680","journal-title":"Adv Neural Inf Proces Syst"},{"key":"3766_CR2","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1007\/s43681-021-00072-1","volume":"1","author":"H Etienne","year":"2021","unstructured":"Etienne H (2021) The future of online trust (and why Deepfake is advancing it). AI Ethics 1:553\u2013562. https:\/\/doi.org\/10.1007\/s43681-021-00072-1","journal-title":"AI Ethics"},{"key":"3766_CR3","unstructured":"ZAO. https:\/\/apps.apple.com\/cn\/app\/zao\/id1465199127. Accessed September 09, 2020"},{"key":"3766_CR4","unstructured":"Reface App. https:\/\/reface.app\/. Accessed September 11, 2020"},{"key":"3766_CR5","unstructured":"FaceApp. https:\/\/www.faceapp.com\/. Accessed September 17, 2020"},{"key":"3766_CR6","unstructured":"Audacity. https:\/\/www.audacityteam.org\/. Accessed September 09, 2020"},{"key":"3766_CR7","unstructured":"Sound Forge. https:\/\/www.magix.com\/gb\/music\/sound-forge\/. Accessed January 11, 2021"},{"key":"3766_CR8","doi-asserted-by":"crossref","unstructured":"Shu K, Wang S, Lee D, Liu H (2020) Mining disinformation and fake news: concepts, methods, and recent advancements. In: Disinformation, misinformation, and fake news in social media. Springer, pp 1\u201319","DOI":"10.1007\/978-3-030-42699-6_1"},{"key":"3766_CR9","doi-asserted-by":"crossref","unstructured":"Chan C, Ginosar S, Zhou T, Efros AA (2019) Everybody dance now. In: Proceedings of the IEEE international conference on computer vision, pp 5933\u20135942","DOI":"10.1109\/ICCV.2019.00603"},{"key":"3766_CR10","doi-asserted-by":"crossref","unstructured":"Malik KM, Malik H, Baumann R (2019) Towards vulnerability analysis of voice-driven interfaces and countermeasures for replay attacks. In 2019 IEEE conference on multimedia information processing and retrieval (MIPR). IEEE, pp 523\u2013528","DOI":"10.1109\/MIPR.2019.00106"},{"key":"3766_CR11","doi-asserted-by":"publisher","first-page":"982","DOI":"10.1109\/JSTSP.2020.2999828","volume":"14","author":"KM Malik","year":"2020","unstructured":"Malik KM, Javed A, Malik H, Irtaza A (2020) A light-weight replay detection framework for voice controlled iot devices. IEEE J Sel Top Sign Process 14:982\u2013996","journal-title":"IEEE J Sel Top Sign Process"},{"key":"3766_CR12","doi-asserted-by":"publisher","first-page":"108283","DOI":"10.1016\/j.apacoust.2021.108283","volume":"183","author":"A Javed","year":"2021","unstructured":"Javed A, Malik KM, Irtaza A, Malik H (2021) Towards protecting cyber-physical and IoT systems from single-and multi-order voice spoofing attacks. Appl Acoust 183:108283","journal-title":"Appl Acoust"},{"key":"3766_CR13","doi-asserted-by":"publisher","first-page":"3524","DOI":"10.1109\/TIFS.2021.3082303","volume":"16","author":"M Aljasem","year":"2021","unstructured":"Aljasem M, Irtaza A, Malik H, Saba N, Javed A, Malik KM, Meharmohammadi M (2021) Secure automatic speaker verification (SASV) system through sm-ALTP features and asymmetric bagging. IEEE Trans Inf Forensics Secur 16:3524\u20133537","journal-title":"IEEE Trans Inf Forensics Secur"},{"key":"3766_CR14","doi-asserted-by":"crossref","unstructured":"Sharma M, Kaur M (2022) A review of Deepfake technology: an emerging AI threat. Soft Comput Secur Appl:605\u2013619","DOI":"10.1007\/978-981-16-5301-8_44"},{"key":"3766_CR15","doi-asserted-by":"publisher","first-page":"6259","DOI":"10.1007\/s11042-021-11733-y","volume":"81","author":"T Zhang","year":"2022","unstructured":"Zhang T (2022) Deepfake generation and detection, a survey. Multimed Tools Appl 81:6259\u20136276. https:\/\/doi.org\/10.1007\/s11042-021-11733-y","journal-title":"Multimed Tools Appl"},{"key":"3766_CR16","doi-asserted-by":"publisher","first-page":"18757","DOI":"10.1109\/ACCESS.2022.3151186","volume":"10","author":"A Malik","year":"2022","unstructured":"Malik A, Kuribayashi M, Abdullahi SM, Khan AN (2022) DeepFake detection for human face images and videos: a survey. IEEE Access 10:18757\u201318775","journal-title":"IEEE Access"},{"key":"3766_CR17","doi-asserted-by":"crossref","unstructured":"Rana MS, Nobi MN, Murali B, Sung AH (2022) Deepfake detection: a systematic literature review. IEEE Access","DOI":"10.1109\/ACCESS.2022.3154404"},{"key":"3766_CR18","doi-asserted-by":"publisher","first-page":"910","DOI":"10.1109\/JSTSP.2020.3002101","volume":"14","author":"L Verdoliva","year":"2020","unstructured":"Verdoliva L (2020) Media forensics and deepfakes: an overview. IEEE J Sel Top Sign Process 14:910\u2013932","journal-title":"IEEE J Sel Top Sign Process"},{"key":"3766_CR19","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/j.inffus.2020.06.014","volume":"64","author":"R Tolosana","year":"2020","unstructured":"Tolosana R, Vera-Rodriguez R, Fierrez J, Morales A, Ortega-Garcia J (2020) Deepfakes and beyond: a survey of face manipulation and fake detection. Inf Fusion 64:131\u2013148","journal-title":"Inf Fusion"},{"key":"3766_CR20","unstructured":"Nguyen TT, Nguyen CM, Nguyen DT, Nguyen DT, Nahavandi S (2019) Deep learning for deepfakes creation and detection. arXiv preprint arXiv:190911573"},{"key":"3766_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3425780","volume":"54","author":"Y Mirsky","year":"2021","unstructured":"Mirsky Y, Lee W (2021) The creation and detection of deepfakes: a survey. ACM Comput Surv 54:1\u201341","journal-title":"ACM Comput Surv"},{"key":"3766_CR22","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1016\/j.procs.2017.11.106","volume":"121","author":"L Oliveira","year":"2017","unstructured":"Oliveira L (2017) The current state of fake news. Procedia Comput Sci 121:817\u2013825","journal-title":"Procedia Comput Sci"},{"key":"3766_CR23","first-page":"147","volume":"98","author":"R Chesney","year":"2019","unstructured":"Chesney R, Citron D (2019) Deepfakes and the new disinformation war: the coming age of post-truth geopolitics. Foreign Aff 98:147","journal-title":"Foreign Aff"},{"key":"3766_CR24","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1109\/TTS.2020.3001312","volume":"1","author":"S Karnouskos","year":"2020","unstructured":"Karnouskos S (2020) Artificial intelligence in digital media: the era of deepfakes. IEEE Trans Technol Soc 1:138\u2013147","journal-title":"IEEE Trans Technol Soc"},{"key":"3766_CR25","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1007\/s41060-021-00299-5","volume":"13","author":"H Stiff","year":"2021","unstructured":"Stiff H, Johansson F (2021) Detecting computer-generated disinformation. Int J Data Sci Anal 13:363\u2013383. https:\/\/doi.org\/10.1007\/s41060-021-00299-5","journal-title":"Int J Data Sci Anal"},{"key":"3766_CR26","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1177\/1940161220944364","volume":"26","author":"T Dobber","year":"2021","unstructured":"Dobber T, Metoui N, Trilling D, Helberger N, de Vreese C (2021) Do (microtargeted) deepfakes have real effects on political attitudes? Int J Press Polit 26:69\u201391","journal-title":"Int J Press Polit"},{"key":"3766_CR27","doi-asserted-by":"publisher","first-page":"3947","DOI":"10.1007\/s10489-019-01488-3","volume":"49","author":"G Lingam","year":"2019","unstructured":"Lingam G, Rout RR, Somayajulu DV (2019) Adaptive deep Q-learning model for detecting social bots and influential users in online social networks. Appl Intell 49:3947\u20133964","journal-title":"Appl Intell"},{"key":"3766_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41467-018-06930-7","volume":"9","author":"C Shao","year":"2018","unstructured":"Shao C, Ciampaglia GL, Varol O, Yang K-C, Flammini A, Menczer F (2018) The spread of low-credibility content by social bots. Nat Commun 9:1\u20139","journal-title":"Nat Commun"},{"key":"3766_CR29","first-page":"7","volume-title":"Media manipulation and disinformation online","author":"A Marwick","year":"2017","unstructured":"Marwick A, Lewis R (2017) Media manipulation and disinformation online. Data & Society Research Institute, New York, pp 7\u201319"},{"key":"3766_CR30","doi-asserted-by":"publisher","first-page":"e175","DOI":"10.1016\/S2589-7500(20)30315-0","volume":"3","author":"S-F Tsao","year":"2021","unstructured":"Tsao S-F, Chen H, Tisseverasinghe T, Yang Y, Li L, Butt ZA (2021) What social media told us in the time of COVID-19: a scoping review. Lancet Digit Health 3:e175\u2013e194","journal-title":"Lancet Digit Health"},{"key":"3766_CR31","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1145\/3377330.3377334","volume":"48","author":"F Pierri","year":"2019","unstructured":"Pierri F, Ceri S (2019) False news on social media: a data-driven survey. ACM SIGMOD Rec 48:18\u201327","journal-title":"ACM SIGMOD Rec"},{"key":"3766_CR32","first-page":"1753","volume":"107","author":"B Chesney","year":"2019","unstructured":"Chesney B, Citron D (2019) Deep fakes: a looming challenge for privacy, democracy, and national security. Calif Law Rev 107:1753","journal-title":"Calif Law Rev"},{"key":"3766_CR33","doi-asserted-by":"crossref","unstructured":"G\u00fcera D, Delp EJ (2018) Deepfake video detection using recurrent neural networks. In 2018 15th IEEE international conference on advanced video and signal based surveillance (AVSS). IEEE, pp 1\u20136","DOI":"10.1109\/AVSS.2018.8639163"},{"key":"3766_CR34","first-page":"1","volume":"1","author":"S Gupta","year":"2021","unstructured":"Gupta S, Mohan N, Kaushal P (2021) Passive image forensics using universal techniques: a review. Artif Intell Rev 1:1\u201351","journal-title":"Artif Intell Rev"},{"key":"3766_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s13735-020-00196-w","volume":"10","author":"MR Pavan Kumar","year":"2021","unstructured":"Pavan Kumar MR, Jayagopal P (2021) Generative adversarial networks: a survey on applications and challenges. Int J Multimed Inf Retr 10:1\u201324. https:\/\/doi.org\/10.1007\/s13735-020-00196-w","journal-title":"Int J Multimed Inf Retr"},{"key":"3766_CR36","doi-asserted-by":"crossref","unstructured":"Choi Y, Choi M, Kim M, Ha J-W, Kim S, Choo J (2018) Stargan: unified generative adversarial networks for multi-domain image-to-image translation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8789\u20138797","DOI":"10.1109\/CVPR.2018.00916"},{"key":"3766_CR37","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn S, Seitz SM, Kemelmacher-Shlizerman I (2017) Synthesizing Obama: learning lip sync from audio. ACM Trans Graph 36:95\u2013108. https:\/\/doi.org\/10.1145\/3072959.3073640","journal-title":"ACM Trans Graph"},{"key":"3766_CR38","doi-asserted-by":"crossref","unstructured":"Thies J, Zollhofer M, Stamminger M, Theobalt C, Nie\u00dfner M (2016) Face2face: real-time face capture and reenactment of rgb videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2387\u20132395","DOI":"10.1109\/CVPR.2016.262"},{"key":"3766_CR39","doi-asserted-by":"crossref","unstructured":"Wiles O, Sophia Koepke A, Zisserman A (2018) X2face: a network for controlling face generation using images, audio, and pose codes. In: Proceedings of the European conference on computer vision (ECCV), pp 670\u2013686","DOI":"10.1007\/978-3-030-01261-8_41"},{"key":"3766_CR40","doi-asserted-by":"crossref","unstructured":"Bregler C, Covell M, Slaney M (1997) Video rewrite: driving visual speech with audio. In: Proceedings of the 24th annual conference on Computer graphics and interactive techniques, pp 353\u2013360","DOI":"10.1145\/258734.258880"},{"key":"3766_CR41","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1145\/3447255","volume":"64","author":"DG Johnson","year":"2021","unstructured":"Johnson DG, Diakopoulos N (2021) What to do about deepfakes. Commun ACM 64:33\u201335","journal-title":"Commun ACM"},{"key":"3766_CR42","unstructured":"FakeApp 2.2.0. https:\/\/www.malavida.com\/en\/soft\/fakeapp\/. Accessed September 18, 2020"},{"key":"3766_CR43","unstructured":"Faceswap: Deepfakes software for all. https:\/\/github.com\/deepfakes\/faceswap. Accessed September 08, 2020"},{"key":"3766_CR44","unstructured":"DeepFaceLab. https:\/\/github.com\/iperov\/DeepFaceLab. Accessed August 18, 2020"},{"key":"3766_CR45","unstructured":"Siarohin A, Lathuili\u00e8re S, Tulyakov S, Ricci E, Sebe N (2019) First order motion model for image animation. In: Advances in neural information processing systems, pp 7137\u20137147"},{"key":"3766_CR46","doi-asserted-by":"crossref","unstructured":"Zhou H, Sun Y, Wu W, Loy CC, Wang X, Liu Z (2021) Pose-controllable talking face generation by implicitly modularized audio-visual representation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4176\u20134186","DOI":"10.1109\/CVPR46437.2021.00416"},{"key":"3766_CR47","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1145\/3197517.3201283","volume":"37","author":"H Kim","year":"2018","unstructured":"Kim H, Garrido P, Tewari A, Xu W, Thies J, Niessner M, P\u00e9rez P, Richardt C, Zollh\u00f6fer M, Theobalt C (2018) Deep video portraits. ACM Trans Graph 37:163\u2013177. https:\/\/doi.org\/10.1145\/3197517.3201283","journal-title":"ACM Trans Graph"},{"key":"3766_CR48","doi-asserted-by":"crossref","unstructured":"Ha S, Kersner M, Kim B, Seo S, Kim D (2020) Marionette: few-shot face reenactment preserving identity of unseen targets. In: Proceedings of the AAAI conference on artificial intelligence, pp 10893\u201310900","DOI":"10.1609\/aaai.v34i07.6721"},{"key":"3766_CR49","doi-asserted-by":"crossref","unstructured":"Wang Y, Bilinski P, Bremond F, Dantcheva A (2020) ImaGINator: conditional Spatio-temporal GAN for video generation. In: The IEEE winter conference on applications of computer vision, pp 1160\u20131169","DOI":"10.1109\/WACV45572.2020.9093492"},{"key":"3766_CR50","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480484","volume":"40","author":"Y Lu","year":"2021","unstructured":"Lu Y, Chai J, Cao X (2021) Live speech portraits: real-time photorealistic talking-head animation. ACM Trans Graph 40:1\u201317","journal-title":"ACM Trans Graph"},{"key":"3766_CR51","doi-asserted-by":"crossref","unstructured":"Lahiri A, Kwatra V, Frueh C, Lewis J, Bregler C (2021) LipSync3D: data-efficient learning of personalized 3D talking faces from video using pose and lighting normalization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2755\u20132764","DOI":"10.1109\/CVPR46437.2021.00278"},{"key":"3766_CR52","doi-asserted-by":"publisher","first-page":"39","DOI":"10.22215\/timreview\/1282","volume":"9","author":"M Westerlund","year":"2019","unstructured":"Westerlund M (2019) The emergence of deepfake technology: a review. Technol Innov Manag Rev 9:39\u201352","journal-title":"Technol Innov Manag Rev"},{"key":"3766_CR53","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/3371409","volume":"63","author":"S Greengard","year":"2019","unstructured":"Greengard S (2019) Will deepfakes do deep damage? Commun ACM 63:17\u201319","journal-title":"Commun ACM"},{"key":"3766_CR54","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1089\/cyber.2020.0176","volume":"24","author":"Y Lee","year":"2021","unstructured":"Lee Y, Huang K-T, Blom R, Schriner R, Ciccarelli CA (2021) To believe or not to believe: framing analysis of content and audience response of top 10 deepfake videos on youtube. Cyberpsychol Behav Soc Netw 24:153\u2013158","journal-title":"Cyberpsychol Behav Soc Netw"},{"key":"3766_CR55","unstructured":"Oord Avd et al. (2016) Wavenet: a generative model for raw audio. In: 9th ISCA speech synthesis workshop, p 2"},{"key":"3766_CR56","doi-asserted-by":"crossref","unstructured":"Wang Y et al. (2017) Tacotron: towards end-to-end speech synthesis. arXiv preprint arXiv:170310135","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"3766_CR57","unstructured":"Arik SO et al. (2017) Deep voice: real-time neural text-to-speech. In: International conference on machine learning PMLR, pp 195\u2013204"},{"key":"3766_CR58","doi-asserted-by":"crossref","unstructured":"Wang R, Juefei-Xu F, Huang Y, Guo Q, Xie X, Ma L, Liu Y (2020) Deepsonar: towards effective and robust detection of ai-synthesized fake voices. In: Proceedings of the 28th ACM international conference on multimedia, pp 1207\u20131216","DOI":"10.1145\/3394171.3413716"},{"key":"3766_CR59","unstructured":"Arik S, Chen J, Peng K, Ping W, Zhou Y (2018) Neural voice cloning with a few samples. In: Advances in neural information processing systems, pp 10019\u201310029"},{"key":"3766_CR60","doi-asserted-by":"crossref","unstructured":"Wang T-C, Liu M-Y, Zhu J-Y, Tao A, Kautz J, Catanzaro B (2018) High-resolution image synthesis and semantic manipulation with conditional gans. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8798\u20138807","DOI":"10.1109\/CVPR.2018.00917"},{"key":"3766_CR61","doi-asserted-by":"crossref","unstructured":"Nirkin Y, Masi I, Tuan AT, Hassner T, Medioni G (2018) On face segmentation, face swapping, and face perception. In 2018 13th IEEE international conference on automatic face & gesture recognition (FG 2018). IEEE, pp 98\u2013105","DOI":"10.1109\/FG.2018.00024"},{"key":"3766_CR62","doi-asserted-by":"crossref","unstructured":"Bitouk D, Kumar N, Dhillon S, Belhumeur P, Nayar SK (2008) Face swapping: automatically replacing faces in photographs. In: ACM transactions on graphics (TOG). ACM, pp 39","DOI":"10.1145\/1399504.1360638"},{"key":"3766_CR63","doi-asserted-by":"crossref","unstructured":"Lin Y, Lin Q, Tang F, Wang S (2012) Face replacement with large-pose differences. In: Proceedings of the 20th ACM international conference on multimedia. ACM, pp 1249\u20131250","DOI":"10.1145\/2393347.2396426"},{"key":"3766_CR64","doi-asserted-by":"crossref","unstructured":"Smith BM, Zhang L (2012) Joint face alignment with non-parametric shape models. In: European conference on computer vision. Springer, pp 43\u201356","DOI":"10.1007\/978-3-642-33712-3_4"},{"key":"3766_CR65","unstructured":"Faceswap-GAN https:\/\/github.com\/shaoanlu\/faceswap-GAN. Accessed September 18, 2020"},{"key":"3766_CR66","doi-asserted-by":"crossref","unstructured":"Korshunova I, Shi W, Dambre J, Theis L (2017) Fast face-swap using convolutional neural networks. In: Proceedings of the IEEE international conference on computer vision, pp 3677\u20133685","DOI":"10.1109\/ICCV.2017.397"},{"key":"3766_CR67","doi-asserted-by":"crossref","unstructured":"Nirkin Y, Keller Y, Hassner T (2019) FSGAN: subject agnostic face swapping and reenactment. In: Proceedings of the IEEE international conference on computer vision, pp 7184\u20137193","DOI":"10.1109\/ICCV.2019.00728"},{"key":"3766_CR68","doi-asserted-by":"crossref","unstructured":"Natsume R, Yatagawa T, Morishima S (2018) RSGAN: face swapping and editing using face and hair representation in latent spaces. arXiv preprint arXiv:180403447","DOI":"10.1145\/3230744.3230818"},{"key":"3766_CR69","doi-asserted-by":"crossref","unstructured":"Natsume R, Yatagawa T, Morishima S (2018) Fsnet: an identity-aware generative model for image-based face swapping. In: Asian conference on computer vision. Springer, pp 117\u2013132","DOI":"10.1007\/978-3-030-20876-9_8"},{"key":"3766_CR70","doi-asserted-by":"crossref","unstructured":"Li L, Bao J, Yang H, Chen D, Wen F (2020) Advancing high fidelity identity swapping for forgery detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5074\u20135083","DOI":"10.1109\/CVPR42600.2020.00512"},{"key":"3766_CR71","unstructured":"Petrov I et al. (2020) DeepFaceLab: a simple, flexible and extensible face swapping framework. arXiv preprint arXiv:200505535"},{"key":"3766_CR72","doi-asserted-by":"crossref","unstructured":"Chen D, Chen Q, Wu J, Yu X, Jia T (2019) Face swapping: realistic image synthesis based on facial landmarks alignment. Math Probl Eng 2019","DOI":"10.1155\/2019\/8902701"},{"key":"3766_CR73","doi-asserted-by":"crossref","unstructured":"Zhang Y, Zheng L, Thing VL (2017) Automated face swapping and its detection. In: 2017 IEEE 2nd international conference on signal and image processing (ICSIP). IEEE, pp 15\u201319","DOI":"10.1109\/SIPROCESS.2017.8124497"},{"key":"3766_CR74","doi-asserted-by":"crossref","unstructured":"Yang X, Li Y, Lyu S (2019) Exposing deep fakes using inconsistent head poses. In: 2019 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 8261\u20138265","DOI":"10.1109\/ICASSP.2019.8683164"},{"key":"3766_CR75","unstructured":"G\u00fcera D, Baireddy S, Bestagini P, Tubaro S, Delp EJ (2019) We need no pixels: video manipulation detection using stream descriptors. arXiv preprint arXiv:190608743"},{"key":"3766_CR76","volume-title":"Video demystified: a handbook for the digital engineer","author":"K Jack","year":"2011","unstructured":"Jack K (2011) Video demystified: a handbook for the digital engineer. Elsevier"},{"key":"3766_CR77","doi-asserted-by":"crossref","unstructured":"Ciftci UA, Demir I (2020) FakeCatcher: detection of synthetic portrait videos using biological signals. IEEE Trans Pattern Anal Mach Intell 1","DOI":"10.1109\/TPAMI.2020.3009287"},{"key":"3766_CR78","doi-asserted-by":"publisher","first-page":"83144","DOI":"10.1109\/ACCESS.2020.2988660","volume":"8","author":"T Jung","year":"2020","unstructured":"Jung T, Kim S, Kim K (2020) DeepVision: Deepfakes detection using human eye blinking pattern. IEEE Access 8:83144\u201383154","journal-title":"IEEE Access"},{"key":"3766_CR79","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1109\/TPAMI.2017.2781233","volume":"41","author":"R Ranjan","year":"2017","unstructured":"Ranjan R, Patel VM, Chellappa R (2017) Hyperface: a deep multi-task learning framework for face detection, landmark localization, pose estimation, and gender recognition. IEEE Trans Pattern Anal Mach Intell 41:121\u2013135","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3766_CR80","unstructured":"Soukupova T, Cech J (2016) Eye blink detection using facial landmarks. In: 21st Computer Vision Winter Workshop"},{"key":"3766_CR81","doi-asserted-by":"crossref","unstructured":"Matern F, Riess C, Stamminger M (2019) Exploiting visual artifacts to expose deepfakes and face manipulations. In: 2019 IEEE winter applications of computer vision workshops (WACVW). IEEE, pp 83\u201392","DOI":"10.1109\/WACVW.2019.00020"},{"key":"3766_CR82","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1011174803800","volume":"43","author":"J Malik","year":"2001","unstructured":"Malik J, Belongie S, Leung T, Shi J (2001) Contour and texture analysis for image segmentation. Int J Comput Vis 43:7\u201327","journal-title":"Int J Comput Vis"},{"key":"3766_CR83","unstructured":"Agarwal S, Farid H, Gu Y, He M, Nagano K, Li H (2019) Protecting world leaders against deep fakes. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 38-45"},{"key":"3766_CR84","unstructured":"Li Y, Lyu S (2019) Exposing deepfake videos by detecting face warping artifacts. In: IEEE conference on computer vision and pattern recognition workshops (CVPRW), pp 46\u201352"},{"key":"3766_CR85","doi-asserted-by":"crossref","unstructured":"Li Y, Chang M-C, Lyu S (2018) In ictu oculi: exposing ai generated fake face videos by detecting eye blinking. In: 2018 IEEE international workshop on information forensics and security (WIFS). IEEE, pp 1\u20137","DOI":"10.1109\/WIFS.2018.8630787"},{"key":"3766_CR86","doi-asserted-by":"crossref","unstructured":"Montserrat DM et al. (2020) Deepfakes detection with automatic face weighting. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 668\u2013669","DOI":"10.1109\/CVPRW50498.2020.00342"},{"key":"3766_CR87","unstructured":"de Lima O, Franklin S, Basu S, Karwoski B, George A (2020) Deepfake detection using spatiotemporal convolutional networks. arXiv preprint arXiv:14749"},{"key":"3766_CR88","doi-asserted-by":"crossref","unstructured":"Agarwal S, El-Gaaly T, Farid H, Lim S-N (2020) Detecting deep-fake videos from appearance and behavior. In 2020 IEEE international workshop on information forensics and security (WIFS). IEEE, pp 1\u20136","DOI":"10.1109\/WIFS49906.2020.9360904"},{"key":"3766_CR89","doi-asserted-by":"crossref","unstructured":"Fernandes S, Raj S, Ortiz E, Vintila I, Salter M, Urosevic G, Jha S (2019) Predicting heart rate variations of Deepfake videos using neural ODE. In: Proceedings of the IEEE international conference on computer vision workshops","DOI":"10.1109\/ICCVW.2019.00213"},{"key":"3766_CR90","doi-asserted-by":"crossref","unstructured":"Yang J, Xiao S, Li A, Lu W, Gao X, Li Y (2021) MSTA-net: forgery detection by generating manipulation trace based on multi-scale self-texture attention. IEEE Trans Circuits Syst Video Technol","DOI":"10.1109\/TCSVT.2021.3133859"},{"key":"3766_CR91","first-page":"80","volume":"3","author":"E Sabir","year":"2019","unstructured":"Sabir E, Cheng J, Jaiswal A, AbdAlmageed W, Masi I, Natarajan P (2019) Recurrent convolutional strategies for face manipulation detection in videos. Interfaces (GUI) 3:80\u201387","journal-title":"Interfaces (GUI)"},{"key":"3766_CR92","doi-asserted-by":"crossref","unstructured":"Afchar D, Nozick V, Yamagishi J, Echizen I (2018) Mesonet: a compact facial video forgery detection network. In: 2018 IEEE international workshop on information forensics and security (WIFS). IEEE, pp 1\u20137","DOI":"10.1109\/WIFS.2018.8630761"},{"key":"3766_CR93","doi-asserted-by":"crossref","unstructured":"Nguyen HH, Fang F, Yamagishi J, Echizen I (2019) Multi-task learning for detecting and segmenting manipulated facial images and videos. In: 2019 IEEE 10th international conference on biometrics theory, applications and systems (BTAS), pp 1\u20138","DOI":"10.1109\/BTAS46853.2019.9185974"},{"key":"3766_CR94","unstructured":"Cozzolino D, Thies J, R\u00f6ssler A, Riess C, Nie\u00dfner M, Verdoliva L (2018) Forensictransfer: weakly-supervised domain adaptation for forgery detection. arXiv preprint arXiv:181202510"},{"key":"3766_CR95","doi-asserted-by":"crossref","unstructured":"Rossler A, Cozzolino D, Verdoliva L, Riess C, Thies J, Nie\u00dfner M (2019) Faceforensics++: learning to detect manipulated facial images. In: Proceedings of the IEEE international conference on computer vision, pp 1\u201311","DOI":"10.1109\/ICCV.2019.00009"},{"key":"3766_CR96","first-page":"1755","volume":"10","author":"DE King","year":"2009","unstructured":"King DE (2009) Dlib-ml: a machine learning toolkit. J Mach Learn Res 10:1755\u20131758","journal-title":"J Mach Learn Res"},{"key":"3766_CR97","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","volume":"23","author":"K Zhang","year":"2016","unstructured":"Zhang K, Zhang Z, Li Z, Qiao Y (2016) Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Process Lett 23:1499\u20131503","journal-title":"IEEE Signal Process Lett"},{"key":"3766_CR98","doi-asserted-by":"crossref","unstructured":"Wiles O, Koepke A, Zisserman A (2018) Self-supervised learning of a facial attribute embedding from video. Paper presented at the 29th British machine vision conference (BMVC)","DOI":"10.1109\/ICCVW.2019.00364"},{"key":"3766_CR99","unstructured":"Rezende DJ, Mohamed S, Wierstra D (2014) Stochastic backpropagation and approximate inference in deep generative models. Paper presented at the international conference on machine learning, pp 1278\u20131286"},{"key":"3766_CR100","unstructured":"Rahman H, Ahmed MU, Begum S, Funk P (2016) Real time heart rate monitoring from facial RGB color video using webcam. In: The 29th annual workshop of the Swedish artificial intelligence society (SAIS). Link\u00f6ping University Electronic Press"},{"key":"3766_CR101","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2185520.2185561","volume":"31","author":"H-Y Wu","year":"2012","unstructured":"Wu H-Y, Rubinstein M, Shih E, Guttag J, Durand F, Freeman W (2012) Eulerian video magnification for revealing subtle changes in the world. ACM Trans Graph 31:1\u20138","journal-title":"ACM Trans Graph"},{"key":"3766_CR102","unstructured":"Chen RT, Rubanova Y, Bettencourt J, Duvenaud DK (2018) Neural ordinary differential equations. In: Advances in neural information processing systems, pp 6571\u20136583"},{"key":"3766_CR103","doi-asserted-by":"publisher","first-page":"4234","DOI":"10.1109\/TIFS.2021.3102487","volume":"16","author":"J Yang","year":"2021","unstructured":"Yang J, Li A, Xiao S, Lu W, Gao X (2021) MTD-net: learning to detect deepfakes images by multi-scale texture difference. IEEE Trans Inf Forensics Secur 16:4234\u20134245","journal-title":"IEEE Trans Inf Forensics Secur"},{"key":"3766_CR104","doi-asserted-by":"crossref","unstructured":"Fan B, Wang L, Soong FK, Xie L (2015) Photo-real talking head with deep bidirectional LSTM. In: 2015 IEEE international conference on acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 4884\u20134888","DOI":"10.1109\/ICASSP.2015.7178899"},{"key":"3766_CR105","doi-asserted-by":"crossref","unstructured":"Charles J, Magee D, Hogg D (2016) Virtual immortality: reanimating characters from tv shows. In European conference on computer vision. Springer, pp 879\u2013886","DOI":"10.1007\/978-3-319-49409-8_71"},{"key":"3766_CR106","first-page":"1","volume":"1","author":"A Jamaludin","year":"2019","unstructured":"Jamaludin A, Chung JS, Zisserman A (2019) You said that?: Synthesising talking faces from audio. Int J Comput Vis 1:1\u201313","journal-title":"Int J Comput Vis"},{"key":"3766_CR107","doi-asserted-by":"crossref","unstructured":"Vougioukas K, Petridis S, Pantic M (2019) End-to-end speech-driven realistic facial animation with temporal GANs. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 37\u201340","DOI":"10.1007\/s11263-019-01251-8"},{"key":"3766_CR108","doi-asserted-by":"crossref","unstructured":"Zhou H, Liu Y, Liu Z, Luo P, Wang X (2019) Talking face generation by adversarially disentangled audio-visual representation. In: Proceedings of the AAAI conference on artificial intelligence, pp 9299\u20139306","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"3766_CR109","doi-asserted-by":"crossref","unstructured":"Garrido P, Valgaerts L, Sarmadi H, Steiner I, Varanasi K, Perez P, Theobalt C (2015) Vdub: modifying face video of actors for plausible visual alignment to a dubbed audio track. In: Computer graphics forum. Wiley Online Library, pp 193\u2013204","DOI":"10.1111\/cgf.12552"},{"key":"3766_CR110","unstructured":"KR Prajwal, Mukhopadhyay R, Philip J, Jha A, Namboodiri V, Jawahar C (2019) Towards automatic face-to-face translation. In: Proceedings of the 27th ACM international conference on multimedia, pp 1428\u20131436"},{"key":"3766_CR111","doi-asserted-by":"crossref","unstructured":"Prajwal K, Mukhopadhyay R, Namboodiri VP, Jawahar C (2020) A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM international conference on multimedia, pp 484\u2013492","DOI":"10.1145\/3394171.3413532"},{"key":"3766_CR112","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3323028","volume":"38","author":"O Fried","year":"2019","unstructured":"Fried O, Tewari A, Zollh\u00f6fer M, Finkelstein A, Shechtman E, Goldman DB, Genova K, Jin Z, Theobalt C, Agrawala M (2019) Text-based editing of talking-head video. ACM Trans Graph 38:1\u201314","journal-title":"ACM Trans Graph"},{"key":"3766_CR113","unstructured":"Kim B-H, Ganapathi V (2019) LumiereNet: lecture video synthesis from audio. arXiv preprint arXiv:190702253"},{"key":"3766_CR114","doi-asserted-by":"crossref","unstructured":"Korshunov P, Marcel S (2018) Speaker inconsistency detection in tampered video. In 2018 26th European signal processing conference (EUSIPCO). IEEE, pp 2375\u20132379","DOI":"10.23919\/EUSIPCO.2018.8553270"},{"key":"3766_CR115","doi-asserted-by":"crossref","unstructured":"Sanderson C, Lovell BC (2009) Multi-region probabilistic histograms for robust and scalable identity inference. In: International conference on biometrics. Springer, pp 199\u2013208","DOI":"10.1007\/978-3-642-01793-3_21"},{"key":"3766_CR116","doi-asserted-by":"crossref","unstructured":"Anand A, Labati RD, Genovese A, Mu\u00f1oz E, Piuri V, Scotti F (2017) Age estimation based on face images and pre-trained convolutional neural networks. In: 2017 IEEE symposium series on computational intelligence (SSCI). IEEE, pp 1\u20137","DOI":"10.1109\/SSCI.2017.8285381"},{"key":"3766_CR117","doi-asserted-by":"publisher","first-page":"5329","DOI":"10.1007\/s11042-015-2848-2","volume":"75","author":"E Boutellaa","year":"2016","unstructured":"Boutellaa E, Boulkenafet Z, Komulainen J, Hadid A (2016) Audiovisual synchrony assessment for replay attack detection in talking face biometrics. Multimed Tools Appl 75:5329\u20135343","journal-title":"Multimed Tools Appl"},{"key":"3766_CR118","doi-asserted-by":"crossref","unstructured":"Korshunov P et al. (2019) Tampered speaker inconsistency detection with phonetically aware audio-visual features. In: International Conference on Machine Learning","DOI":"10.23919\/EUSIPCO.2018.8553270"},{"key":"3766_CR119","doi-asserted-by":"crossref","unstructured":"Agarwal S, Farid H, Fried O, Agrawala M (2020) Detecting deep-fake videos from phoneme-viseme mismatches. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 660\u2013661","DOI":"10.1109\/CVPRW50498.2020.00338"},{"key":"3766_CR120","doi-asserted-by":"crossref","unstructured":"Haliassos A, Vougioukas K, Petridis S, Pantic M (2021) Lips Don't lie: a Generalisable and robust approach to face forgery detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5039\u20135049","DOI":"10.1109\/CVPR46437.2021.00500"},{"key":"3766_CR121","doi-asserted-by":"crossref","unstructured":"Chugh K, Gupta P, Dhall A, Subramanian R (2020) Not made for each other-audio-visual dissonance-based deepfake detection and localization. In: Proceedings of the 28th ACM international conference on multimedia, pp 439\u2013447","DOI":"10.1145\/3394171.3413700"},{"key":"3766_CR122","doi-asserted-by":"crossref","unstructured":"Mittal T, Bhattacharya U, Chandra R, Bera A, Manocha D (2020) Emotions Don't lie: an audio-visual deepfake detection method using affective cues. In: Proceedings of the 28th ACM international conference on multimedia, pp 2823\u20132832","DOI":"10.1145\/3394171.3413570"},{"key":"3766_CR123","doi-asserted-by":"publisher","first-page":"1024","DOI":"10.1109\/JSTSP.2020.2999185","volume":"14","author":"A Chintha","year":"2020","unstructured":"Chintha A, Thai B, Sohrawardi SJ, Bhatt K, Hickerson A, Wright M, Ptucha R (2020) Recurrent convolutional structures for audio spoof and video deepfake detection. IEEE J Sel Top Sign Process 14:1024\u20131037","journal-title":"IEEE J Sel Top Sign Process"},{"key":"3766_CR124","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3197517.3201350","volume":"37","author":"J Thies","year":"2018","unstructured":"Thies J, Zollh\u00f6fer M, Theobalt C, Stamminger M, Nie\u00dfner M (2018) Real-time reenactment of human portrait videos. ACM Trans Graph 37:1\u201313. https:\/\/doi.org\/10.1145\/3197517.3201350","journal-title":"ACM Trans Graph"},{"key":"3766_CR125","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2816795.2818056","volume":"34","author":"J Thies","year":"2015","unstructured":"Thies J, Zollh\u00f6fer M, Nie\u00dfner M, Valgaerts L, Stamminger M, Theobalt C (2015) Real-time expression transfer for facial reenactment. ACM Trans Graph 34:1\u201314","journal-title":"ACM Trans Graph"},{"key":"3766_CR126","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2601097.2601165","volume":"33","author":"M Zollh\u00f6fer","year":"2014","unstructured":"Zollh\u00f6fer M, Nie\u00dfner M, Izadi S, Rehmann C, Zach C, Fisher M, Wu C, Fitzgibbon A, Loop C, Theobalt C, Stamminger M (2014) Real-time non-rigid reconstruction using an RGB-D camera. ACM Trans Graph 33:1\u201312","journal-title":"ACM Trans Graph"},{"key":"3766_CR127","first-page":"1","volume":"37","author":"J Thies","year":"2018","unstructured":"Thies J, Zollh\u00f6fer M, Theobalt C, Stamminger M, Nie\u00dfner M (2018) Headon: real-time reenactment of human portrait videos. ACM Trans Graph 37:1\u201313","journal-title":"ACM Trans Graph"},{"key":"3766_CR128","unstructured":"Mirza M, Osindero S (2014) Conditional generative adversarial nets. arXiv preprint arXiv:14111784"},{"key":"3766_CR129","doi-asserted-by":"crossref","unstructured":"Wu W, Zhang Y, Li C, Qian C, Change Loy C (2018) ReenactGAN: learning to reenact faces via boundary transfer. In: Proceedings of the European conference on computer vision (ECCV), pp 603\u2013619","DOI":"10.1007\/978-3-030-01246-5_37"},{"key":"3766_CR130","doi-asserted-by":"crossref","unstructured":"Pumarola A, Agudo A, Mart\u00ednez AM, Sanfeliu A, Moreno-Noguer F (2018) GANimation: anatomically-aware facial animation from a single image. In: Proceedings of the European conference on computer vision (ECCV), pp 818\u2013833","DOI":"10.1007\/978-3-030-01249-6_50"},{"key":"3766_CR131","unstructured":"Sanchez E, Valstar M (2020) Triple consistency loss for pairing distributions in GAN-based face synthesis. In: 15th IEEE international conference on automatic face and gesture recognition. IEEE, pp 53\u201360"},{"key":"3766_CR132","doi-asserted-by":"crossref","unstructured":"Zakharov E, Shysheya A, Burkov E, Lempitsky V (2019) Few-shot adversarial learning of realistic neural talking head models. In: Proceedings of the IEEE international conference on computer vision, pp 9459\u20139468","DOI":"10.1109\/ICCV.2019.00955"},{"key":"3766_CR133","unstructured":"Zhang Y, Zhang S, He Y, Li C, Loy CC, Liu Z (2019) One-shot face reenactment. Paper presented at the British machine vision conference (BMVC)"},{"key":"3766_CR134","unstructured":"Hao H, Baireddy S, Reibman AR, Delp EJ (2020) FaR-GAN for one-shot face reenactment. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"3766_CR135","doi-asserted-by":"crossref","unstructured":"Blanz V, Vetter T (1999) A morphable model for the synthesis of 3D faces. In: Proceedings of the 26th annual conference on Computer graphics and interactive techniques, pp 187\u2013194","DOI":"10.1145\/311535.311556"},{"key":"3766_CR136","doi-asserted-by":"crossref","unstructured":"Wehrbein T, Rudolph M, Rosenhahn B, Wandt B (2021) Probabilistic monocular 3d human pose estimation with normalizing flows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11199\u201311208","DOI":"10.1109\/ICCV48922.2021.01101"},{"key":"3766_CR137","doi-asserted-by":"crossref","unstructured":"Lorenzo-Trueba J, Yamagishi J, Toda T, Saito D, Villavicencio F, Kinnunen T, Ling Z (2018) The voice conversion challenge 2018: promoting development of parallel and nonparallel methods. In the speaker and language recognition workshop. ISCA, pp 195\u2013202","DOI":"10.21437\/Odyssey.2018-28"},{"key":"3766_CR138","doi-asserted-by":"crossref","unstructured":"Amerini I, Galteri L, Caldelli R, Del Bimbo A (2019) Deepfake video detection through optical flow based CNN. In proceedings of the IEEE international conference on computer vision workshops","DOI":"10.1109\/ICCVW.2019.00152"},{"key":"3766_CR139","doi-asserted-by":"publisher","first-page":"1462","DOI":"10.1109\/83.791974","volume":"8","author":"L Alparone","year":"1999","unstructured":"Alparone L, Barni M, Bartolini F, Caldelli R (1999) Regularization of optic flow estimates by means of weighted vector median filtering. IEEE Trans Image Process 8:1462\u20131467","journal-title":"IEEE Trans Image Process"},{"key":"3766_CR140","doi-asserted-by":"crossref","unstructured":"Sun D, Yang X, Liu M-Y, Kautz J (2018) PWC-net: CNNs for optical flow using pyramid, warping, and cost volume. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8934\u20138943","DOI":"10.1109\/CVPR.2018.00931"},{"key":"3766_CR141","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis T, Robinson P, Morency L-P (2016) Openface: an open source facial behavior analysis toolkit. In: 2016 IEEE winter conference on applications of computer vision (WACV). IEEE, pp 1\u201310","DOI":"10.1109\/WACV.2016.7477553"},{"key":"3766_CR142","unstructured":"Kingma DP, Welling M (2013) Auto-encoding variational bayes. arXiv preprint arXiv:13126114"},{"key":"3766_CR143","unstructured":"Radford A, Metz L, Chintala S (2015) Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:151106434"},{"key":"3766_CR144","unstructured":"Liu M-Y, Tuzel O (2016) Coupled generative adversarial networks. In: Advances in neural information processing systems, pp 469\u2013477"},{"key":"3766_CR145","unstructured":"Karras T, Aila T, Laine S, Lehtinen J (2017) Progressive growing of gans for improved quality, stability, and variation. In: 6th International Conference on Learning Representations"},{"key":"3766_CR146","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aila T (2019) A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4401\u20134410","DOI":"10.1109\/CVPR.2019.00453"},{"key":"3766_CR147","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aittala M, Hellsten J, Lehtinen J, Aila T (2020) Analyzing and improving the image quality of stylegan. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8110\u20138119","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"3766_CR148","doi-asserted-by":"crossref","unstructured":"Huang R, Zhang S, Li T, He R (2017) Beyond face rotation: global and local perception Gan for photorealistic and identity preserving frontal view synthesis. In: Proceedings of the IEEE international conference on computer vision, pp 2439\u20132448","DOI":"10.1109\/ICCV.2017.267"},{"key":"3766_CR149","unstructured":"Zhang H, Goodfellow I, Metaxas D, Odena A (2019) Self-attention generative adversarial networks. In: international conference on machine learning. PMLR, pp 7354\u20137363"},{"key":"3766_CR150","unstructured":"Brock A, Donahue J, Simonyan K (2019) Large scale gan training for high fidelity natural image synthesis. In: 7th International Conference on Learning Representations"},{"key":"3766_CR151","doi-asserted-by":"crossref","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN (2017) Stackgan: text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE international conference on computer vision, pp 5907\u20135915","DOI":"10.1109\/ICCV.2017.629"},{"key":"3766_CR152","doi-asserted-by":"publisher","first-page":"2260","DOI":"10.1007\/s10489-021-02464-6","volume":"52","author":"E Lu","year":"2022","unstructured":"Lu E, Hu X (2022) Image super-resolution via channel attention and spatial attention. Appl Intell 52:2260\u20132268. https:\/\/doi.org\/10.1007\/s10489-021-02464-6","journal-title":"Appl Intell"},{"key":"3766_CR153","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1016\/j.ins.2020.05.134","volume":"537","author":"J-L Zhong","year":"2020","unstructured":"Zhong J-L, Pun C-M, Gan Y-F (2020) Dense moment feature index and best match algorithms for video copy-move forgery detection. Inf Sci 537:184\u2013202","journal-title":"Inf Sci"},{"key":"3766_CR154","doi-asserted-by":"crossref","unstructured":"Ding X, Huang Y, Li Y, He J (2020) Forgery detection of motion compensation interpolated frames based on discontinuity of optical flow. Multimed Tools Appl:1\u201326","DOI":"10.1007\/s11042-020-09340-4"},{"key":"3766_CR155","doi-asserted-by":"crossref","unstructured":"Niyishaka P, Bhagvati C (2020) Copy-move forgery detection using image blobs and BRISK feature. Multimed Tools Appl:1\u201315","DOI":"10.1007\/s11042-020-09225-6"},{"key":"3766_CR156","doi-asserted-by":"crossref","unstructured":"Sunitha K, Krishna A, Prasad B (2022) Copy-move tampering detection using keypoint based hybrid feature extraction and improved transformation model. Appl Intell:1\u201312","DOI":"10.1007\/s10489-022-03207-x"},{"key":"3766_CR157","doi-asserted-by":"crossref","unstructured":"Tyagi S, Yadav D (2022) A detailed analysis of image and video forgery detection techniques. Vis Comput:1\u201321","DOI":"10.1007\/s00371-021-02347-4"},{"key":"3766_CR158","first-page":"1927","volume":"69","author":"M Nawaz","year":"2021","unstructured":"Nawaz M, Mehmood Z, Nazir T, Masood M, Tariq U, Mahdi Munshi A, Mehmood A, Rashid M (2021) Image authenticity detection using DWT and circular block-based LTrP features. Comput Mater Contin 69:1927\u20131944","journal-title":"Comput Mater Contin"},{"key":"3766_CR159","doi-asserted-by":"crossref","unstructured":"Akhtar Z, Dasgupta D (2019) A comparative evaluation of local feature descriptors for deepfakes detection. In: 2019 IEEE international symposium on technologies for homeland security (HST). IEEE, pp 1\u20135","DOI":"10.1109\/HST47167.2019.9033005"},{"key":"3766_CR160","doi-asserted-by":"crossref","unstructured":"McCloskey S, Albright M (2018) Detecting gan-generated imagery using color cues. arXiv preprint arXiv:08247","DOI":"10.1109\/ICIP.2019.8803661"},{"key":"3766_CR161","doi-asserted-by":"crossref","unstructured":"Guarnera L, Giudice O, Battiato S (2020) DeepFake detection by analyzing convolutional traces. In proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 666\u2013667","DOI":"10.1109\/CVPRW50498.2020.00341"},{"key":"3766_CR162","first-page":"532","volume":"5","author":"L Nataraj","year":"2019","unstructured":"Nataraj L, Mohammed TM, Manjunath B, Chandrasekaran S, Flenner A, Bappy JH, Roy-Chowdhury AK (2019) Detecting GAN generated fake images using co-occurrence matrices. Electronic Imaging 5:532\u2013531","journal-title":"Electronic Imaging"},{"key":"3766_CR163","doi-asserted-by":"crossref","unstructured":"Yu N, Davis LS, Fritz M (2019) Attributing fake images to GANs: learning and analyzing GAN fingerprints. In: Proceedings of the IEEE international conference on computer vision, pp 7556\u20137566","DOI":"10.1109\/ICCV.2019.00765"},{"key":"3766_CR164","doi-asserted-by":"crossref","unstructured":"Marra F, Saltori C, Boato G, Verdoliva L (2019) Incremental learning for the detection and classification of GAN-generated images. In: 2019 IEEE international workshop on information forensics and security (WIFS). IEEE, pp 1\u20136","DOI":"10.1109\/WIFS47025.2019.9035099"},{"key":"3766_CR165","doi-asserted-by":"crossref","unstructured":"Rebuffi S-A, Kolesnikov A, Sperl G, Lampert CH (2017) ICARL: incremental classifier and representation learning. In: proceedings of the IEEE conference on computer vision and pattern recognition, pp 2001\u20132010","DOI":"10.1109\/CVPR.2017.587"},{"key":"3766_CR166","unstructured":"Perarnau G, Van De Weijer J, Raducanu B, \u00c1lvarez JM (2016) Invertible conditional gans for image editing. arXiv preprint arXiv:161106355"},{"key":"3766_CR167","unstructured":"Lample G, Zeghidour N, Usunier N, Bordes A, Denoyer L, Ranzato MA (2017) Fader networks: manipulating images by sliding attributes. In: Advances in neural information processing systems, pp 5967\u20135976"},{"key":"3766_CR168","doi-asserted-by":"crossref","unstructured":"Choi Y, Uh Y, Yoo J, Ha J-W (2020) Stargan v2: diverse image synthesis for multiple domains. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8188\u20138197","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"3766_CR169","doi-asserted-by":"publisher","first-page":"5464","DOI":"10.1109\/TIP.2019.2916751","volume":"28","author":"Z He","year":"2019","unstructured":"He Z, Zuo W, Kan M, Shan S, Chen X (2019) Attgan: facial attribute editing by only changing what you want. IEEE Trans Image Process 28:5464\u20135478","journal-title":"IEEE Trans Image Process"},{"key":"3766_CR170","doi-asserted-by":"crossref","unstructured":"Liu M, Ding Y, Xia M, Liu X, Ding E, Zuo W, Wen S (2019) Stgan: a unified selective transfer network for arbitrary image attribute editing. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3673\u20133682","DOI":"10.1109\/CVPR.2019.00379"},{"key":"3766_CR171","doi-asserted-by":"crossref","unstructured":"Zhang G, Kan M, Shan S, Chen X (2018) Generative adversarial network with spatial attention for face attribute editing. In: Proceedings of the European conference on computer vision (ECCV), pp 417\u2013432","DOI":"10.1007\/978-3-030-01231-1_26"},{"key":"3766_CR172","unstructured":"He Z, Kan M, Zhang J, Shan S (2020) PA-GAN: progressive attention generative adversarial network for facial attribute editing. arXiv preprint arXiv:200705892"},{"key":"3766_CR173","first-page":"532-531","volume":"2019","author":"L Nataraj","year":"2019","unstructured":"Nataraj L, Mohammed TM, Manjunath B, Chandrasekaran S, Flenner A, Bappy JH, Roy-Chowdhury AK (2019) Detecting GAN generated fake images using co-occurrence matrices. Electron Imaging 2019:532-531\u2013532-537","journal-title":"Electron Imaging"},{"key":"3766_CR174","doi-asserted-by":"crossref","unstructured":"Zhang X, Karaman S, Chang S-F (2019) Detecting and simulating artifacts in gan fake images. In 2019 IEEE international workshop on information forensics and security (WIFS). IEEE, pp 1\u20136","DOI":"10.1109\/WIFS47025.2019.9035107"},{"key":"3766_CR175","doi-asserted-by":"crossref","unstructured":"Isola P, Zhu J-Y, Zhou T, Efros AA (2017) Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1125\u20131134","DOI":"10.1109\/CVPR.2017.632"},{"key":"3766_CR176","doi-asserted-by":"crossref","unstructured":"Wang R, Juefei-Xu F, Ma L, Xie X, Huang Y, Wang J, Liu Y (2021) Fakespotter: a simple yet robust baseline for spotting AI-synthesized fake faces. In: Proceedings of the 29th international conference on international joint conferences on artificial intelligence, pp 3444\u20133451","DOI":"10.24963\/ijcai.2020\/476"},{"key":"3766_CR177","doi-asserted-by":"crossref","unstructured":"Parkhi OM, Vedaldi A, Zisserman A (2015) Deep face recognition. In: Proceedings of the British Machine Vision, pp 6","DOI":"10.5244\/C.29.41"},{"key":"3766_CR178","unstructured":"Amos B, Ludwiczuk B, Satyanarayanan M (2016) Openface: a general-purpose face recognition library with mobile applications. CMU School of Computer Science 6"},{"key":"3766_CR179","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: a unified embedding for face recognition and clustering. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"3766_CR180","doi-asserted-by":"publisher","first-page":"1903","DOI":"10.1109\/TIFS.2016.2561898","volume":"11","author":"A Bharati","year":"2016","unstructured":"Bharati A, Singh R, Vatsa M, Bowyer KW (2016) Detecting facial retouching using supervised deep learning. IEEE Trans Inf Forensics Secur 11:1903\u20131913","journal-title":"IEEE Trans Inf Forensics Secur"},{"key":"3766_CR181","doi-asserted-by":"crossref","unstructured":"Jain A, Singh R, Vatsa M (2018) On detecting gans and retouching based synthetic alterations. In: 2018 IEEE 9th international conference on biometrics theory, applications and systems (BTAS). IEEE, pp 1\u20137","DOI":"10.1109\/BTAS.2018.8698545"},{"key":"3766_CR182","doi-asserted-by":"crossref","unstructured":"Tariq S, Lee S, Kim H, Shin Y, Woo SS (2018) Detecting both machine and human created fake face images in the wild. In: Proceedings of the 2nd international workshop on multimedia privacy and security, pp 81\u201387","DOI":"10.1145\/3267357.3267367"},{"key":"3766_CR183","doi-asserted-by":"crossref","unstructured":"Dang H, Liu F, Stehouwer J, Liu X, Jain AK (2020) On the detection of digital face manipulation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5781\u20135790","DOI":"10.1109\/CVPR42600.2020.00582"},{"key":"3766_CR184","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1049\/iet-bmt.2019.0196","volume":"9","author":"C Rathgeb","year":"2020","unstructured":"Rathgeb C, Botaljov A, Stockhardt F, Isadskiy S, Debiasi L, Uhl A, Busch C (2020) PRNU-based detection of facial retouching. IET Biom 9:154\u2013164","journal-title":"IET Biom"},{"key":"3766_CR185","doi-asserted-by":"crossref","unstructured":"Li Y, Zhang C, Sun P, Ke L, Ju Y, Qi H, Lyu S (2021) DeepFake-o-meter: an open platform for DeepFake detection. In: 2021 IEEE security and privacy workshops (SPW). IEEE, pp 277\u2013281","DOI":"10.1109\/SPW53761.2021.00047"},{"key":"3766_CR186","doi-asserted-by":"crossref","unstructured":"Mehta V, Gupta P, Subramanian R, Dhall A (2021) FakeBuster: a DeepFakes detection tool for video conferencing scenarios. In 26th international conference on intelligent user interfaces, pp 61\u201363","DOI":"10.1145\/3397482.3450726"},{"key":"3766_CR187","unstructured":"Reality Defender 2020: A FORCE AGAINST DEEPFAKES. (2020). https:\/\/rd2020.org\/index.html. Accessed August 03, 2021"},{"key":"3766_CR188","unstructured":"Durall R, Keuper M, Pfreundt F-J, Keuper J (2019) Unmasking deepfakes with simple features. arXiv preprint arXiv:00686"},{"key":"3766_CR189","doi-asserted-by":"crossref","unstructured":"Marra F, Gragnaniello D, Cozzolino D, Verdoliva L (2018) Detection of gan-generated fake images over social networks. In: 2018 IEEE conference on multimedia information processing and retrieval (MIPR). IEEE, pp 384\u2013389","DOI":"10.1109\/MIPR.2018.00084"},{"key":"3766_CR190","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/j.patrec.2021.03.005","volume":"146","author":"R Caldelli","year":"2021","unstructured":"Caldelli R, Galteri L, Amerini I, Del Bimbo A (2021) Optical flow based CNN for detection of unlearnt deepfake manipulations. Pattern Recogn Lett 146:31\u201337","journal-title":"Pattern Recogn Lett"},{"key":"3766_CR191","unstructured":"Korshunov P, Marcel S (2018) Deepfakes: a new threat to face recognition? Assessment and detection. arXiv preprint arXiv:181208685"},{"key":"3766_CR192","doi-asserted-by":"crossref","unstructured":"Wang S-Y, Wang O, Zhang R, Owens A, Efros AA (2020) CNN-generated images are surprisingly easy to spot... for now. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8695\u20138704","DOI":"10.1109\/CVPR42600.2020.00872"},{"key":"3766_CR193","doi-asserted-by":"crossref","unstructured":"Malik H (2019) Securing voice-driven interfaces against fake (cloned) audio attacks. In 2019 IEEE conference on multimedia information processing and retrieval (MIPR). IEEE, pp 512\u2013517","DOI":"10.1109\/MIPR.2019.00104"},{"key":"3766_CR194","unstructured":"Li Y, Yang X, Sun P, Qi H, Lyu S (2020) Celeb-df: a new dataset for deepfake forensics. In: IEEE Conference on Computer Vision and Patten Recognition (CVPR)"},{"key":"3766_CR195","doi-asserted-by":"crossref","unstructured":"Khalid H, Woo SS (2020) OC-FakeDect: classifying deepfakes using one-class variational autoencoder. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 656\u2013657","DOI":"10.1109\/CVPRW50498.2020.00336"},{"key":"3766_CR196","doi-asserted-by":"crossref","unstructured":"Cozzolino D, R\u00f6ssler A, Thies J, Nie\u00dfner M, Verdoliva L (2021) ID-reveal: identity-aware DeepFake video detection. Paper presented at the international conference on computer vision, pp 15088\u201315097","DOI":"10.1109\/ICCV48922.2021.01483"},{"key":"3766_CR197","doi-asserted-by":"crossref","unstructured":"Hu J, Liao X, Wang W, Qin Z (2021) Detecting compressed deepfake videos in social networks using frame-temporality two-stream convolutional network. IEEE Trans Circuits Syst Video Technol:1","DOI":"10.1109\/TCSVT.2021.3074259"},{"key":"3766_CR198","doi-asserted-by":"crossref","unstructured":"Li X, Yu K, Ji S, Wang Y, Wu C, Xue H (2020) Fighting against deepfake: patch & pair convolutional neural networks (ppcnn). In companion proceedings of the web conference 2020, pp 88\u201389","DOI":"10.1145\/3366424.3382711"},{"key":"3766_CR199","doi-asserted-by":"crossref","unstructured":"Amerini I, Caldelli R (2020) Exploiting prediction error inconsistencies through LSTM-based classifiers to detect deepfake videos. In: Proceedings of the 2020 ACM workshop on information hiding and multimedia security, pp 97\u2013102","DOI":"10.1145\/3369412.3395070"},{"key":"3766_CR200","doi-asserted-by":"crossref","unstructured":"Hosler B, Salvi D, Murray A, Antonacci F, Bestagini P, Tubaro S, Stamm MC (2021) Do Deepfakes feel emotions? A semantic approach to detecting deepfakes via emotional inconsistencies. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1013\u20131022","DOI":"10.1109\/CVPRW53098.2021.00112"},{"key":"3766_CR201","doi-asserted-by":"crossref","unstructured":"Zhao T, Xu X, Xu M, Ding H, Xiong Y, Xia W (2021) Learning self-consistency for deepfake detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 15023\u201315033","DOI":"10.1109\/ICCV48922.2021.01475"},{"key":"3766_CR202","unstructured":"AlBadawy EA, Lyu S, Farid H (2019) Detecting AI-synthesized speech using bispectral analysis. In: CVPR workshops, pp 104-109"},{"key":"3766_CR203","doi-asserted-by":"publisher","first-page":"7687","DOI":"10.1007\/s11042-020-10098-y","volume":"80","author":"Z Guo","year":"2021","unstructured":"Guo Z, Hu L, Xia M, Yang G (2021) Blind detection of glow-based facial forgery. Multimed Tools Appl 80:7687\u20137710. https:\/\/doi.org\/10.1007\/s11042-020-10098-y","journal-title":"Multimed Tools Appl"},{"key":"3766_CR204","doi-asserted-by":"crossref","unstructured":"Guo Z, Yang G, Chen J, Sun X (2020) Fake face detection via adaptive residuals extraction network. arXiv preprint arXiv:04945","DOI":"10.1016\/j.cviu.2021.103170"},{"key":"3766_CR205","doi-asserted-by":"publisher","unstructured":"Fu T, Xia M, Yang G (2022) Detecting GAN-generated face images via hybrid texture and sensor noise based features. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-022-12661-1","DOI":"10.1007\/s11042-022-12661-1"},{"key":"3766_CR206","doi-asserted-by":"publisher","first-page":"30789","DOI":"10.1007\/s11042-020-09147-3","volume":"80","author":"J Fei","year":"2021","unstructured":"Fei J, Xia Z, Yu P, Xiao F (2021) Exposing AI-generated videos with motion magnification. Multimed Tools Appl 80:30789\u201330802. https:\/\/doi.org\/10.1007\/s11042-020-09147-3","journal-title":"Multimed Tools Appl"},{"key":"3766_CR207","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1007\/s42979-020-00225-9","volume":"1","author":"A Singh","year":"2020","unstructured":"Singh A, Saimbhi AS, Singh N, Mittal M (2020) DeepFake video detection: a time-distributed approach. SN Comput Sci 1:212. https:\/\/doi.org\/10.1007\/s42979-020-00225-9","journal-title":"SN Comput Sci"},{"key":"3766_CR208","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1109\/TBIOM.2021.3065735","volume":"3","author":"B Han","year":"2021","unstructured":"Han B, Han X, Zhang H, Li J, Cao X (2021) Fighting fake news: two stream network for deepfake detection via learnable SRM. IEEE Trans Biom Behav Identity Sci 3:320\u2013331","journal-title":"IEEE Trans Biom Behav Identity Sci"},{"key":"3766_CR209","doi-asserted-by":"crossref","unstructured":"Rana MS, Sung AH (2020) Deepfakestack: a deep ensemble-based learning technique for deepfake detection. In: 2020 7th IEEE international conference on cyber security and cloud computing (CSCloud)\/2020 6th IEEE international conference on edge computing and scalable cloud (EdgeCom). IEEE, pp 70\u201375","DOI":"10.1109\/CSCloud-EdgeCom49738.2020.00021"},{"key":"3766_CR210","doi-asserted-by":"crossref","unstructured":"Wu Z, Das RK, Yang J, Li H (2020) Light convolutional neural network with feature genuinization for detection of synthetic speech attacks. In: Interspeech 2020, 21st Annual Conference of the International Speech Communication Association. ISCA, pp 1101\u20131105","DOI":"10.21437\/Interspeech.2020-1810"},{"key":"3766_CR211","doi-asserted-by":"publisher","unstructured":"Yu C-M, Chen K-C, Chang C-T, Ti Y-W (2022) SegNet: a network for detecting deepfake facial videos. Multimedia Systems 1. https:\/\/doi.org\/10.1007\/s00530-021-00876-5","DOI":"10.1007\/s00530-021-00876-5"},{"key":"3766_CR212","doi-asserted-by":"publisher","first-page":"4159","DOI":"10.1007\/s11063-021-10588-6","volume":"53","author":"Y Su","year":"2021","unstructured":"Su Y, Xia H, Liang Q, Nie W (2021) Exposing DeepFake videos using attention based convolutional LSTM network. Neural Process Lett 53:4159\u20134175. https:\/\/doi.org\/10.1007\/s11063-021-10588-6","journal-title":"Neural Process Lett"},{"key":"3766_CR213","doi-asserted-by":"crossref","unstructured":"Masood M, Nawaz M, Javed A, Nazir T, Mehmood A, Mahum R (2021) Classification of Deepfake videos using pre-trained convolutional neural networks. In: 2021 international conference on digital futures and transformative technologies (ICoDT2). IEEE, pp 1\u20136","DOI":"10.1109\/ICoDT252288.2021.9441519"},{"key":"3766_CR214","doi-asserted-by":"crossref","unstructured":"Wang R, Ma L, Juefei-Xu F, Xie X, Wang J, Liu Y (2020) Fakespotter: a simple baseline for spotting ai-synthesized fake faces. In: Proceedings of the 29th international joint conference on artificial intelligence (IJCAI), pp 3444\u20133451","DOI":"10.24963\/ijcai.2020\/476"},{"key":"3766_CR215","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/s00530-021-00756-y","volume":"27","author":"Z Pan","year":"2021","unstructured":"Pan Z, Ren Y, Zhang X (2021) Low-complexity fake face detection based on forensic similarity. Multimedia Systems 27:353\u2013361. https:\/\/doi.org\/10.1007\/s00530-021-00756-y","journal-title":"Multimedia Systems"},{"key":"3766_CR216","doi-asserted-by":"publisher","first-page":"128","DOI":"10.3390\/jimaging7080128","volume":"7","author":"O Giudice","year":"2021","unstructured":"Giudice O, Guarnera L, Battiato S (2021) Fighting deepfakes by detecting gan dct anomalies. J Imaging 7:128","journal-title":"J Imaging"},{"key":"3766_CR217","doi-asserted-by":"crossref","unstructured":"Lorenzo-Trueba J, Fang F, Wang X, Echizen I, Yamagishi J, Kinnunen T (2018) Can we steal your vocal identity from the internet?: initial investigation of cloning Obama's voice using GAN, WaveNet and low-quality found data. In the speaker and language recognition workshop. ISCA, pp 240\u2013247","DOI":"10.21437\/Odyssey.2018-34"},{"key":"3766_CR218","doi-asserted-by":"publisher","first-page":"101114","DOI":"10.1016\/j.csl.2020.101114","volume":"64","author":"X Wang","year":"2020","unstructured":"Wang X et al (2020) ASVspoof 2019: a large-scale public database of synthetized, converted and replayed speech. Comput Speech Lang 64:101114","journal-title":"Comput Speech Lang"},{"key":"3766_CR219","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073702","volume":"36","author":"Z Jin","year":"2017","unstructured":"Jin Z, Mysore GJ, Diverdi S, Lu J, Finkelstein A (2017) Voco: text-based insertion and replacement in audio narration. ACM Trans Graph 36:1\u201313","journal-title":"ACM Trans Graph"},{"key":"3766_CR220","unstructured":"Leung A NVIDIA Reveals That Part of Its CEO's Keynote Presentation Was Deepfaked. https:\/\/hypebeast.com\/2021\/8\/nvidia-deepfake-jensen-huang-omniverse-keynote-video. Accessed August 29, 2021"},{"key":"3766_CR221","unstructured":"Sotelo J, Mehri S, Kumar K, Santos JF, Kastner K, Courville A, Bengio Y (2017) Char2wav: end-to-end speech synthesis. In: 5th International Conference on Learning Representations"},{"key":"3766_CR222","doi-asserted-by":"crossref","unstructured":"Sisman B, Yamagishi J, King S, Li H (2020) An overview of voice conversion and its challenges: from statistical modeling to deep learning. IEEE\/ACM Transactions on Audio, Speech, Language Processing","DOI":"10.1109\/TASLP.2020.3038524"},{"key":"3766_CR223","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1109\/MCOM.001.1900396","volume":"58","author":"P Partila","year":"2020","unstructured":"Partila P, Tovarek J, Ilk GH, Rozhon J, Voznak M (2020) Deep learning serves voice cloning: how vulnerable are automatic speaker verification systems to spoofing trials? IEEE Commun Mag 58:100\u2013105","journal-title":"IEEE Commun Mag"},{"key":"3766_CR224","unstructured":"Ping W et al (2018) Deep voice 3: 2000-speaker neural text-to-speech. Proc ICLR:214\u2013217"},{"key":"3766_CR225","unstructured":"Bi\u0144kowski M et al. (2020) High fidelity speech synthesis with adversarial networks. Paper presented at the 8th international conference on learning representations"},{"key":"3766_CR226","unstructured":"Kumar K et al (2019) Melgan: generative adversarial networks for conditional waveform synthesis. Adv Neural Inf Proces Syst 32"},{"key":"3766_CR227","first-page":"17022","volume":"33","author":"J Kong","year":"2020","unstructured":"Kong J, Kim J, Bae J (2020) Hifi-Gan: generative adversarial networks for efficient and high fidelity speech synthesis. Adv Neural Inf Proces Syst 33:17022\u201317033","journal-title":"Adv Neural Inf Proces Syst"},{"key":"3766_CR228","doi-asserted-by":"publisher","first-page":"2967","DOI":"10.1109\/TASLP.2020.3034994","volume":"28","author":"H-T Luong","year":"2020","unstructured":"Luong H-T, Yamagishi J (2020) NAUTILUS: a versatile voice cloning system. IEEE\/ACM Trans Audio Speech Lang Process 28:2967\u20132981","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"3766_CR229","unstructured":"Peng K, Ping W, Song Z, Zhao K (2020) Non-autoregressive neural text-to-speech. In: International conference on machine learning. PMLR, pp 7586\u20137598"},{"key":"3766_CR230","unstructured":"Taigman Y, Wolf L, Polyak A, Nachmani E (2018) Voiceloop: voice fitting and synthesis via a phonological loop. In: 6th International Conference on Learning Representations"},{"key":"3766_CR231","unstructured":"Oord A et al. (2018) Parallel wavenet: fast high-fidelity speech synthesis. In international conference on machine learning. PMLR, pp 3918\u20133926"},{"key":"3766_CR232","first-page":"8067","volume":"33","author":"J Kim","year":"2020","unstructured":"Kim J, Kim S, Kong J, Yoon S (2020) Glow-tts: a generative flow for text-to-speech via monotonic alignment search. Adv Neural Inf Proces Syst 33:8067\u20138077","journal-title":"Adv Neural Inf Proces Syst"},{"key":"3766_CR233","unstructured":"Jia Y et al. (2018) Transfer learning from speaker verification to multispeaker text-to-speech synthesis. In: Advances in neural information processing systems, pp 4480\u20134490"},{"key":"3766_CR234","unstructured":"Lee Y, Kim T, Lee S-Y (2018) Voice imitating text-to-speech neural networks. arXiv preprint arXiv:00927"},{"key":"3766_CR235","unstructured":"Chen Y et al. (2019) Sample efficient adaptive text-to-speech. In: 7th International Conference on Learning Representations"},{"key":"3766_CR236","doi-asserted-by":"crossref","unstructured":"Cong J, Yang S, Xie L, Yu G, Wan G (2020) Data efficient voice cloning from noisy samples with domain adversarial training. Paper presented at the 21st Annual Conference of the International Speech Communication Association, pp 811\u2013815","DOI":"10.21437\/Interspeech.2020-2530"},{"key":"3766_CR237","unstructured":"Gibiansky A et al. (2017) Deep voice 2: multi-speaker neural text-to-speech. In: Advances in neural information processing systems, pp 2962\u20132970"},{"key":"3766_CR238","doi-asserted-by":"crossref","unstructured":"Yasuda Y, Wang X, Takaki S, Yamagishi J (2019) Investigation of enhanced Tacotron text-to-speech synthesis systems with self-attention for pitch accent language. In: 2019 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6905\u20136909","DOI":"10.1109\/ICASSP.2019.8682353"},{"key":"3766_CR239","doi-asserted-by":"crossref","unstructured":"Yamamoto R, Song E, Kim J-M (2020) Parallel WaveGAN: a fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram. In: 2020 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6199\u20136203","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"3766_CR240","first-page":"3165","volume":"32","author":"Y Ren","year":"2019","unstructured":"Ren Y, Ruan Y, Tan X, Qin T, Zhao S, Zhao Z, Liu T-Y (2019) Fastspeech: fast, robust and controllable text to speech. Adv Neural Inf Proces Syst 32:3165\u20133174","journal-title":"Adv Neural Inf Proces Syst"},{"key":"3766_CR241","doi-asserted-by":"crossref","unstructured":"Toda T, Chen L-H, Saito D, Villavicencio F, Wester M, Wu Z, Yamagishi J (2016) The voice conversion challenge 2016. In: INTERSPEECH, pp 1632\u20131636","DOI":"10.21437\/Interspeech.2016-1066"},{"key":"3766_CR242","doi-asserted-by":"crossref","unstructured":"Zhao Y et al. (2020) Voice conversion challenge 2020: Intra-lingual semi-parallel and cross-lingual voice conversion. In: Proceeding joint workshop for the blizzard challenge and voice conversion challenge","DOI":"10.21437\/VCC_BC.2020-1"},{"key":"3766_CR243","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1109\/89.661472","volume":"6","author":"Y Stylianou","year":"1998","unstructured":"Stylianou Y, Capp\u00e9 O, Moulines E (1998) Continuous probabilistic transform for voice conversion. IEEE Trans Speech Audio Process 6:131\u2013142","journal-title":"IEEE Trans Speech Audio Process"},{"key":"3766_CR244","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TASL.2007.907344","volume":"15","author":"T Toda","year":"2007","unstructured":"Toda T, Black AW, Tokuda K (2007) Voice conversion based on maximum-likelihood estimation of spectral parameter trajectory. IEEE Trans Speech Audio Process 15:2222\u20132235","journal-title":"IEEE Trans Speech Audio Process"},{"key":"3766_CR245","doi-asserted-by":"publisher","first-page":"806","DOI":"10.1109\/TASL.2011.2165944","volume":"20","author":"E Helander","year":"2011","unstructured":"Helander E, Sil\u00e9n H, Virtanen T, Gabbouj M (2011) Voice conversion using dynamic kernel partial least squares regression. IEEE Trans Audio Speech Lang Process 20:806\u2013817","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"3766_CR246","doi-asserted-by":"publisher","first-page":"1506","DOI":"10.1109\/TASLP.2014.2333242","volume":"22","author":"Z Wu","year":"2014","unstructured":"Wu Z, Virtanen T, Chng ES, Li H (2014) Exemplar-based sparse representation with residual compensation for voice conversion. IEEE\/ACM Trans Audio Speech Lang Process 22:1506\u20131521","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"3766_CR247","doi-asserted-by":"crossref","unstructured":"Nakashika T, Takiguchi T, Ariki Y (2014) High-order sequence modeling using speaker-dependent recurrent temporal restricted Boltzmann machines for voice conversion. In: Fifteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-447"},{"key":"3766_CR248","doi-asserted-by":"crossref","unstructured":"Ming H, Huang D-Y, Xie L, Wu J, Dong M, Li H (2016) Deep bidirectional LSTM modeling of timbre and prosody for emotional voice conversion. In: INTERSPEECH, pp 2453\u20132457","DOI":"10.21437\/Interspeech.2016-1053"},{"key":"3766_CR249","doi-asserted-by":"crossref","unstructured":"Sun L, Kang S, Li K, Meng H (2015) Voice conversion using deep bidirectional long short-term memory based recurrent neural networks. In 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 4869\u20134873","DOI":"10.1109\/ICASSP.2015.7178896"},{"key":"3766_CR250","doi-asserted-by":"crossref","unstructured":"Wu J, Wu Z, Xie L (2016) On the use of i-vectors and average voice model for voice conversion without parallel data. In: 2016 Asia-Pacific signal and information processing association annual summit and conference (APSIPA). IEEE, pp 1\u20136","DOI":"10.1109\/APSIPA.2016.7820901"},{"key":"3766_CR251","doi-asserted-by":"crossref","unstructured":"Liu L-J, Ling Z-H, Jiang Y, Zhou M, Dai L-R (2018) WaveNet vocoder with limited training data for voice conversion. In: INTERSPEECH, pp 1983\u20131987","DOI":"10.21437\/Interspeech.2018-1190"},{"key":"3766_CR252","unstructured":"Hsu P-c, Wang C-h, Liu AT, Lee H-y (2019) Towards robust neural vocoding for speech generation: a survey. arXiv preprint arXiv:02461"},{"key":"3766_CR253","doi-asserted-by":"crossref","unstructured":"Kaneko T, Kameoka H (2018) Cyclegan-vc: Non-parallel voice conversion using cycle-consistent adversarial networks. In: 2018 26th European signal processing conference (EUSIPCO). IEEE, pp 2100\u20132104","DOI":"10.23919\/EUSIPCO.2018.8553236"},{"key":"3766_CR254","doi-asserted-by":"crossref","unstructured":"Chou J-c, Yeh C-c, Lee H-y, Lee L-s (2018) Multi-target voice conversion without parallel data by adversarially learning disentangled audio representations. In: 19th Annual Conference of the International Speech Communication Association. ISCA, pp 501\u2013505","DOI":"10.21437\/Interspeech.2018-1830"},{"key":"3766_CR255","doi-asserted-by":"crossref","unstructured":"Kaneko T, Kameoka H, Tanaka K, Hojo N (2019) Cyclegan-vc2: improved cyclegan-based non-parallel voice conversion. In: 2019 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6820\u20136824","DOI":"10.1109\/ICASSP.2019.8682897"},{"key":"3766_CR256","doi-asserted-by":"crossref","unstructured":"Fang F, Yamagishi J, Echizen I, Lorenzo-Trueba J (2018) High-quality nonparallel voice conversion based on cycle-consistent adversarial network. In 2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 5279\u20135283","DOI":"10.1109\/ICASSP.2018.8462342"},{"key":"3766_CR257","doi-asserted-by":"crossref","unstructured":"Hsu C-C, Hwang H-T, Wu Y-C, Tsao Y, Wang H-M (2017) Voice conversion from unaligned corpora using variational autoencoding wasserstein generative adversarial networks. Paper presented at the 18th Annual Conference of the International Speech Communication Association, pp 3364\u20133368","DOI":"10.21437\/Interspeech.2017-63"},{"key":"3766_CR258","doi-asserted-by":"crossref","unstructured":"Kameoka H, Kaneko T, Tanaka K, Hojo N (2018) Stargan-vc: Non-parallel many-to-many voice conversion using star generative adversarial networks. In: 2018 IEEE spoken language technology workshop (SLT). IEEE, pp 266\u2013273","DOI":"10.1109\/SLT.2018.8639535"},{"key":"3766_CR259","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/j.specom.2020.05.004","volume":"122","author":"M Zhang","year":"2020","unstructured":"Zhang M, Sisman B, Zhao L, Li H (2020) DeepConversion: Voice conversion with limited parallel training data. Speech Comm 122:31\u201343","journal-title":"Speech Comm"},{"key":"3766_CR260","doi-asserted-by":"publisher","first-page":"468","DOI":"10.1109\/TETCI.2020.2977678","volume":"4","author":"W-C Huang","year":"2020","unstructured":"Huang W-C, Luo H, Hwang H-T, Lo C-C, Peng Y-H, Tsao Y, Wang H-M (2020) Unsupervised representation disentanglement using cross domain features and adversarial learning in variational autoencoder based voice conversion. IEEE Trans Emerg Top Comput Intell 4:468\u2013479","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"key":"3766_CR261","doi-asserted-by":"crossref","unstructured":"Qian K, Jin Z, Hasegawa-Johnson M, Mysore GJ (2020) F0-consistent many-to-many non-parallel voice conversion via conditional autoencoder. In 2020 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6284\u20136288","DOI":"10.1109\/ICASSP40776.2020.9054734"},{"key":"3766_CR262","doi-asserted-by":"publisher","first-page":"2041","DOI":"10.1109\/TASLP.2019.2938863","volume":"27","author":"J Chorowski","year":"2019","unstructured":"Chorowski J, Weiss RJ, Bengio S, van den Oord A (2019) Unsupervised speech representation learning using wavenet autoencoders. IEEE\/ACM Trans Audio Speech Lang Process 27:2041\u20132053","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"3766_CR263","doi-asserted-by":"crossref","unstructured":"Tanaka K, Kameoka H, Kaneko T, Hojo N (2019) AttS2S-VC: sequence-to-sequence voice conversion with attention and context preservation mechanisms. In: ICASSP 2019\u20132019 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6805\u20136809","DOI":"10.1109\/ICASSP.2019.8683282"},{"key":"3766_CR264","doi-asserted-by":"crossref","unstructured":"Park S-w, Kim D-y, Joe M-c (2020) Cotatron: Transcription-guided speech encoder for any-to-many voice conversion without parallel data. In: 21st Annual Conference of the International Speech Communication Association. ISCA, pp 4696\u20134700","DOI":"10.21437\/Interspeech.2020-1542"},{"key":"3766_CR265","doi-asserted-by":"crossref","unstructured":"Huang W-C, Hayashi T, Wu Y-C, Kameoka H, Toda T (2020) Voice transformer network: Sequence-to-sequence voice conversion using transformer with text-to-speech pretraining. In: 21st Annual Conference of the International Speech Communication Association. ISCA, pp 4676\u20134680","DOI":"10.21437\/Interspeech.2020-1066"},{"key":"3766_CR266","doi-asserted-by":"crossref","unstructured":"Lu H, Wu Z, Dai D, Li R, Kang S, Jia J, Meng H (2019) One-shot voice conversion with global speaker embeddings. In: INTERSPEECH, pp 669\u2013673","DOI":"10.21437\/Interspeech.2019-2365"},{"key":"3766_CR267","doi-asserted-by":"crossref","unstructured":"Liu S, Zhong J, Sun L, Wu X, Liu X, Meng H (2018) Voice conversion across arbitrary speakers based on a single target-speaker utterance. In: INTERSPEECH, pp 496\u2013500","DOI":"10.21437\/Interspeech.2018-1504"},{"key":"3766_CR268","doi-asserted-by":"crossref","unstructured":"Huang T-h, Lin J-h, Lee H-y (2021) How far are we from robust voice conversion: a survey. In: 2021 IEEE spoken language technology workshop (SLT). IEEE, pp 514\u2013521","DOI":"10.1109\/SLT48900.2021.9383498"},{"key":"3766_CR269","doi-asserted-by":"crossref","unstructured":"Li N, Tuo D, Su D, Li Z, Yu D, Tencent A (2018) Deep discriminative embeddings for duration robust speaker verification. In: INTERSPEECH, pp 2262\u20132266","DOI":"10.21437\/Interspeech.2018-1769"},{"key":"3766_CR270","doi-asserted-by":"crossref","unstructured":"Chou J-c, Yeh C-c, Lee H-y (2019) One-shot voice conversion by separating speaker and content representations with instance normalization. In: 20th Annual Conference of the International Speech Communication Association. ISCA, pp 664\u2013668","DOI":"10.21437\/Interspeech.2019-2663"},{"key":"3766_CR271","unstructured":"Qian K, Zhang Y, Chang S, Yang X, Hasegawa-Johnson M (2019) Autovc: zero-shot voice style transfer with only autoencoder loss. In: International conference on machine learning. PMLR, pp 5210\u20135219"},{"key":"3766_CR272","unstructured":"Rebryk Y, Beliaev S (2020) ConVoice: real-time zero-shot voice style transfer with convolutional network. arXiv preprint arXiv:07815"},{"key":"3766_CR273","unstructured":"Kominek J, Black AW (2004) The CMU Arctic speech databases. In: Fifth ISCA workshop on speech synthesis"},{"key":"3766_CR274","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1016\/0167-6393(90)90011-W","volume":"9","author":"A Kurematsu","year":"1990","unstructured":"Kurematsu A, Takeda K, Sagisaka Y, Katagiri S, Kuwabara H, Shikano K (1990) ATR Japanese speech database as a tool of speech recognition and synthesis. Speech Comm 9:357\u2013363","journal-title":"Speech Comm"},{"key":"3766_CR275","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/S0167-6393(98)00085-5","volume":"27","author":"H Kawahara","year":"1999","unstructured":"Kawahara H, Masuda-Katsuse I, De Cheveigne A (1999) Restructuring speech representations using a pitch-adaptive time\u2013frequency smoothing and an instantaneous-frequency-based F0 extraction: possible role of a repetitive structure in sounds. Speech Comm 27:187\u2013207","journal-title":"Speech Comm"},{"key":"3766_CR276","doi-asserted-by":"crossref","unstructured":"Kamble MR, Sailor HB, Patil HA, Li H (2020) Advances in anti-spoofing: from the perspective of ASVspoof challenges. APSIPA Trans Signal Inf Process 9","DOI":"10.1017\/ATSIP.2019.21"},{"key":"3766_CR277","doi-asserted-by":"crossref","unstructured":"Li X, Li N, Weng C, Liu X, Su D, Yu D, Meng H (2021) Replay and synthetic speech detection with res2net architecture. In 2021 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6354\u20136358","DOI":"10.1109\/ICASSP39728.2021.9413828"},{"key":"3766_CR278","doi-asserted-by":"crossref","unstructured":"Yi J, Bai Y, Tao J, Tian Z, Wang C, Wang T, Fu R (2021) Half-truth: a partially fake audio detection dataset. In: 22nd Annual Conference of the International Speech Communication Association. ISCA, pp 1654\u20131658","DOI":"10.21437\/Interspeech.2021-930"},{"key":"3766_CR279","doi-asserted-by":"crossref","unstructured":"Das RK, Yang J, Li H (2021) Data augmentation with signal Companding for detection of logical access attacks. In: 2021 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, pp 6349\u20136353","DOI":"10.1109\/ICASSP39728.2021.9413501"},{"key":"3766_CR280","doi-asserted-by":"crossref","unstructured":"Ma H, Yi J, Tao J, Bai Y, Tian Z, Wang C (2021) Continual Learning for Fake Audio Detection. In: 22nd Annual Conference of the International Speech Communication Association. ISCA, pp 886\u2013890","DOI":"10.21437\/Interspeech.2021-794"},{"key":"3766_CR281","doi-asserted-by":"crossref","unstructured":"Singh AK, Singh P (2021) Detection of AI-synthesized speech using cepstral & bispectral statistics. In: 4th international conference on multimedia information processing and retrieval (MIPR). IEEE, pp 412\u2013417","DOI":"10.1109\/MIPR51284.2021.00076"},{"key":"3766_CR282","doi-asserted-by":"crossref","unstructured":"Gao Y, Vuong T, Elyasi M, Bharaj G, Singh R (2021) Generalized Spoofing Detection Inspired from Audio Generation Artifacts. In: 22nd Annual Conference of the International Speech Communication Association. ISCA, pp 4184\u20134188","DOI":"10.21437\/Interspeech.2021-1705"},{"key":"3766_CR283","unstructured":"Aravind P, Nechiyil U, Paramparambath N (2020) Audio spoofing verification using deep convolutional neural networks by transfer learning. arXiv preprint arXiv:03464"},{"key":"3766_CR284","doi-asserted-by":"publisher","first-page":"101096","DOI":"10.1016\/j.csl.2020.101096","volume":"63","author":"J Monteiro","year":"2020","unstructured":"Monteiro J, Alam J, Falk THJCS (2020) Generalized end-to-end detection of spoofing attacks to automatic speaker recognizers. Comput Speech Lang 63:101096","journal-title":"Comput Speech Lang"},{"key":"3766_CR285","doi-asserted-by":"crossref","unstructured":"Chen T, Kumar A, Nagarsheth P, Sivaraman G, Khoury E (2020) Generalization of audio deepfake detection. In proc. odyssey 2020 the speaker and language recognition workshop, pp 132\u2013137","DOI":"10.21437\/Odyssey.2020-19"},{"key":"3766_CR286","doi-asserted-by":"publisher","first-page":"1813","DOI":"10.1109\/TASLP.2020.2998870","volume":"28","author":"L Huang","year":"2020","unstructured":"Huang L, Pun C-M (2020) Audio replay spoof attack detection by joint segment-based linear filter Bank feature extraction and attention-enhanced DenseNet-BiLSTM network. IEEE\/ACM Trans Audio Speech Lang Process 28:1813\u20131825","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"3766_CR287","doi-asserted-by":"crossref","unstructured":"Zhang Z, Yi X, Zhao X (2021) Fake speech detection using residual network with transformer encoder. In: Proceedings of the 2021 ACM workshop on information hiding and multimedia security, pp 13\u201322","DOI":"10.1145\/3437880.3460408"},{"key":"3766_CR288","doi-asserted-by":"crossref","unstructured":"Reimao R, Tzerpos V (2019) FoR: a dataset for synthetic speech detection. In international conference on speech technology and human-computer dialogue IEEE, pp 1\u201310","DOI":"10.1109\/SPED.2019.8906599"},{"key":"3766_CR289","doi-asserted-by":"publisher","first-page":"937","DOI":"10.1109\/LSP.2021.3076358","volume":"28","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Jiang F, Duan Z (2021) One-class learning towards synthetic voice spoofing detection. IEEE Signal Process Lett 28:937\u2013941","journal-title":"IEEE Signal Process Lett"},{"key":"3766_CR290","doi-asserted-by":"crossref","unstructured":"Gomez-Alanis A, Peinado AM, Gonzalez JA, Gomez AM (2019) A light convolutional GRU-RNN deep feature extractor for ASV spoofing detection. In: Proc Interspeech, pp 1068\u20131072","DOI":"10.21437\/Interspeech.2019-2212"},{"key":"3766_CR291","doi-asserted-by":"publisher","first-page":"1265","DOI":"10.1109\/LSP.2021.3089437","volume":"28","author":"G Hua","year":"2021","unstructured":"Hua G, Bengjinteoh A, Zhang H (2021) Towards end-to-end synthetic speech detection. IEEE Signal Process Lett 28:1265\u20131269","journal-title":"IEEE Signal Process Lett"},{"key":"3766_CR292","doi-asserted-by":"crossref","unstructured":"Jiang Z, Zhu H, Peng L, Ding W, Ren Y (2020) Self-supervised spoofing audio detection scheme. In: INTERSPEECH, pp 4223\u20134227","DOI":"10.21437\/Interspeech.2020-1760"},{"key":"3766_CR293","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s13635-020-00115-w","volume":"2021","author":"C Borrelli","year":"2021","unstructured":"Borrelli C, Bestagini P, Antonacci F, Sarti A, Tubaro S (2021) Synthetic speech detection through short-term and long-term prediction traces. EURASIP J Inf Secur 2021:1\u201314","journal-title":"EURASIP J Inf Secur"},{"key":"3766_CR294","unstructured":"Malik H (2019) Fighting AI with AI: fake speech detection using deep learning. In: International Conference on Audio Forensics. AES"},{"key":"3766_CR295","first-page":"1","volume":"1","author":"J Khochare","year":"2021","unstructured":"Khochare J, Joshi C, Yenarkar B, Suratkar S, Kazi F (2021) A deep learning framework for audio deepfake detection. Arab J Sci Eng 1:1\u201312","journal-title":"Arab J Sci Eng"},{"key":"3766_CR296","doi-asserted-by":"crossref","unstructured":"Yamagishi J et al. (2021) ASVspoof 2021: accelerating progress in spoofed and deepfake speech detection. arXiv preprint arXiv:00537","DOI":"10.21437\/ASVSPOOF.2021-8"},{"key":"3766_CR297","unstructured":"Frank J, Sch\u00f6nherr L (2021) WaveFake: a data set to facilitate audio deepfake detection. In: 35th annual conference on neural information processing systems"},{"key":"3766_CR298","unstructured":"Dolhansky B, Bitton J, Pflaum B, Lu J, Howes R, Wang M, Ferrer CC (2020) The DeepFake detection challenge dataset. arXiv preprint arXiv:200607397"},{"key":"3766_CR299","doi-asserted-by":"crossref","unstructured":"Jiang L, Li R, Wu W, Qian C, Loy CC (2020) Deeperforensics-1.0: a large-scale dataset for real-world face forgery detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2889\u20132898","DOI":"10.1109\/CVPR42600.2020.00296"},{"key":"3766_CR300","doi-asserted-by":"crossref","unstructured":"Zi B, Chang M, Chen J, Ma X, Jiang Y-G (2020) Wilddeepfake: a challenging real-world dataset for deepfake detection. In proceedings of the 28th ACM international conference on multimedia, pp 2382\u20132390","DOI":"10.1145\/3394171.3413769"},{"key":"3766_CR301","doi-asserted-by":"crossref","unstructured":"He Y et al. (2021) Forgerynet: a versatile benchmark for comprehensive forgery analysis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4360\u20134369","DOI":"10.1109\/CVPR46437.2021.00434"},{"key":"3766_CR302","unstructured":"Khalid H, Tariq S, Kim M, Woo SS (2021) FakeAVCeleb: a novel audio-video multimodal deepfake dataset. In: Thirty-fifth conference on neural information processing systems"},{"key":"3766_CR303","unstructured":"Ito K (2017) The LJ speech dataset. https:\/\/keithito.com\/LJ-Speech-Dataset. Accessed December 22, 2020"},{"key":"3766_CR304","unstructured":"The M-AILABS speech dataset. (2019). https:\/\/www.caito.de\/2019\/01\/the-m-ailabs-speech-dataset\/. Accessed Feb 25, 2021"},{"key":"3766_CR305","unstructured":"Ardila R et al. (2019) Common voice: a massively-multilingual speech corpus. arXiv preprint arXiv:191206670"},{"key":"3766_CR306","unstructured":"R\u00f6ssler A, Cozzolino D, Verdoliva L, Riess C, Thies J, Nie\u00dfner M (2018) Faceforensics: a large-scale video dataset for forgery detection in human faces. arXiv preprint arXiv:180309179"},{"key":"3766_CR307","unstructured":"Faceswap. https:\/\/github.com\/MarekKowalski\/FaceSwap\/. Accessed August 14, 2020"},{"key":"3766_CR308","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3323035","volume":"38","author":"J Thies","year":"2019","unstructured":"Thies J, Zollh\u00f6fer M, Nie\u00dfner M (2019) Deferred neural rendering: image synthesis using neural textures. ACM Trans Graph 38:1\u201312","journal-title":"ACM Trans Graph"},{"key":"3766_CR309","unstructured":"Abu-El-Haija S, Kothari N, Lee J, Natsev P, Toderici G, Varadarajan B, Vijayanarasimhan S (2016) Youtube-8m: a large-scale video classification benchmark. arXiv preprint arXiv:160908675"},{"key":"3766_CR310","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/j.automatica.2017.08.011","volume":"86","author":"A Aravkin","year":"2017","unstructured":"Aravkin A, Burke JV, Ljung L, Lozano A, Pillonetto G (2017) Generalized Kalman smoothing: modeling and algorithms. Automatica 86:63\u201386","journal-title":"Automatica"},{"key":"3766_CR311","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1109\/38.946629","volume":"21","author":"E Reinhard","year":"2001","unstructured":"Reinhard E, Adhikhmin M, Gooch B, Shirley P (2001) Color transfer between images. IEEE Comput Graph 21:34\u201341","journal-title":"IEEE Comput Graph"},{"key":"3766_CR312","unstructured":"Dolhansky B, Howes R, Pflaum B, Baram N, Ferrer CC (2019) The deepfake detection challenge (dfdc) preview dataset. arXiv preprint arXiv:08854"},{"key":"3766_CR313","doi-asserted-by":"crossref","unstructured":"Versteegh M, Thiolliere R, Schatz T, Cao XN, Anguera X, Jansen A, Dupoux E (2015) Zero resource speech challenge. In: 16th Annual Conference of the International Speech Communication Association. ISCA, pp 3169\u20133173","DOI":"10.21437\/Interspeech.2015-638"},{"key":"3766_CR314","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s42979-021-00495-x","volume":"2","author":"A Mitra","year":"2021","unstructured":"Mitra A, Mohanty SP, Corcoran P, Kougianos E (2021) A machine learning based approach for Deepfake detection in social media through key video frame extraction. SN Comput Sci 2:98. https:\/\/doi.org\/10.1007\/s42979-021-00495-x","journal-title":"SN Comput Sci"},{"key":"3766_CR315","doi-asserted-by":"crossref","unstructured":"Trinh L, Liu Y (2021) An examination of fairness of AI models for deepfake detection. In: Proceedings of the thirtieth international joint conference on artificial intelligence. IJCAI, pp 567\u2013574","DOI":"10.24963\/ijcai.2021\/79"},{"key":"3766_CR316","doi-asserted-by":"crossref","unstructured":"Carlini N, Farid H (2020) Evading deepfake-image detectors with white-and black-box attacks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 658\u2013659","DOI":"10.1109\/CVPRW50498.2020.00337"},{"key":"3766_CR317","doi-asserted-by":"crossref","unstructured":"Neekhara P, Dolhansky B, Bitton J, Ferrer CC (2021) Adversarial threats to deepfake detection: a practical perspective. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 923\u2013932","DOI":"10.1109\/CVPRW53098.2021.00103"},{"key":"3766_CR318","doi-asserted-by":"crossref","unstructured":"Huang C-y, Lin YY, Lee H-y, Lee L-s (2021) Defending your voice: adversarial attack on voice conversion. In: 2021 IEEE spoken language technology workshop (SLT). IEEE, pp 552\u2013559","DOI":"10.1109\/SLT48900.2021.9383529"},{"key":"3766_CR319","unstructured":"Ding Y-Y, Zhang J-X, Liu L-J, Jiang Y, Hu Y, Ling Z-H (2020) Adversarial post-processing of voice conversion against spoofing detection. In: 2020 Asia-Pacific signal and information processing association annual summit and conference (APSIPA ASC). IEEE, pp 556\u2013560"},{"key":"3766_CR320","doi-asserted-by":"crossref","unstructured":"Durall R, Keuper M, Keuper J (2020) Watch your up-convolution: CNN based generative deep neural networks are failing to reproduce spectral distributions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7890\u20137899","DOI":"10.1109\/CVPR42600.2020.00791"},{"key":"3766_CR321","doi-asserted-by":"crossref","unstructured":"Jung S, Keuper M (2021) Spectral distribution aware image generation. In: Proceedings of the AAAI conference on artificial intelligence, pp 1734\u20131742","DOI":"10.1609\/aaai.v35i2.16267"},{"key":"3766_CR322","unstructured":"Huang Y et al. (2020) FakeRetouch: evading DeepFakes detection via the guidance of deliberate noise. arXiv preprint arXiv:09213"},{"key":"3766_CR323","doi-asserted-by":"publisher","first-page":"1038","DOI":"10.1109\/JSTSP.2020.3007250","volume":"14","author":"JC Neves","year":"2020","unstructured":"Neves JC, Tolosana R, Vera-Rodriguez R, Lopes V, Proen\u00e7a H, Fierrez J (2020) Ganprintr: improved fakes and evaluation of the state of the art in face manipulation detection. IEEE J Sel Top Sign Process 14:1038\u20131048","journal-title":"IEEE J Sel Top Sign Process"},{"key":"3766_CR324","doi-asserted-by":"crossref","unstructured":"Osakabe T, Tanaka M, Kinoshita Y, Kiya H (2021) CycleGAN without checkerboard artifacts for counter-forensics of fake-image detection. In: International workshop on advanced imaging technology (IWAIT) 2021. International Society for Optics and Photonics, pp 1176609","DOI":"10.1117\/12.2590977"},{"key":"3766_CR325","doi-asserted-by":"crossref","unstructured":"Huang Y et al. (2020) Fakepolisher: making deepfakes more detection-evasive by shallow reconstruction. In: Proceedings of the 28th ACM international conference on multimedia, pp 1217\u20131226","DOI":"10.1145\/3394171.3413732"},{"key":"3766_CR326","doi-asserted-by":"crossref","unstructured":"Bansal A, Ma S, Ramanan D, Sheikh Y (2018) Recycle-gan: unsupervised video retargeting. In: Proceedings of the European conference on computer vision (ECCV), pp 119-135","DOI":"10.1007\/978-3-030-01228-1_8"},{"key":"3766_CR327","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1250\/ast.11.71","volume":"11","author":"M Abe","year":"1990","unstructured":"Abe M, Nakamura S, Shikano K, Kuwabara H (1990) Voice conversion through vector quantization. J Acoust Soc Jpn 11:71\u201376","journal-title":"J Acoust Soc Jpn"},{"key":"3766_CR328","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MITP.2020.2977589","volume":"22","author":"P Fraga-Lamas","year":"2020","unstructured":"Fraga-Lamas P, Fern\u00e1ndez-Caram\u00e9s TM (2020) Fake news, disinformation, and Deepfakes: leveraging distributed ledger technologies and Blockchain to combat digital deception and counterfeit reality. IT Prof 22:53\u201359","journal-title":"IT Prof"},{"key":"3766_CR329","doi-asserted-by":"publisher","first-page":"41596","DOI":"10.1109\/ACCESS.2019.2905689","volume":"7","author":"HR Hasan","year":"2019","unstructured":"Hasan HR, Salah K (2019) Combating deepfake videos using blockchain and smart contracts. IEEE Access 7:41596\u201341606","journal-title":"IEEE Access"},{"key":"3766_CR330","doi-asserted-by":"crossref","unstructured":"Mao D, Zhao S, Hao Z (2022) A shared updatable method of content regulation for deepfake videos based on blockchain. Appl Intell:1\u201318","DOI":"10.1007\/s10489-021-03156-x"},{"key":"3766_CR331","doi-asserted-by":"crossref","unstructured":"Kaddar B, Fezza SA, Hamidouche W, Akhtar Z, Hadid A (2021) HCiT: Deepfake video detection using a hybrid model of CNN features and vision transformer. In: 2021 international conference on visual communications and image processing (VCIP). IEEE, pp 1\u20135","DOI":"10.1109\/VCIP53242.2021.9675402"},{"key":"3766_CR332","unstructured":"Wodajo D, Atnafu S (2021) Deepfake video detection using convolutional vision transformer. arXiv preprint arXiv:11126"},{"key":"3766_CR333","doi-asserted-by":"crossref","unstructured":"Wang J, Wu Z, Chen J, Jiang Y-G (2021) M2tr: Multi-modal multi-scale transformers for deepfake detection. arXiv preprint arXiv:09770","DOI":"10.1145\/3512527.3531415"},{"key":"3766_CR334","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1504\/IJCAT.2012.050130","volume":"45","author":"B Deokar","year":"2012","unstructured":"Deokar B, Hazarnis A (2012) Intrusion detection system using log files and reinforcement learning. Int J Comput Appl 45:28\u201335","journal-title":"Int J Comput Appl"},{"key":"3766_CR335","doi-asserted-by":"crossref","unstructured":"Liu Z, Wang J, Gong S, Lu H, Tao D (2019) Deep reinforcement active learning for human-in-the-loop person re-identification. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6122\u20136131","DOI":"10.1109\/ICCV.2019.00622"},{"key":"3766_CR336","doi-asserted-by":"crossref","unstructured":"Wang J, Yan Y, Zhang Y, Cao G, Yang M, Ng MK (2020) Deep reinforcement active learning for medical image classification. In: International conference on medical image computing and computer-assisted intervention. Springer, pp 33\u201342","DOI":"10.1007\/978-3-030-59710-8_4"},{"key":"3766_CR337","doi-asserted-by":"crossref","unstructured":"Feng M, Xu H (2017) Deep reinforecement learning based optimal defense for cyber-physical system in presence of unknown cyber-attack. In: 2017 IEEE symposium series on computational intelligence (SSCI). IEEE, pp 1\u20138","DOI":"10.1109\/SSCI.2017.8285298"},{"key":"3766_CR338","doi-asserted-by":"publisher","first-page":"101132","DOI":"10.1016\/j.csl.2020.101132","volume":"65","author":"R Baumann","year":"2021","unstructured":"Baumann R, Malik KM, Javed A, Ball A, Kujawa B, Malik H (2021) Voice spoofing detection corpus for single and multi-order audio replays. Comput Speech Lang 65:101132","journal-title":"Comput Speech Lang"},{"key":"3766_CR339","doi-asserted-by":"crossref","unstructured":"Gon\u00e7alves AR, Violato RP, Korshunov P, Marcel S, Simoes FO (2017) On the generalization of fused systems in voice presentation attack detection. In: 2017 international conference of the biometrics special interest group (BIOSIG). IEEE, pp 1\u20135","DOI":"10.23919\/BIOSIG.2017.8053516"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03766-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03766-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03766-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,3]],"date-time":"2023-02-03T15:40:41Z","timestamp":1675438841000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03766-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,4]]},"references-count":339,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["3766"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03766-z","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,4]]},"assertion":[{"value":"11 May 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 June 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}