{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T23:58:45Z","timestamp":1774742325251,"version":"3.50.1"},"reference-count":78,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"The Ethics and Governance of Artificial Intelligence Initiative","award":["006409-002"],"award-info":[{"award-number":["006409-002"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Sel. Top. Signal Process."],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1109\/jstsp.2020.2999185","type":"journal-article","created":{"date-parts":[[2020,6,1]],"date-time":"2020-06-01T21:55:51Z","timestamp":1591048551000},"page":"1024-1037","source":"Crossref","is-referenced-by-count":176,"title":["Recurrent Convolutional Structures for Audio Spoof and Video Deepfake Detection"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1816-8111","authenticated-orcid":false,"given":"Akash","family":"Chintha","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0341-2153","authenticated-orcid":false,"given":"Bao","family":"Thai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4707-7035","authenticated-orcid":false,"given":"Saniat Javid","family":"Sohrawardi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8455-6353","authenticated-orcid":false,"given":"Kartavya","family":"Bhatt","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6854-8027","authenticated-orcid":false,"given":"Andrea","family":"Hickerson","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8489-6347","authenticated-orcid":false,"given":"Matthew","family":"Wright","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2712-7429","authenticated-orcid":false,"given":"Raymond","family":"Ptucha","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","article-title":"The deepfake detection challenge (DFDC) preview dataset","author":"dolhansky","year":"2019","journal-title":"arXiv 1910 08854"},{"key":"ref72","first-page":"199","article-title":"Multi-region probabilistic histograms for robust and scalable identity inference","author":"sanderson","year":"0","journal-title":"Proc Int Conf Biometrics"},{"key":"ref71","article-title":"Deepfakes: a new threat to face recognition? assessment and detection","author":"korshunov","year":"2018","journal-title":"arXiv 1812 08685"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683164"},{"key":"ref76","article-title":"ASVspoof 2015: The first automatic speaker verification spoofing and countermeasures challenge","author":"wu","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref74","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref39","first-page":"1","article-title":"Deep video portraits","author":"kim","year":"2018","journal-title":"ACM Trans Graph"},{"key":"ref75","article-title":"The OpenCV Library","author":"bradski","year":"2000","journal-title":"Dr Dobb's Journal of Software Tools"},{"key":"ref38","article-title":"Attribute-guided face generation using conditional cyclegan","author":"lu","year":"2018","journal-title":"arXiv 1705 09966v2"},{"key":"ref78","first-page":"92","article-title":"Analysing the predictions of a CNN-based replay spoofing detection system","author":"chettri","year":"0","journal-title":"Proc IEEE Spoken Lang Technol Workshop"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3319535.3363269"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"363","DOI":"10.1007\/978-3-319-92627-8_16","article-title":"A cross-database study of voice presentation attack detection","author":"korshunov","year":"2019","journal-title":"Handbook of Biometric Anti-spoofing"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2279"},{"key":"ref37","article-title":"Learning to discover cross-domain relations with generative adversarial networks","author":"kim","year":"2017","journal-title":"arXiv 1703 05192v2"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref35","first-page":"1060","article-title":"Generative adversarial text to image synthesis","author":"reed","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref34","article-title":"faceswap","author":"kowalski","year":"0"},{"key":"ref60","first-page":"1242","article-title":"A straightforward and efficient implementation of the factor analysis model for speaker verification","author":"matrouf","year":"0","journal-title":"Proc 8th Annu Conf Int Speech Commun Assoc"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2505"},{"key":"ref61","article-title":"Towards robust audio spoofing detection: A detailed comparison of traditional and learned features","author":"bt","year":"2019","journal-title":"arXiv 1905 12439"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2249"},{"key":"ref28","article-title":"The voice conversion challenge 2018: Promoting development of parallel and nonparallel methods","author":"lorenzo-trueba","year":"2018","journal-title":"arXiv 1804 04262"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3174"},{"key":"ref27","first-page":"38","article-title":"Protecting world leaders against deep fakes","author":"agarwal","year":"0","journal-title":"Proc IEEE Conf Comput Vision Pattern Recognit Workshops"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003845"},{"key":"ref66","first-page":"1755","article-title":"Dlib-ml: A machine learning toolkit","volume":"10","author":"king","year":"2009","journal-title":"J Mach Learn Res"},{"key":"ref29","first-page":"2","article-title":"The asvspoof 2017 challenge: Assessing the limits of replay spoofing attack detection","author":"kinnunen","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref67","article-title":"Auto-encoding variational bayes","author":"kingma","year":"2013","journal-title":"arXiv 1312 6114"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2003.09.032"},{"key":"ref69","article-title":"Celeb-df: A new dataset for deepfake forensics","author":"li","year":"2019","journal-title":"arXiv 1909 12962"},{"key":"ref2","article-title":"Reducing malicious use of synthetic media research: Considerations and potential release practices for machine learning","author":"ovadya","year":"2019","journal-title":"arXiv 1907 11274"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1353\/tj.2018.0097"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/AVSS.2018.8639163"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00009"},{"key":"ref21","article-title":"Capsule-forensics: Using capsule networks to detect forged images and videos","author":"nguyen","year":"2018","journal-title":"arXiv 1810 11215"},{"key":"ref24","article-title":"In ictu oculi: Exposing ai generated fake face videos by detecting eye blinking","author":"li","year":"2018","journal-title":"arXiv 1806 02877"},{"key":"ref23","article-title":"Forensictransfer: Weakly-supervised domain adaptation for forgery detection","author":"cozzolino","year":"2018","journal-title":"arXiv 1812 02510"},{"key":"ref26","article-title":"Fakecatcher: Detection of synthetic portrait videos using biological signals","author":"ciftci","year":"2019","journal-title":"arXiv 1901 02212"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.1109\/BTAS46853.2019.9185974","article-title":"Multi-task learning for detecting and segmenting manipulated facial images and videos","author":"nguyen","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00020"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1987.tb00198.x"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1006\/dspr.1999.0361"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"283","DOI":"10.21437\/Odyssey.2016-41","article-title":"A new feature for automatic speaker verification anti-spoofing: Constant Q cepstral coefficients","volume":"45","author":"todisco","year":"2016","journal-title":"Odyssey"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683282"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639535"},{"key":"ref53","first-page":"10 019","article-title":"Neural voice cloning with a few samples","author":"arik","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref52","article-title":"WaveNet: A generative model for raw audio","volume":"abs 1609 3499","author":"van den oord","year":"2016","journal-title":"CoRR"},{"key":"ref10","article-title":"Invertible conditional gans for image editing","author":"perarnau","year":"2016","journal-title":"arXiv 1611 06355"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref40","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3072959.3073640","article-title":"Synthesizing obama: Learning lip sync from audio","author":"suwajanakorn","year":"2017","journal-title":"ACM Trans Graph"},{"key":"ref12","article-title":"Interpreting the latent space of gans for semantic face editing","author":"shen","year":"2019","journal-title":"arXiv 1907 10786"},{"key":"ref13","article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","author":"radford","year":"2015","journal-title":"arXiv 1511 06434"},{"key":"ref14","article-title":"Deferred neural rendering: Image synthesis using neural textures","author":"thies","year":"2019","journal-title":"arXiv 1904 12356"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01065"},{"key":"ref16","article-title":"Few-shot video-to-video synthesis","author":"wang","year":"2019","journal-title":"arXiv 1910 12713"},{"key":"ref17","article-title":"Marionette: Few-shot face reenactment preserving identity of unseen targets","author":"ha","year":"2019","journal-title":"arXiv 1911 08139"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/WIFS.2018.8630761"},{"key":"ref19","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv 1409 1556"},{"key":"ref4","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.397"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2929464.2929475"},{"key":"ref5","article-title":"faceswap-GAN","author":"lu","year":"0"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref7","article-title":"Progressive growing of GANs for improved quality, stability, and variation","author":"karras","year":"2017","journal-title":"arXiv 1710 10196"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/DICTA.2010.57"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00916"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref45","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref47","article-title":"Recurrent convolutional strategies for face manipulation detection in videos","volume":"3","author":"sabir","year":"2019","journal-title":"Interfaces (GUI)"},{"key":"ref42","first-page":"44","article-title":"Transforming auto-encoders","author":"hinton","year":"0","journal-title":"Int Conf Artif Neural Netw"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW.2019.00020"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref43","first-page":"3856","article-title":"Dynamic routing between capsules","author":"sabour","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"}],"container-title":["IEEE Journal of Selected Topics in Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/4200690\/9177372\/09105097.pdf?arnumber=9105097","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:07:08Z","timestamp":1651079228000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9105097\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8]]},"references-count":78,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/jstsp.2020.2999185","relation":{},"ISSN":["1932-4553","1941-0484"],"issn-type":[{"value":"1932-4553","type":"print"},{"value":"1941-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,8]]}}}