{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T15:28:47Z","timestamp":1773329327572,"version":"3.50.1"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T00:00:00Z","timestamp":1719187200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T00:00:00Z","timestamp":1719187200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s11263-024-02054-2","type":"journal-article","created":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T07:02:12Z","timestamp":1719212532000},"page":"5663-5680","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Learning Spatiotemporal Inconsistency via Thumbnail Layout for Face Deepfake Detection"],"prefix":"10.1007","volume":"132","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0463-3107","authenticated-orcid":false,"given":"Yuting","family":"Xu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3890-1894","authenticated-orcid":false,"given":"Jian","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Lijun","family":"Sheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1630-6058","authenticated-orcid":false,"given":"Xiao-Yu","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,24]]},"reference":[{"key":"2054_CR1","doi-asserted-by":"crossref","unstructured":"Afchar, D., Nozick, V., Yamagishi, J., & Echizen, I. (2018). Mesonet: A compact facial video forgery detection network. In 2018 IEEE international workshop on information forensics and security 1\u20137.","DOI":"10.1109\/WIFS.2018.8630761"},{"key":"2054_CR2","unstructured":"Agarwal, S., Farid, H., Gu, Y., He, M., Nagano, K., & Li, H. (2019). Protecting world leaders against deep fakes. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 38."},{"key":"2054_CR3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00152","volume-title":"Deepfake video detection through optical flow based CNN","author":"I Amerini","year":"2019","unstructured":"Amerini, I., Galteri, L., Caldelli, R., & Del Bimbo, A. (2019). Deepfake video detection through optical flow based CNN. In Proc: ICCV."},{"key":"2054_CR4","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lucic, M., & Schmid, C. (2021). Vivit: A video vision transformer. Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6836\u20136846.","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"2054_CR5","doi-asserted-by":"crossref","unstructured":"Bilen, H., Fernando, B., Gavves, E., Vedaldi, A., & Gould, S. (2016). Dynamic image networks for action recognition. Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3034\u20133042.","DOI":"10.1109\/CVPR.2016.331"},{"key":"2054_CR6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00408","volume-title":"End-to-end reconstruction-classification learning for face forgery detection","author":"J Cao","year":"2022","unstructured":"Cao, J., Ma, C., Yao, T., Chen, S., Ding, S., & Yang, X. (2022). End-to-end reconstruction-classification learning for face forgery detection. In Proc: CVPR."},{"key":"2054_CR7","doi-asserted-by":"crossref","unstructured":"Carreira, J., & Zisserman, A. (2017). Quo vadis, action recognition? a new model & the kinetics dataset. In proceedings of the IEEE conference on computer vision and pattern recognition, pp. 6299\u20136308.","DOI":"10.1109\/CVPR.2017.502"},{"key":"2054_CR8","doi-asserted-by":"crossref","unstructured":"Chai, L., Bau, D., Lim, S.-N., & Isola, P. (2020). What makes fake images detectable? understanding properties that generalize. In Proc. ECCV pp. 103\u2013120.","DOI":"10.1007\/978-3-030-58574-7_7"},{"key":"2054_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Y., Rohrbach, M., Yan, Z., Shuicheng, Y., Feng, J., & Kalantidis, Y. (2019). Graph-based global reasoning networks. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 433\u2013442.","DOI":"10.1109\/CVPR.2019.00052"},{"key":"2054_CR10","doi-asserted-by":"crossref","unstructured":"Chen, L., Zhang, Y., Song, Y., Liu, L., & Wang, J. (2022). Self-supervised learning of adversarial example: Towards good generalizations for deepfake detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 18710\u201318719.","DOI":"10.1109\/CVPR52688.2022.01815"},{"key":"2054_CR11","doi-asserted-by":"crossref","unstructured":"Chollet, F. (2017). Xception: Deep learning with depthwise separable convolutions. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1251\u20131258.","DOI":"10.1109\/CVPR.2017.195"},{"key":"2054_CR12","doi-asserted-by":"crossref","unstructured":"Coccomini, D. A., Caldelli, R., Falchi, F., Gennaro, C., & Amato, G. (2022). Cross-forgery analysis of vision transformers and CNNS for Deepfake image detection. In Proceedings of the 1st International Workshop on Multimedia AI against Disinformation, pp. 52\u201358.","DOI":"10.1145\/3512732.3533582"},{"key":"2054_CR13","doi-asserted-by":"crossref","unstructured":"Cozzolino, D., R\u00f6ssler, A., Thies, J., Nie\u00dfner, M., & Verdoliva, L. (2021). Id-reveal: Identity-aware deepfake video detection. In Proceedings of the IEEE\/CVF international conference on computer vision pp. 15108\u201315117.","DOI":"10.1109\/ICCV48922.2021.01483"},{"key":"2054_CR14","volume-title":"Nicola Messina, Claudio Gennaro, & Fabrizio Falchi","author":"AC Davide","year":"2022","unstructured":"Davide, A. C. (2022). Nicola Messina, Claudio Gennaro, & Fabrizio Falchi. ICIAP: Combining efficientnet and vision transformers for video deepfake detection. In Proc."},{"key":"2054_CR15","doi-asserted-by":"crossref","unstructured":"Davis, J.\u00a0W., & Bobick, A.\u00a0F. (1997). The representation and recognition of human movement using temporal templates. In Proceedings of IEEE computer society conference on computer vision and pattern recognition, pp. 928\u2013934.","DOI":"10.1109\/CVPR.1997.609439"},{"key":"2054_CR16","unstructured":"deepfakes. Deepfakes. https:\/\/github.com\/deepfakes\/faceswap, 2020."},{"key":"2054_CR17","unstructured":"DeVries, T., & Taylor, G. W. (2017). Improved regularization of convolutional neural networks with cutout. arXiv:1708.04552"},{"key":"2054_CR18","unstructured":"Dolhansky, B., Bitton, J., Pflaum, B., Lu, J., Howes, R., Wang, M., & Ferrer, C\u00a0C. (2020). The deepfake detection challenge (dfdc) dataset. arXiv:2006.07397."},{"key":"2054_CR19","doi-asserted-by":"crossref","unstructured":"Dong, X., Bao, J., Chen, D., Zhang, T., Zhang, W., Nenghai, Y., Chen, D., Wen, F., & Guo, B. (2022). Protecting celebrities from Deepfake with identity consistency transformer. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition pp. 9468\u20139478.","DOI":"10.1109\/CVPR52688.2022.00925"},{"key":"2054_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00771","volume-title":"Think twice before detecting GAN-generated fake images from their spectral domain imprints","author":"C Dong","year":"2022","unstructured":"Dong, C., Kumar, A., & Liu, E. (2022). Think twice before detecting GAN-generated fake images from their spectral domain imprints. In Proc: CVPR."},{"key":"2054_CR21","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"A Dosovitskiy","year":"2021","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., & Houlsby, N. (2021). An image is worth 16x16 words: Transformers for image recognition at scale. In Proc: ICLR."},{"key":"2054_CR22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01963","volume-title":"Learning second order local anomaly for general face forgery detection","author":"J Fei","year":"2022","unstructured":"Fei, J., Dai, Y., Peipeng, Y., Shen, T., Xia, Z., & Weng, J. (2022). Learning second order local anomaly for general face forgery detection. In Proc: CVPR."},{"key":"2054_CR23","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., & He, K. (2019). Slowfast networks for video recognition. Proceedings of the IEEE\/CVF international conference on computer vision pp. 6202\u20136211.","DOI":"10.1109\/ICCV.2019.00630"},{"key":"2054_CR24","volume-title":"Leveraging frequency analysis for deep fake image recognition","author":"J Frank","year":"2020","unstructured":"Frank, J., Eisenhofer, T., Sch\u00f6nherr, L., Fischer, A., Kolossa, D., & Holz, T. (2020). Leveraging frequency analysis for deep fake image recognition. In Proc: ICML."},{"key":"2054_CR25","doi-asserted-by":"crossref","unstructured":"Gerstner, C.\u00a0R., & Farid, H. (2022). Detecting real-time deep-fake videos using active illumination. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 53\u201360.","DOI":"10.1109\/CVPRW56347.2022.00015"},{"key":"2054_CR26","volume-title":"Generative adversarial networks","author":"I Goodfellow","year":"2014","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Bing, X., Warde-Farley, D., Ozair, S., Courville, A., & Bengio, Y. (2014). Generative adversarial networks. In Proc: NeurIPS."},{"key":"2054_CR27","volume-title":"Robust image forgery detection over online social network shared images","author":"W Haiwei","year":"2022","unstructured":"Haiwei, W., Zhou, J., Tian, J., & Liu, J. (2022). Robust image forgery detection over online social network shared images. In Proc: CVPR."},{"key":"2054_CR28","doi-asserted-by":"crossref","unstructured":"Haliassos, A., Mira, R., Petridis, S., & Pantic, M. (2022). Leveraging real talking faces via self-supervision for robust forgery detection. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition pp. 14950\u201314962.","DOI":"10.1109\/CVPR52688.2022.01453"},{"key":"2054_CR29","doi-asserted-by":"crossref","unstructured":"Haliassos, A., Vougioukas, K., Petridis, S., & Pantic, M. (2021). Lips don\u2019t lie: A generalisable & robust approach to face forgery detection. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 5039\u20135049.","DOI":"10.1109\/CVPR46437.2021.00500"},{"key":"2054_CR30","doi-asserted-by":"crossref","unstructured":"Hara, K., Kataoka, H., & Satoh, Y. (2017). Learning spatio-temporal features with 3d residual networks for action recognition. Proceedings of the IEEE international conference on computer vision workshops, pp. 3154\u20133160.","DOI":"10.1109\/ICCVW.2017.373"},{"key":"2054_CR31","doi-asserted-by":"crossref","unstructured":"He, K. , Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"2054_CR32","unstructured":"Heo, Y.-J., Choi, Y.-J., Lee, Y.-W., & Kim, B.-G. (2021). Deepfake detection scheme based on vision transformer and distillation. arXiv:2104.01353."},{"key":"2054_CR33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00339","volume-title":"Depth-aware generative adversarial network for talking head video generation","author":"F-T Hong","year":"2022","unstructured":"Hong, F.-T., Zhang, L., Shen, L., & Dan, X. (2022). Depth-aware generative adversarial network for talking head video generation. In Proc: CVPR."},{"key":"2054_CR34","doi-asserted-by":"crossref","unstructured":"Jia, G., Zheng, M., Chuanrui, H., Ma, X., Yuting, X., Liu, L., Deng, Y., & He, R. (2021). Inconsistency-aware wavelet dual-branch network for face forgery detection. IEEE T-BIOM,3(3) .","DOI":"10.1109\/TBIOM.2021.3086109"},{"key":"2054_CR35","doi-asserted-by":"crossref","unstructured":"Jiang, L., Li, R., Wu, W., Qian, C., & Loy, C.\u00a0Change. (2020). Deeperforensics-1.0: A large-scale dataset for real-world face forgery detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2889\u20132898.","DOI":"10.1109\/CVPR42600.2020.00296"},{"key":"2054_CR36","doi-asserted-by":"crossref","unstructured":"Juan, H., Liao, X., Liang, J., Zhou, W., & Qin, Z. (2022). Finfer: Frame inference-based Deepfake detection for high-visual-quality videos. In Proceedings of the AAAI conference on artificial intelligence 951\u2013959.","DOI":"10.1609\/aaai.v36i1.19978"},{"key":"2054_CR37","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., & Aila, T. (2019). A style-based generator architecture for generative adversarial networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4401\u20134410.","DOI":"10.1109\/CVPR.2019.00453"},{"key":"2054_CR38","doi-asserted-by":"crossref","unstructured":"Khan, S.\u00a0A., & Dai, H. (2021). Video transformer for Deepfake detection with incremental learning. In In Proceedings of the 29th ACM international conference on multimedia, pp. 1821\u20131828.","DOI":"10.1145\/3474085.3475332"},{"issue":"6","key":"2054_CR39","doi-asserted-by":"publisher","first-page":"2953","DOI":"10.3390\/app12062953","volume":"12","author":"A Khormali","year":"2022","unstructured":"Khormali, A., & Yuan, J.-S. (2022). Dfdt: An end-to-end Deepfake detection framework using vision transformer. Applied Sciences, 12(6), 2953.","journal-title":"Applied Sciences"},{"key":"2054_CR40","unstructured":"Kingma, D.\u00a0P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv:1412.6980."},{"key":"2054_CR41","unstructured":"Kipf, Thomas\u00a0N., & Welling, M. (2016). Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907, 2016."},{"key":"2054_CR42","doi-asserted-by":"crossref","unstructured":"Kwon, P., You, J., Nam, G., Park, S., and Chae, G A large-scale korean deepfake detection dataset. In Proceedings of the IEEE\/CVF international conference on computer vision, pages 10744\u201310753, 2021.","DOI":"10.1109\/ICCV48922.2021.01057"},{"key":"2054_CR43","unstructured":"Le, B\u00a0M., & Woo, S\u00a0S. (2022) Add: Frequency attention and multi-view based knowledge distillation to detect low-quality compressed deepfake images. In Proceedings of the AAAI conference on artificial intelligence."},{"key":"2054_CR44","unstructured":"Li, Y., & Lyu, S. (2019). Exposing deepfake videos by detecting face warping artifacts. In Proc. CVPRW, pp. 656\u2013663."},{"key":"2054_CR45","doi-asserted-by":"crossref","unstructured":"Li, L., Bao, J., Yang, H., Chen, D., & Wen, F. (2020). Advancing high fidelity identity swapping for forgery detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5074\u20135083.","DOI":"10.1109\/CVPR42600.2020.00512"},{"key":"2054_CR46","doi-asserted-by":"crossref","unstructured":"Li, L., Bao, J., Zhang, T., Yang, H., Chen, D., Wen, F., & Guo, B. (2020). Face x-ray for more general face forgery detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 5001\u20135010.","DOI":"10.1109\/CVPR42600.2020.00505"},{"key":"2054_CR47","doi-asserted-by":"crossref","unstructured":"Li, Y., Chang, M-C., & Lyu, S. (2018). In ictu oculi: Exposing AI generated fake face videos by detecting eye blinking. In IEEE WIFS.","DOI":"10.1109\/WIFS.2018.8630787"},{"key":"2054_CR48","doi-asserted-by":"crossref","unstructured":"Li, Y., Xin Y., Pu, S., Honggang Q, & Lyu, S. (2020). Celeb-df: A large-scale challenging dataset for Deepfake forensics. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3207\u20133216.","DOI":"10.1109\/CVPR42600.2020.00327"},{"key":"2054_CR49","unstructured":"Liang, X., Hu, Z., Zhang, H., Lin, L., & Xing, E\u00a0P. (2018). Symbolic graph reasoning meets convolutions. Advances in Neural Information Processing Systems."},{"key":"2054_CR50","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y, Cao, Y, Han, H., Wei, Y, Zhang, Z, Lin, S, & Guo, B. (2021). Swin transformer: Hierarchical vision transformer using shifted windows. In Proceedings of the IEEE\/CVF international conference on computer vision 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2054_CR51","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, D., Wang, Y., Wang, L., Tai, Y., Wang, C., Li, J., Huang, F., & Lu, T., Teinet: Towards an efficient architecture for video recognition. In Proceedings of the AAAI conference on artificial intelligence, pp. 11669\u201311676, 2020.","DOI":"10.1609\/aaai.v34i07.6836"},{"key":"2054_CR52","doi-asserted-by":"crossref","unstructured":"Liu, Z., Qi, X., & Torr, P\u00a0HS. (2020). Global texture enhancement for fake face detection in the wild. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00808"},{"key":"2054_CR53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00639","volume-title":"Frequency-aware discriminative feature learning supervised by single-center loss for face forgery detection","author":"J Li","year":"2021","unstructured":"Li, J., Xie, H., Li, J., Wang, Z., & Zhang, Y. (2021). Frequency-aware discriminative feature learning supervised by single-center loss for face forgery detection. In Proc: CVPR."},{"key":"2054_CR54","unstructured":"MarekKowalski. Faceswap. https:\/\/github.com\/MarekKowalski\/FaceSwap\/, 2021."},{"key":"2054_CR55","doi-asserted-by":"crossref","unstructured":"Masi, I., Killekar, A., Mascarenhas, R\u00a0M., Gurudatt, Shenoy\u00a0P., & AbdAlmageed, W. 2020. Two-branch recurrent network for isolating Deepfakes in videos. In Proc. ECCV, pages 667\u2013684.","DOI":"10.1007\/978-3-030-58571-6_39"},{"issue":"1","key":"2054_CR56","first-page":"1","volume":"54","author":"Y Mirsky","year":"2021","unstructured":"Mirsky, Y., & Lee, W. (2021). The creation and detection of Deepfakes: A survey. ACM CSUR, 54(1), 1\u201341.","journal-title":"ACM CSUR"},{"key":"2054_CR57","doi-asserted-by":"crossref","unstructured":"Neimark, D., Bar, O., Zohar, M., & Asselmann, D. (2021). Video transformer network. In Proc. ICCV, pp. 3163\u20133172.","DOI":"10.1109\/ICCVW54120.2021.00355"},{"key":"2054_CR58","doi-asserted-by":"crossref","unstructured":"Ni, Y., Meng, D., Changqian, Y., Quan, C., Ren, D., & Zhao, Y. (2022). Core: Consistent representation learning for face forgery detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 12\u201321.","DOI":"10.1109\/CVPRW56347.2022.00011"},{"key":"2054_CR59","doi-asserted-by":"crossref","unstructured":"Ni, Y., Meng, D., Changqian, Y., Quan, C., Ren, D., & Zhao, Y. (2022). Core: Consistent representation learning for face forgery detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 12\u201321.","DOI":"10.1109\/CVPRW56347.2022.00011"},{"key":"2054_CR60","volume-title":"Responsible disclosure of generative models using scalable fingerprinting","author":"Y Ning","year":"2022","unstructured":"Ning, Y., Skripniuk, V., Chen, D., Davis, L., & Fritz, M. (2022). Responsible disclosure of generative models using scalable fingerprinting. In Proc: ICLR."},{"key":"2054_CR61","doi-asserted-by":"crossref","unstructured":"Nirkin, Y., Wolf, L., Keller, Y., & Hassner, T. (2021) Deepfake detection based on discrepancies between faces and their context. In IEEE TPAMI.","DOI":"10.1109\/TPAMI.2021.3093446"},{"key":"2054_CR62","first-page":"547","volume":"17","author":"Y Peipeng","year":"2022","unstructured":"Peipeng, Y., Fei, J., Xia, Z., Zhou, Z., & Weng, J. (2022). Improving generalization by commonality learning in face forgery detection. IEEE TIFS, 17, 547\u2013558.","journal-title":"IEEE TIFS"},{"key":"2054_CR63","volume-title":"Thinking in frequency: Face forgery detection by mining frequency-aware clues","author":"Y Qian","year":"2020","unstructured":"Qian, Y., Guojun Yin, L., Sheng, Z. C., & Shao, J. (2020). Thinking in frequency: Face forgery detection by mining frequency-aware clues. In Proc: ECCV."},{"key":"2054_CR64","unstructured":"Ricker, J., Damm, S., Holz, T., & Fischer, A. (2022). Towards the detection of diffusion model deepfakes. arXiv preprint arXiv:2210.14571."},{"key":"2054_CR65","doi-asserted-by":"crossref","unstructured":"Rossler, A., Cozzolino, D., Verdoliva, L., Riess, C., Thies, J., & Niessner, M. (2019). Faceforensics++: Learning to detect manipulated facial images. Proceedings of the IEEE\/CVF international conference on computer vision, 1\u201311.","DOI":"10.1109\/ICCV.2019.00009"},{"key":"2054_CR66","unstructured":"Sabir, E., Cheng, J., Jaiswal, A., AbdAlmageed, W., Masi, I., & Natarajan, P. (2019). Recurrent convolutional strategies for face manipulation detection in videos. In Proc. CVPRW, pp. 80\u201387."},{"key":"2054_CR67","doi-asserted-by":"crossref","unstructured":"Safaei, M., & Foroosh, H. (2019). Still image action recognition by predicting spatial-temporal pixel evolution. In 2019 IEEE winter conference on applications of computer vision pp. 111\u2013120.","DOI":"10.1109\/WACV.2019.00019"},{"key":"2054_CR68","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., & Batra, D. (2017). Grad-cam: Visual explanations from deep networks via gradient-based localization. Proceedings of the IEEE international conference on computer vision, pp. 618\u2013626.","DOI":"10.1109\/ICCV.2017.74"},{"key":"2054_CR69","doi-asserted-by":"crossref","unstructured":"Shiohara, K., & Yamasaki, T. (2022). Detecting Deepfakes with self-blended images. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition pp. 18720\u201318729.","DOI":"10.1109\/CVPR52688.2022.01816"},{"key":"2054_CR70","doi-asserted-by":"crossref","unstructured":"Sun, K., Yao, T., Chen, S., Ding, S., Li, J., & Ji, R. (2022). Dual contrastive learning for general face forgery detection. Proceedings of the AAAI conference on artificial intelligence pp. 2316\u20132324.","DOI":"10.1609\/aaai.v36i2.20130"},{"key":"2054_CR71","doi-asserted-by":"crossref","unstructured":"Sun, Y., Zhang, Z., Qiu, C., Liang W., L., & Sun, & Zekai W. (2022). Faketransformer: Exposing face forgery from spatial-temporal representation modeled by facial pixel variations. In 2022 7th international conference on intelligent computing and signal processing pp. 705\u2013713.","DOI":"10.1109\/ICSP54964.2022.9778420"},{"key":"2054_CR72","unstructured":"Tan, M., & Le, Q. (2019). Efficientnet: Rethinking model scaling for convolutional neural networks. International conference on machine learning, pp. 6105\u20136114."},{"key":"2054_CR73","doi-asserted-by":"crossref","unstructured":"Thies, J., Zollhofer, M., Stamminger, M., Theobalt, C., & Nie\u00dfner, M. (2016). Face2face: Real-time face capture and reenactment of RGB videos. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2387\u20132395.","DOI":"10.1109\/CVPR.2016.262"},{"issue":"4","key":"2054_CR74","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3323035","volume":"38","author":"J Thies","year":"2019","unstructured":"Thies, J., Zollh\u00f6fer, M., & Nie\u00dfner, M. (2019). Deferred neural rendering: Image synthesis using neural textures. ACM TOG, 38(4), 1\u201312.","journal-title":"ACM TOG"},{"issue":"5","key":"2054_CR75","doi-asserted-by":"publisher","first-page":"910","DOI":"10.1109\/JSTSP.2020.3002101","volume":"14","author":"L Verdoliva","year":"2020","unstructured":"Verdoliva, L. (2020). Media forensics & Deepfakes: An overview. IEEE Journal of Selected Topics in Signal Processing, 14(5), 910\u2013932.","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"2054_CR76","doi-asserted-by":"crossref","unstructured":"Wang, C., & Deng, W. (2021). Representative forgery mining for fake face detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 14923\u201314932.","DOI":"10.1109\/CVPR46437.2021.01468"},{"key":"2054_CR77","doi-asserted-by":"crossref","unstructured":"Wang, X., & Gupta, A. (2018). Videos as space-time region graphs. Proceedings of the European conference on computer vision, pp. 399\u2013417.","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"2054_CR78","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., & He, K. (2018). Non-local neural networks. In Proc. CVPR, pp. 7794\u20137803.","DOI":"10.1109\/CVPR.2018.00813"},{"key":"2054_CR79","doi-asserted-by":"crossref","unstructured":"Wang, P., Liu, K., Zhou, W., Zhou, H., Liu, H., Zhang, W., & Nenghai, Y. (2022). Adt: Anti-deepfake transformer. In ICASSP 2022-2022 IEEE International conference on acoustics, speech and signal processing, pp. 2899\u20131903.","DOI":"10.1109\/ICASSP43922.2022.9746888"},{"key":"2054_CR80","doi-asserted-by":"crossref","unstructured":"Wang, S.-Y., Wang, O., Zhang, R., Owens, A., & Efros, A.\u00a0A. (2020). CNN-generated images are surprisingly easy to spot... for now. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition , pp. 8695\u20138704.","DOI":"10.1109\/CVPR42600.2020.00872"},{"key":"2054_CR81","doi-asserted-by":"crossref","unstructured":"Wang, J., Zuxuan, W., Ouyang, W., Han, Xintong, C., Jingjing, J., Yu-G., & Li, S-N. (2022). M2tr: Multi-modal multi-scale transformers for Deepfake detection. In Proceedings of the 2022 international conference on multimedia retrieval 615\u2013623.","DOI":"10.1145\/3512527.3531415"},{"key":"2054_CR82","unstructured":"Wodajo, D., & Atnafu, S. Deepfake video detection using convolutional vision transformer. arXiv:2102.11126, 2021."},{"key":"2054_CR83","doi-asserted-by":"crossref","unstructured":"Yang, J., Ang, Y\u00a0Z., Guo, Z., Zhou, K., Zhang, W., & Liu, Z. Panoptic scene graph generation. In Procedings of ECCV, pp. 178\u2013196, 2022.","DOI":"10.1007\/978-3-031-19812-0_11"},{"key":"2054_CR84","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683164","volume-title":"Exposing deep fakes using inconsistent head poses","author":"X Yang","year":"2019","unstructured":"Yang, X., Li, Y., & Lyu, S. (2019). Exposing deep fakes using inconsistent head poses. In Proc: ICASSP."},{"key":"2054_CR85","first-page":"1696","volume":"18","author":"Z Yang","year":"2023","unstructured":"Yang, Z., Liang, J., Yuting, X., Zhang, X.-Y., & He, R. (2023). Masked relation learning for Deepfake detection. IEEE TIFS, 18, 1696\u20131708.","journal-title":"IEEE TIFS"},{"key":"2054_CR86","doi-asserted-by":"crossref","unstructured":"Yao, B., & Fei-Fei, L. (2012). Action recognition with exemplar based 2.5 d graph matching. In Proc. ECCV, pages 173\u2013186.","DOI":"10.1007\/978-3-642-33765-9_13"},{"key":"2054_CR87","doi-asserted-by":"crossref","unstructured":"Yiwei, R., Zhou, W., Liu, Y., Sun, Ji., & Li, Q. (2021). Bita-net: Bi-temporal attention network for facial video forgery detection. In: In 2021 IEEE International Joint Conference on Biometrics, pp. 1\u20138.","DOI":"10.1109\/IJCB52358.2021.9484408"},{"key":"2054_CR88","unstructured":"Yuting, X., Jia, G., Huang, H., Duan, J., & He, R. (2021). Visual-semantic transformer for face forgery detection. In 2021 IEEE International Joint Conference on Biometrics pp. 1\u20137."},{"key":"2054_CR89","unstructured":"Yuting, X., Liang, Jian, J, Gengyun, Y, Ziming, Z, Yanhao, & He, R. (2023). Tall: Thumbnail layout for Deepfake video detection. In Proceedings of the IEEE\/CVF international conference on computer vision, 22658\u201322668."},{"key":"2054_CR90","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Li, X., Liu, C., Shuai, B., Zhu, Y., Brattoli, B., Chen, H., Marsic, I., & Tighe, J. (2021). Vidtr: Video transformer without convolutions. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 13577\u201313587.","DOI":"10.1109\/ICCV48922.2021.01332"},{"issue":"10","key":"2054_CR91","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","volume":"23","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Zhang, Z., Li, Z., & Qiao, Yu. (2016). Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Processing Letters, 23(10), 1499\u20131503.","journal-title":"IEEE Signal Processing Letters"},{"key":"2054_CR92","doi-asserted-by":"crossref","unstructured":"Zhao, T., Xiang, X., Mingze, X., Ding, H., Xiong, Y., & Xia, W. (2021). Learning self-consistency for deepfake detection. In Proceedings of the IEEE\/CVF international conference on computer vision pp. 15023\u201315033.","DOI":"10.1109\/ICCV48922.2021.01475"},{"key":"2054_CR93","unstructured":"Zhao, H., Zhou, W., Chen, Dongdong., Zhang, Weiming., & Yu, Nenghai. Self-supervised transformer for Deepfake detection. arXiv:2203.01265, 2022."},{"key":"2054_CR94","doi-asserted-by":"crossref","unstructured":"Zhao, H., Zhou, W., Chen, D., Wei, T., Zhang, W., & Nenghai, Y. (2021). In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 2185\u20132194.","DOI":"10.1109\/CVPR46437.2021.00222"},{"key":"2054_CR95","first-page":"1335","volume":"18","author":"C Zhao","year":"2023","unstructured":"Zhao, C., Wang, C., Guosheng, H., Chen, H., Liu, C., & Tang, J. (2023). Istvt: Interpretable spatial-temporal video transformer for Deepfake detection. IEEE TIFS, 18, 1335\u20131348.","journal-title":"IEEE TIFS"},{"key":"2054_CR96","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Bao, J., Chen, D., Zeng, M., & Wen, F. (2021). Exploring temporal coherence for more general video face forgery detection. In Proceedings of the IEEE\/CVF international conference on computer vision pp. 15044\u201315054.","DOI":"10.1109\/ICCV48922.2021.01477"},{"key":"2054_CR97","doi-asserted-by":"crossref","unstructured":"Zhihao, G., Chen, Y., Yao, T., Ding, S., Li, J., Huang, F., & Ma, L. (2021). Spatiotemporal inconsistency learning for Deepfake video detection. Proceedings of the 29th ACM international conference on multimedia pp. 3473\u20133481.","DOI":"10.1145\/3474085.3475508"},{"key":"2054_CR98","volume-title":"Delving into the local: Dynamic inconsistency learning for Deepfake video detection","author":"G Zhihao","year":"2022","unstructured":"Zhihao, G., Chen, Y., Yao, T., Ding, S., Li, J., & Ma, L. (2022). Delving into the local: Dynamic inconsistency learning for Deepfake video detection. In Proc: AAAI."},{"key":"2054_CR99","doi-asserted-by":"crossref","unstructured":"Zhou, Y., & Lim, S-N. (2021). Joint audio-visual Deepfake detection. Proceedings of the IEEE\/CVF international conference on computer vision pp. 14800\u201314809.","DOI":"10.1109\/ICCV48922.2021.01453"},{"key":"2054_CR100","doi-asserted-by":"crossref","unstructured":"Zi, B., Chang, M., Chen, J., Ma, X., & Jiang, Y.-G. (2020). Wilddeepfake: A challenging real-world dataset for deepfake detection. In Proceedings of the 28th ACM international conference on multimedia 2382\u20132390.","DOI":"10.1145\/3394171.3413769"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02054-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02054-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02054-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T10:11:57Z","timestamp":1731665517000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02054-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,24]]},"references-count":100,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["2054"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02054-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6,24]]},"assertion":[{"value":"15 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 June 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}