{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T00:48:10Z","timestamp":1774658890557,"version":"3.50.1"},"reference-count":94,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T00:00:00Z","timestamp":1741651200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T00:00:00Z","timestamp":1741651200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s11263-025-02395-6","type":"journal-article","created":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T18:21:09Z","timestamp":1741717269000},"page":"4538-4554","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["UniFace++: Revisiting a Unified Framework for Face Reenactment and Swapping via 3D 
Priors"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9300-4497","authenticated-orcid":false,"given":"Chao","family":"Xu","sequence":"first","affiliation":[]},{"given":"Yijie","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Shaoting","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Baigui","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xuelong","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,11]]},"reference":[{"key":"2395_CR1","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Lischinski, D., & Fried, O. (2022). Blended diffusion for text-driven editing of natural images. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 18208\u201318218).","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"2395_CR2","doi-asserted-by":"crossref","unstructured":"Bao, J., Chen, D., Wen, F., Li, H., & Hua, G. (2018). Towards open-set identity preserving face synthesis. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6713\u20136722).","DOI":"10.1109\/CVPR.2018.00702"},{"key":"2395_CR3","doi-asserted-by":"crossref","unstructured":"Bitouk, D., Kumar, N., Dhillon, S., Belhumeur, P., & Nayar, S. K. (2008). Face swapping: automatically replacing faces in photographs. In ACM SIGGRAPH (pp. 1\u20138).","DOI":"10.1145\/1360612.1360638"},{"key":"2395_CR4","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1111\/j.1467-8659.2004.00799.x","volume":"23","author":"V Blanz","year":"2004","unstructured":"Blanz, V., Scherbaum, K., Vetter, T., & Seidel, H. P. (2004). Exchanging faces in images. 
Computer Graphics Forum, Wiley Online Library, 23, 669\u2013676.","journal-title":"Computer Graphics Forum, Wiley Online Library"},{"key":"2395_CR5","doi-asserted-by":"crossref","unstructured":"Bounareli, S., Tzelepis, C., Argyriou, V., Patras, I., & Tzimiropoulos, G. (2023). Hyperreenact: One-shot reenactment via jointly learning to refine and retarget faces. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 7149\u20137159).","DOI":"10.1109\/ICCV51070.2023.00657"},{"key":"2395_CR6","doi-asserted-by":"crossref","unstructured":"Cao, Q., Shen, L., Xie, W., Parkhi, O.M., & Zisserman, A. (2018) Vggface2: A dataset for recognising faces across pose and age. In 2018 13th IEEE international conference on automatic face & gesture recognition (FG 2018) (pp. 67\u201374). IEEE.","DOI":"10.1109\/FG.2018.00020"},{"key":"2395_CR7","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, X., Ni, B., & Ge, Y. (2020a) Simswap: An efficient framework for high fidelity face swapping. In Proceedings of the 28th ACM international conference on multimedia (pp. 2003\u20132011).","DOI":"10.1145\/3394171.3413630"},{"key":"2395_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, C., Yuan, B., & Tao, D. (2020b). Puppeteergan: Arbitrary portrait animation with semantic-aware appearance transformation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13518\u201313527).","DOI":"10.1109\/CVPR42600.2020.01353"},{"key":"2395_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, Y.T., Tzeng, V., Liang, Y., Wang, C.C., Chen, B.Y., Chuang, Y.Y., & Ouhyoung, M. (2009) 3d-model-based face replacement in video. In SIGGRAPH\u201909: Posters (pp. 1\u20131).","DOI":"10.1145\/1599301.1599330"},{"key":"2395_CR10","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., & Zafeiriou, S. (2019a). Arcface: Additive angular margin loss for deep face recognition. 
In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4690\u20134699).","DOI":"10.1109\/CVPR.2019.00482"},{"key":"2395_CR11","doi-asserted-by":"crossref","unstructured":"Deng, Y., Yang, J., Xu, S., Chen, D., Jia, Y., & Tong, X. (2019b) Accurate 3d face reconstruction with weakly-supervised learning: From single image to image set. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops.","DOI":"10.1109\/CVPRW.2019.00038"},{"key":"2395_CR12","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., & Nichol, A. (2021). Diffusion models beat gans on image synthesis. Advances in Neural Information Processing Systems, 34, 8780\u20138794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2395_CR13","doi-asserted-by":"crossref","unstructured":"Doukas, M. C., Zafeiriou, S., & Sharmanska, V. (2021). Headgan: One-shot neural head synthesis and editing. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 14398\u201314407).","DOI":"10.1109\/ICCV48922.2021.01413"},{"key":"2395_CR14","doi-asserted-by":"crossref","unstructured":"Fan, W.C., Chen, Y.C., Chen, D., Cheng, Y., Yuan, L., & Wang, Y.C.F. (2022) Frido: Feature pyramid diffusion for complex scene image synthesis. arXiv preprint arXiv:2208.13753","DOI":"10.1609\/aaai.v37i1.25133"},{"key":"2395_CR15","doi-asserted-by":"crossref","unstructured":"Gao, G., Huang, H., Fu, C., Li, Z., & He, R. (2021a). Information bottleneck disentanglement for identity swapping. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 3404\u20133413).","DOI":"10.1109\/CVPR46437.2021.00341"},{"key":"2395_CR16","doi-asserted-by":"crossref","unstructured":"Gao, G., Huang, H., Fu, C., Li, Z., & He, R. (2021b). Information bottleneck disentanglement for identity swapping. 
In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 3404\u20133413).","DOI":"10.1109\/CVPR46437.2021.00341"},{"key":"2395_CR17","doi-asserted-by":"crossref","unstructured":"Gao, Y., Zhou, Y., Wang, J., Li, X., Ming, X., & Lu, Y. (2023). High-fidelity and freely controllable talking head video generation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 5609\u20135619).","DOI":"10.1109\/CVPR52729.2023.00543"},{"issue":"11","key":"2395_CR18","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., & Bengio, Y. (2020). Generative adversarial networks. Communications of the ACM, 63(11), 139\u2013144.","journal-title":"Communications of the ACM"},{"key":"2395_CR19","doi-asserted-by":"publisher","first-page":"10893","DOI":"10.1609\/aaai.v34i07.6721","volume":"34","author":"S Ha","year":"2020","unstructured":"Ha, S., Kersner, M., Kim, B., Seo, S., & Kim, D. (2020). Marionette: Few-shot face reenactment preserving identity of unseen targets. Proceedings of the AAAI Conference on Artificial Intelligence, 34, 10893\u201310900.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2395_CR20","unstructured":"Harvey, W., Naderiparizi, S., Masrani, V., Weilbach, C., & Wood, F. (2022) Flexible diffusion modeling of long videos. arXiv preprint arXiv:2205.11495"},{"key":"2395_CR21","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S. (2017) Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in Neural Information Processing Systems, 30"},{"key":"2395_CR22","unstructured":"Ho, J., & Salimans, T. (2022) Classifier-free diffusion guidance. 
arXiv preprint arXiv:2207.12598"},{"key":"2395_CR23","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2395_CR24","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2395_CR25","unstructured":"Ho, J., Chan, W., Saharia, C., Whang, J., Gao, R., Gritsenko, A., Kingma, D.P., Poole, B., Norouzi, M., & Fleet, D.J., et\u00a0al. (2022a) Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303"},{"key":"2395_CR26","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., & Fleet, D.J. (2022b) Video diffusion models. arXiv preprint arXiv:2204.03458"},{"key":"2395_CR27","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., & Fleet, D. J. (2022c). Video diffusion models., arXiv preprint arXiv:2204.03458"},{"key":"2395_CR28","doi-asserted-by":"crossref","unstructured":"Hong, F. T., & Xu, D. (2023). Implicit identity representation conditioned memory compensation network for talking head video generation. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 23062\u201323072).","DOI":"10.1109\/ICCV51070.2023.02108"},{"key":"2395_CR29","doi-asserted-by":"crossref","unstructured":"Hong, F. T., Zhang, L., Shen, L., & Xu, D. (2022). Depth-aware generative adversarial network for talking head video generation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 
3397\u20133406).","DOI":"10.1109\/CVPR52688.2022.00339"},{"key":"2395_CR30","doi-asserted-by":"crossref","unstructured":"Hong, F. T., Shen, L., & Xu D (2023) Dagan++: Depth-aware generative adversarial network for talking head video generation. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/TPAMI.2023.3298936"},{"key":"2395_CR31","doi-asserted-by":"crossref","unstructured":"Huang, P. H., Yang, F. E., & Wang, Y. C. F. (2020a). Learning identity-invariant motion representations for cross-id face reenactment. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 7084\u20137092).","DOI":"10.1109\/CVPR42600.2020.00711"},{"key":"2395_CR32","unstructured":"Huang, R., Huang, J., Yang, D., Ren, Y., Liu, L., Li, M., Ye, Z., Liu, J., Yin, X., & Zhao, Z. (2023) Make-an-audio: Text-to-audio generation with prompt-enhanced diffusion models. arXiv preprint arXiv:2301.12661"},{"key":"2395_CR33","doi-asserted-by":"crossref","unstructured":"Huang, X., & Belongie, S. (2017). Arbitrary style transfer in real-time with adaptive instance normalization. In Proceedings of the IEEE international conference on computer vision (pp. 1501\u20131510).","DOI":"10.1109\/ICCV.2017.167"},{"key":"2395_CR34","doi-asserted-by":"crossref","unstructured":"Huang, Y., Wang, Y., Tai, Y., Liu, X., Shen, P., Li, S., Li, J., & Huang, F. (2020b). Curricularface: Adaptive curriculum learning loss for deep face recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 5901\u20135910).","DOI":"10.1109\/CVPR42600.2020.00594"},{"key":"2395_CR35","doi-asserted-by":"crossref","unstructured":"Jiang, D., Song, D., Tong, R., & Tang, M. (2023). Styleipsb: Identity-preserving semantic basis of stylegan for high fidelity face swapping. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 
352\u2013361).","DOI":"10.1109\/CVPR52729.2023.00042"},{"key":"2395_CR36","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., & Aila, T. (2019). A style-based generator architecture for generative adversarial networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4401\u20134410).","DOI":"10.1109\/CVPR.2019.00453"},{"key":"2395_CR37","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., & Aila, T. (2020). Analyzing and improving the image quality of stylegan. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8110\u20138119).","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"2395_CR38","unstructured":"Kim, K., Kim, Y., Cho, S., Seo, J., Nam, J., Lee, K., Kim, S., & Lee, K. (2022) Diffface: Diffusion-based face swapping with facial guidance. arXiv preprint arXiv:2212.13344"},{"key":"2395_CR39","doi-asserted-by":"crossref","unstructured":"Lee, C. H., Liu, Z., Wu, L., & Luo, P. (2020). Maskgan: Towards diverse and interactive facial image manipulation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"2395_CR40","doi-asserted-by":"crossref","unstructured":"Li, J., Li, Z., Cao, J., Song, X., & He, R. (2021). Faceinpainter: High fidelity face adaptation to heterogeneous domains. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 5089\u20135098).","DOI":"10.1109\/CVPR46437.2021.00505"},{"key":"2395_CR41","doi-asserted-by":"crossref","unstructured":"Li, L., Bao, J., Yang, H., Chen, D., & Wen, F. (2020). Advancing high fidelity identity swapping for forgery detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 5074\u20135083).","DOI":"10.1109\/CVPR42600.2020.00512"},{"key":"2395_CR42","doi-asserted-by":"crossref","unstructured":"Li, M., Duan, Y., Zhou, J., Lu, J. 
(2022) Diffusion-sdf: Text-to-shape via voxelized diffusion. arXiv preprint arXiv:2212.03293","DOI":"10.1109\/CVPR52729.2023.01216"},{"key":"2395_CR43","doi-asserted-by":"crossref","unstructured":"Lin, Y., Wang, S., Lin, Q., & Tang, F. (2012) Face swapping under large pose variations: A 3d model based approach. In 2012 IEEE international conference on multimedia and expo (pp. 333\u2013338). IEEE","DOI":"10.1109\/ICME.2012.26"},{"key":"2395_CR44","unstructured":"Liu, H., Chen, Z., Yuan, Y., Mei, X., Liu, X., Mandic, D., Wang, W., & Plumbley, M.D. (2023a) Audioldm: Text-to-audio generation with latent diffusion models. arXiv preprint arXiv:2301.12503"},{"key":"2395_CR45","doi-asserted-by":"crossref","unstructured":"Liu, Z., Li, M., Zhang, Y., Wang, C., Zhang, Q., Wang, J., & Nie, Y. (2023b). Fine-grained face swapping via regional gan inversion. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8578\u20138587).","DOI":"10.1109\/CVPR52729.2023.00829"},{"key":"2395_CR46","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/978-3-031-19787-1_17","volume-title":"Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings","author":"Y Luo","year":"2022","unstructured":"Luo, Y., Zhu, J., He, K., Chu, W., Tai, Y., Wang, C., & Yan, J. (2022). Styleface: Towards identity-disentangled face generation on megapixels. In X. V. I. Part (Ed.), Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings (pp. 297\u2013312). Springer."},{"key":"2395_CR47","unstructured":"Molad, E., Horwitz, E., Valevski, D., Acha, A.R., Matias, Y., Pritch, Y., Leviathan, Y., & Hoshen, Y. (2023) Dreamix: Video diffusion models are general video editors. arXiv preprint arXiv:2302.01329"},{"key":"2395_CR48","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J.S., & Zisserman, A. (2017) Voxceleb: a large-scale speaker identification dataset. 
arXiv preprint arXiv:1706.08612","DOI":"10.21437\/Interspeech.2017-950"},{"key":"2395_CR49","doi-asserted-by":"crossref","unstructured":"Natsume, R., Yatagawa, T., & Morishima, S. (2018) Rsgan: face swapping and editing using face and hair representation in latent spaces. arXiv preprint arXiv:1804.03447","DOI":"10.1145\/3230744.3230818"},{"key":"2395_CR50","unstructured":"Nichol, A. Q., & Dhariwal, P. (2021). Improved denoising diffusion probabilistic models. In International conference on machine learning (pp. 8162\u20138171). PMLR"},{"key":"2395_CR51","doi-asserted-by":"crossref","unstructured":"Park, S., Zhang, X., Bulling, A., & Hilliges, O. (2018). Learning to find eye region landmarks for remote gaze estimation in unconstrained settings. In Proceedings of the 2018 ACM symposium on eye tracking research & applications (pp. 1\u201310).","DOI":"10.1145\/3204493.3204545"},{"key":"2395_CR52","unstructured":"Perov, I., Gao, D., Chervoniy, N., Liu, K., Marangonda, S., Um\u00e9, C., Dpfks, M., Facenheim, C.S., RP, L., & Jiang, J., et\u00a0al. (2020) Deepfacelab: Integrated, flexible and extensible face-swapping framework. arXiv preprint arXiv:2005.05535"},{"key":"2395_CR53","unstructured":"Poole, B., Jain, A., Barron, J.T., & Mildenhall, B. (2022) Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988"},{"issue":"11","key":"2395_CR54","doi-asserted-by":"publisher","first-page":"6347","DOI":"10.1109\/TCSVT.2023.3268062","volume":"33","author":"Q Ren","year":"2023","unstructured":"Ren, Q., Lu, Z., Wu, H., Zhang, J., & Dong, Z. (2023). Hr-net: a landmark based high realistic face reenactment network. IEEE Transactions on Circuits and Systems for Video Technology, 33(11), 6347\u20136359.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2395_CR55","doi-asserted-by":"crossref","unstructured":"Ren, Y., Li, G., Chen, Y., Li, T. H., & Liu, S. (2021). 
Pirenderer: Controllable portrait image generation via semantic neural rendering. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 13759\u201313768).","DOI":"10.1109\/ICCV48922.2021.01350"},{"key":"2395_CR56","doi-asserted-by":"crossref","unstructured":"Richardson, E., Alaluf, Y., Patashnik, O., Nitzan, Y., Azar, Y., Shapiro, S., & Cohen-Or, D. (2021). Encoding in style: a stylegan encoder for image-to-image translation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2287\u20132296).","DOI":"10.1109\/CVPR46437.2021.00232"},{"key":"2395_CR57","doi-asserted-by":"crossref","unstructured":"Rochow, A., Schwarz, M., & Behnke, S. (2024). Fsrt: Facial scene representation transformer for face reenactment from factorized appearance head-pose and facial expression features. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 7716\u20137726).","DOI":"10.1109\/CVPR52733.2024.00737"},{"key":"2395_CR58","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10684\u201310695).","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2395_CR59","doi-asserted-by":"crossref","unstructured":"Rosberg, F., Aksoy, E.E., Alonso-Fernandez, F., & Englund, C. (2023) Facedancer: Pose-and occlusion-aware high fidelity face swapping. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision (pp. 3454\u20133463)","DOI":"10.1109\/WACV56688.2023.00345"},{"key":"2395_CR60","doi-asserted-by":"crossref","unstructured":"Rossler, A., Cozzolino, D., Verdoliva, L., Riess, C., Thies, J., & Nie\u00dfner, M. (2019) Faceforensics++: Learning to detect manipulated facial images. 
In Proceedings of the IEEE\/CVF international conference on computer vision.","DOI":"10.1109\/ICCV.2019.00009"},{"key":"2395_CR61","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., & Aberman, K. (2022) Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. arXiv preprint arXiv:2208.12242","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2395_CR62","doi-asserted-by":"crossref","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E., Ghasemipour, S. K. S., Ayan, B. K., Mahdavi, S. S., & Lopes, R. G., et\u00a0al. (2022) Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487","DOI":"10.1145\/3528233.3530757"},{"key":"2395_CR63","doi-asserted-by":"crossref","unstructured":"Shiohara, K., Yang, X., & Taketomi, T. (2023). Blendface: Re-designing identity encoders for face-swapping. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 7634\u20137644).","DOI":"10.1109\/ICCV51070.2023.00702"},{"key":"2395_CR64","doi-asserted-by":"crossref","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., & Sebe, N. (2019a). Animating arbitrary objects via deep motion transfer. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2377\u20132386).","DOI":"10.1109\/CVPR.2019.00248"},{"key":"2395_CR65","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., & Sebe, N. (2019b) First order motion model for image animation. Advances in Neural Information Processing Systems, 32."},{"key":"2395_CR66","unstructured":"Simonyan, K., & Zisserman, A. (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"2395_CR67","unstructured":"Song, J., Meng, C., & Ermon, S. (2020) Denoising diffusion implicit models. 
arXiv preprint arXiv:2010.02502"},{"key":"2395_CR68","doi-asserted-by":"crossref","unstructured":"Stypulkowski, M., Vougioukas, K., He, S., Zieba, M., Petridis, S., & Pantic, M. (2023). Diffused heads: Diffusion models beat gans on talking-face generation. arXiv preprint arXiv:2301.03396","DOI":"10.1109\/WACV57701.2024.00502"},{"key":"2395_CR69","doi-asserted-by":"crossref","unstructured":"Tao, J., Wang, B., Xu, B., Ge, T., Jiang, Y., Li, W., & Duan, L. (2022). Structure-aware motion transfer with deformable anchor model. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 3637\u20133646).","DOI":"10.1109\/CVPR52688.2022.00362"},{"key":"2395_CR70","doi-asserted-by":"crossref","unstructured":"Wang, T. C., Mallya, A., & Liu, M. Y. (2021a). One-shot free-view neural talking-head synthesis for video conferencing. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10039\u201310049).","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"2395_CR71","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chen, X., Zhu, J., Chu, W., Tai, Y., Wang, C., Li, J., Wu, Y., Huang, F., & Ji, R. (2021b) Hififace: 3d shape and semantic prior guided high fidelity face swapping. arXiv preprint arXiv:2106.09965","DOI":"10.24963\/ijcai.2021\/157"},{"key":"2395_CR72","unstructured":"Wei, H., Yang, Z., & Wang, Z. (2024) Aniportrait: Audio-driven synthesis of photorealistic portrait animation. arXiv preprint arXiv:2403.17694"},{"key":"2395_CR73","doi-asserted-by":"crossref","unstructured":"Wiles, O., Koepke, A., Zisserman, A. (2018) X2face: A network for controlling face generation using images, audio, and pose codes. In Proceedings of the European conference on computer vision (ECCV) (pp. 670\u2013686).","DOI":"10.1007\/978-3-030-01261-8_41"},{"key":"2395_CR74","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., Ge, Y., Wang, X., Lei, W., Gu, Y., Hsu, W., Shan, Y., Qie, X., & Shou, M.Z. 
(2022) Tune-a-video: One-shot tuning of image diffusion models for text-to-video generation. arXiv preprint arXiv:2212.11565","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"2395_CR75","doi-asserted-by":"crossref","unstructured":"Wu, W., Zhang, Y., Li, C., Qian, C., & Loy, C. C. (2018). Reenactgan: Learning to reenact faces via boundary transfer. In Proceedings of the European conference on computer vision (ECCV) (pp. 603\u2013619).","DOI":"10.1007\/978-3-030-01246-5_37"},{"key":"2395_CR76","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1007\/978-3-031-19784-0_4","volume-title":"Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings","author":"C Xu","year":"2022","unstructured":"Xu, C., Zhang, J., Han, Y., Tian, G., Zeng, X., Tai, Y., Wang, Y., Wang, C., & Liu, Y. (2022). Designing one unified framework for high-fidelity face reenactment and swapping. In X. V. Part (Ed.), Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings (pp. 54\u201371). Springer."},{"key":"2395_CR77","doi-asserted-by":"crossref","unstructured":"Xu, C., Zhang, J., Hua, M., He, Q., Yi, Z., & Liu, Y. (2022b) Region-aware face swapping. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 7632\u20137641).","DOI":"10.1109\/CVPR52688.2022.00749"},{"key":"2395_CR78","doi-asserted-by":"crossref","unstructured":"Xu, C., Zhu, J., Zhang, J., Han, Y., Chu, W., Tai, Y., Wang, C., Xie, Z., & Liu, Y. (2023) High-fidelity generalized emotional talking face generation with multi-modal emotion space learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 6609\u20136619).","DOI":"10.1109\/CVPR52729.2023.00639"},{"key":"2395_CR79","doi-asserted-by":"crossref","unstructured":"Xu, J., Wang, X., Cheng, W., Cao, Y.P., Shan, Y., Qie, X., & Gao, S. 
(2022c) Dream3d: Zero-shot text-to-3d synthesis using 3d shape prior and text-to-image diffusion models. arXiv preprint arXiv:2212.14704","DOI":"10.1109\/CVPR52729.2023.02003"},{"key":"2395_CR80","doi-asserted-by":"crossref","unstructured":"Xu, Y., Deng, B., Wang, J., Jing, Y., Pan, J., & He, S. (2022d). High-resolution face swapping via latent semantics disentanglement. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 7642\u20137651).","DOI":"10.1109\/CVPR52688.2022.00749"},{"key":"2395_CR81","doi-asserted-by":"publisher","first-page":"661","DOI":"10.1007\/978-3-031-19781-9_38","volume-title":"Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings","author":"Z Xu","year":"2022","unstructured":"Xu, Z., Zhou, H., Hong, Z., Liu, Z., Liu, J., Guo, Z., Han, J., Liu, J., Ding, E., & Wang, J. (2022). Styleswap: Style-based generator empowers robust face swapping. In X. I. V. Part (Ed.), Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings (pp. 661\u2013677). Springer."},{"key":"2395_CR82","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., & Sang, N. (2018). Bisenet: Bilateral segmentation network for real-time semantic segmentation. In Proceedings of the European conference on computer vision (ECCV) (pp. 325\u2013341).","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"2395_CR83","doi-asserted-by":"crossref","unstructured":"Zakharov, E., Shysheya, A., Burkov, E., & Lempitsky, V. (2019). Few-shot adversarial learning of realistic neural talking head models. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 9459\u20139468).","DOI":"10.1109\/ICCV.2019.00955"},{"key":"2395_CR84","doi-asserted-by":"crossref","unstructured":"Zeng, H., Zhang, W., Fan, C., Lv, T., Wang, S., Zhang, Z., Ma, B., Li, L., Ding, Y., & Yu, X. 
(2022) Flowface: Semantic flow-guided shape-aware face swapping. arXiv preprint arXiv:2212.02797","DOI":"10.1609\/aaai.v37i3.25444"},{"key":"2395_CR85","doi-asserted-by":"crossref","unstructured":"Zeng, X., Pan, Y., Wang, M., Zhang, J., & Liu, Y. (2020). Realistic face reenactment via self-supervised disentangling of identity and pose. Proceedings of the AAAI Conference on Artificial Intelligence, 34, 12757\u201312764.","DOI":"10.1609\/aaai.v34i07.6970"},{"key":"2395_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, B., Qi, C., Zhang, P., Zhang, B., Wu, H., Chen, D., Chen, Q., Wang, Y., & Wen, F. (2023). Metaportrait: Identity-preserving talking head generation with fast personalized adaptation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 22096\u201322105).","DOI":"10.1109\/CVPR52729.2023.02116"},{"key":"2395_CR87","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zeng, X., Wang, M., Pan, Y., Liu, L., Liu, Y., Ding, Y., & Fan, C. (2020). Freenet: Multi-identity face reenactment. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 5326\u20135335).","DOI":"10.1109\/CVPR42600.2020.00537"},{"key":"2395_CR88","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., & Wang, O. (2018) The unreasonable effectiveness of deep features as a perceptual metric. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 586\u2013595).","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2395_CR89","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Li, L., Ding, Y., & Fan, C. (2021). Flow-guided one-shot talking face generation with a high-resolution audio-visual dataset. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 3661\u20133670).","DOI":"10.1109\/CVPR46437.2021.00366"},{"key":"2395_CR90","doi-asserted-by":"crossref","unstructured":"Zhao, J., & Zhang, H. (2022). 
Thin-plate spline motion model for image animation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 3657\u20133666).","DOI":"10.1109\/CVPR52688.2022.00364"},{"key":"2395_CR91","doi-asserted-by":"crossref","unstructured":"Zhao, W., Rao, Y., Shi, W., Liu, Z., Zhou, J., & Lu, J. (2023) Diffswap: High-fidelity and controllable face swapping via 3d-aware masked diffusion. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8568\u20138577).","DOI":"10.1109\/CVPR52729.2023.00828"},{"issue":"6","key":"2395_CR92","first-page":"1","volume":"39","author":"Y Zhou","year":"2020","unstructured":"Zhou, Y., Han, X., Shechtman, E., Echevarria, J., Kalogerakis, E., & Li, D. (2020). MakeItTalk: Speaker-aware talking-head animation. ACM Transactions On Graphics (TOG), 39(6), 1\u201315.","journal-title":"ACM Transactions On Graphics (TOG)"},{"key":"2395_CR93","doi-asserted-by":"crossref","unstructured":"Zhu, F., Zhu, J., Chu, W., Tai, Y., Xie, Z., Huang, X., & Wang, C. (2022) Hifihead: One-shot high fidelity neural head synthesis with 3d control. In IJCAI (pp. 1750\u20131756).","DOI":"10.24963\/ijcai.2022\/244"},{"key":"2395_CR94","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Li, Q., Wang, J., Xu, C. Z., & Sun, Z. (2021). One shot face swapping on megapixels. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 
4834\u20134844).","DOI":"10.1109\/CVPR46437.2021.00480"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02395-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02395-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02395-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T06:04:10Z","timestamp":1749276250000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02395-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,11]]},"references-count":94,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["2395"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02395-6","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,11]]},"assertion":[{"value":"4 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}