{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T12:14:37Z","timestamp":1775132077983,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:00:00Z","timestamp":1770336000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T00:00:00Z","timestamp":1770336000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s00530-026-02212-1","type":"journal-article","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T03:14:51Z","timestamp":1770347691000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Deformabletalker: edge-aware adaptive interaction for audio-driven 3D face animation with 3D Gaussian splatting"],"prefix":"10.1007","volume":"32","author":[{"given":"Minghui","family":"Shao","sequence":"first","affiliation":[]},{"given":"Haoran","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Guodong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Junli","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,6]]},"reference":[{"key":"2212_CR1","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"2212_CR2","doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, K., Liang, S., Liu, Y.-J., Bao, H., Zhang, J.: Ad-nerf: audio driven neural radiance fields for talking head synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5784\u20135794 (2021)","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"2212_CR3","unstructured":"Guo, L., Ning, X., Fu, Y., Zhao, T., Kang, Z., Yu, J., Lin, Y.C., Wang, Y.: Rad-nerf: ray-decoupled training of neural radiance field. In: The Thirty-Eighth Annual Conference on Neural Information Processing Systems"},{"key":"2212_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102456","volume":"110","author":"J Li","year":"2024","unstructured":"Li, J., Zhang, J., Bai, X., Zheng, J., Zhou, J., Gu, L.: Er-nerf++: efficient region-aware neural radiance fields for high-fidelity talking portrait synthesis. Inf. Fusion 110, 102456 (2024)","journal-title":"Inf. Fusion"},{"key":"2212_CR5","doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, J., Bai, X., Zheng, J., Ning, X., Zhou, J., Gu, L.: Talkinggaussian: structure-persistent 3d talking head synthesis via gaussian splatting. In: European Conference on Computer Vision, Springer, pp. 127\u2013145 (2024)","DOI":"10.1007\/978-3-031-72684-2_8"},{"key":"2212_CR6","first-page":"660","volume":"37","author":"S Xu","year":"2025","unstructured":"Xu, S., Chen, G., Guo, Y.-X., Yang, J., Li, C., Zang, Z., Zhang, Y., Tong, X., Guo, B.: Vasa-1: lifelike audio-driven talking faces generated in real time. Adv. Neural. Inf. Process. Syst. 37, 660\u2013684 (2025)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2212_CR7","unstructured":"Ye, Z., Jiang, Z., Ren, Y., Liu, J., He, J., Zhao, Z.: Geneface: generalized and high-fidelity audio-driven 3d talking face synthesis, arXiv preprint arXiv:2301.13430 (2023)"},{"key":"2212_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Mihajlovic, M., Wang, S., Prokudin, S., Tang, S.: Morphable diffusion: 3d-consistent diffusion for single-image avatar creation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10,359\u201310,370 (2024)","DOI":"10.1109\/CVPR52733.2024.00986"},{"key":"2212_CR9","doi-asserted-by":"crossref","unstructured":"Du, C., Chen, Q., He, T., Tan, X., Chen, X., Yu, K., Zhao, S., Bian, J.: Dae-talker: high fidelity speech-driven talking face generation with diffusion autoencoder. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 4281\u20134289 (2023)","DOI":"10.1145\/3581783.3613753"},{"key":"2212_CR10","doi-asserted-by":"crossref","unstructured":"Kirschstein, T., Giebenhain, S., Nie\u00dfner, M.: Diffusionavatars: deferred diffusion for high-fidelity 3d head avatars. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5481\u20135492 (2024)","DOI":"10.1109\/CVPR52733.2024.00524"},{"key":"2212_CR11","doi-asserted-by":"crossref","unstructured":"Shen, S., Zhao, W., Meng, Z., Li, W., Zhu, Z., Zhou, J., Lu, J.: Difftalk: crafting diffusion models for generalized audio-driven portraits animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1982\u20131991 (2023)","DOI":"10.1109\/CVPR52729.2023.00197"},{"key":"2212_CR12","doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, J., Bai, X., Zhou, J., Gu, L.: Efficient region-aware neural radiance fields for high-fidelity talking portrait synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7568\u20137578 (2023)","DOI":"10.1109\/ICCV51070.2023.00696"},{"key":"2212_CR13","doi-asserted-by":"crossref","unstructured":"Tian, G., Yuan, Y., Liu, Y.: Audio2face: generating speech\/face animation from single audio with attention-based bidirectional lstm networks. In: 2019 IEEE International Conference on Multimedia and Expo Workshops (ICMEW). IEEE, pp. 366\u2013371 (2019)","DOI":"10.1109\/ICMEW.2019.00069"},{"issue":"1","key":"2212_CR14","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"issue":"4","key":"2212_CR15","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3592433","volume":"42","author":"B Kerbl","year":"2023","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3d gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42(4), 139\u20131 (2023)","journal-title":"ACM Trans. Graph."},{"key":"2212_CR16","doi-asserted-by":"crossref","unstructured":"Ma, S., Weng, Y., Shao, T., Zhou, K.: 3d gaussian blendshapes for head avatar animation. In: ACM SIGGRAPH 2024 Conference Papers, pp. 1\u201310 (2024)","DOI":"10.1145\/3641519.3657462"},{"key":"2212_CR17","doi-asserted-by":"crossref","unstructured":"Yang, Z., Gao, X., Zhou, W., Jiao, S., Zhang, Y., Jin, X.: Deformable 3d gaussians for high-fidelity monocular dynamic scene reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20,331\u201320,341 (2024)","DOI":"10.1109\/CVPR52733.2024.01922"},{"key":"2212_CR18","doi-asserted-by":"crossref","unstructured":"Xu, Y., Chen, B., Li, Z., Zhang, H., Wang, L., Zheng, Z., Liu, Y.: Gaussian head avatar: ultra high-fidelity head avatar via dynamic gaussians. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2024)","DOI":"10.1109\/CVPR52733.2024.00189"},{"key":"2212_CR19","doi-asserted-by":"crossref","unstructured":"Xiang, J., Gao, X., Guo, Y., Zhang, J.: Flashavatar: high-fidelity head avatar with efficient gaussian embedding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1802\u20131812 (2024)","DOI":"10.1109\/CVPR52733.2024.00177"},{"key":"2212_CR20","doi-asserted-by":"crossref","unstructured":"Qian, S., Kirschstein, T., Schoneveld, L., Davoli, D., Giebenhain, S., Nie\u00dfner, M.: Gaussianavatars: photorealistic head avatars with rigged 3d gaussians. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20,299\u201320,309 (2024)","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"2212_CR21","doi-asserted-by":"crossref","unstructured":"Qian, Z., Wang, S., Mihajlovic, M., Geiger, A., Tang, S.: 3dgs-avatar: animatable avatars via deformable 3d gaussian splatting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5020\u20135030 (2024)","DOI":"10.1109\/CVPR52733.2024.00480"},{"key":"2212_CR22","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, L., Li, Q., Xiao, H., Zhang, S., Yao, H., Liu, Y.: Monogaussianavatar: monocular gaussian point-based head avatar. In: ACM SIGGRAPH 2024 Conference Papers, pp. 1\u20139 (2024)","DOI":"10.1145\/3641519.3657499"},{"key":"2212_CR23","doi-asserted-by":"crossref","unstructured":"Schneider, S., Baevski, A., Collobert, R., Auli, M.: Wav2vec: unsupervised pre-training for speech recognition, arXiv preprint arXiv:1904.05862 (2019)","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"2212_CR24","unstructured":"Hannun, A., Case, C., Casper, J., Catanzaro, B., Diamos, G., Elsen, E., Prenger, R., Satheesh, S., Sengupta, S., Coates, A., et al.: Deep speech: scaling up end-to-end speech recognition, arXiv preprint arXiv:1412.5567 (2014)"},{"key":"2212_CR25","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"W-N Hsu","year":"2021","unstructured":"Hsu, W.-N., Bolte, B., Tsai, Y.-H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: Hubert: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 3451\u20133460 (2021)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2212_CR26","doi-asserted-by":"crossref","unstructured":"Cho, K., Lee, J., Yoon, H., Hong, Y., Ko, J., Ahn, S., Kim, S.: Gaussiantalker: real-time high-fidelity talking head synthesis with audio-driven 3d gaussian splatting, arXiv preprint arXiv:2404.16012 (2024)","DOI":"10.1145\/3664647.3681627"},{"key":"2212_CR27","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Feng, Q., Li, H.: Syngauss: real-time 3d gaussian splatting for audio-driven talking head synthesis. IEEE Access (2025)","DOI":"10.1109\/ACCESS.2025.3548015"},{"key":"2212_CR28","unstructured":"Chen, B., Hu, S., Chen, Q., Du, C., Yi, R., Qian, Y., Chen, X.: Gstalker: real-time audio-driven talking face generation via deformable gaussian splatting, arXiv preprint arXiv:2404.19040 (2024)"},{"issue":"6","key":"2212_CR29","doi-asserted-by":"publisher","first-page":"5293","DOI":"10.1109\/TDSC.2024.3371530","volume":"21","author":"S Wu","year":"2024","unstructured":"Wu, S., Liu, Z., Zhang, B., Zimmermann, R., Ba, Z., Zhang, X., Ren, K.: Do as i do: pose guided human motion copy. IEEE Trans. Dependable Secure Comput. 21(6), 5293\u20135307 (2024)","journal-title":"IEEE Trans. Dependable Secure Comput."},{"key":"2212_CR30","doi-asserted-by":"crossref","unstructured":"Wu, S., Chen, H., Yin, Y., Hu, S., Feng, R., Jiao, Y., Yang, Z., Liu, Z.: Joint-motion mutual learning for pose estimation in video. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 8962\u20138971 (2024)","DOI":"10.1145\/3664647.3681179"},{"key":"2212_CR31","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Verbin, D., Srinivasan, P.P., Hedman, P.: Mip-nerf 360: unbounded anti-aliased neural radiance fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5470\u20135479 (2022)","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"2212_CR32","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Corona, E., Pons-Moll, G., Moreno-Noguer, F.: D-nerf: neural radiance fields for dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10318\u201310327 (2021)","DOI":"10.1109\/CVPR46437.2021.01018"},{"key":"2212_CR33","doi-asserted-by":"crossref","unstructured":"Park, K., Sinha, U., Hedman, P., Barron, J.T., Bouaziz, S., Goldman, D.B., Martin-Brualla, R., Seitz, S.M.: Hypernerf: a higher-dimensional representation for topologically varying neural radiance fields, arXiv preprint arXiv:2106.13228 (2021)","DOI":"10.1145\/3478513.3480487"},{"issue":"5","key":"2212_CR34","doi-asserted-by":"publisher","first-page":"2732","DOI":"10.1109\/TVCG.2023.3247082","volume":"29","author":"L Song","year":"2023","unstructured":"Song, L., Chen, A., Li, Z., Chen, Z., Chen, L., Yuan, J., Xu, Y., Geiger, A.: Nerfplayer: a streamable dynamic scene representation with decomposed neural radiance fields. IEEE Trans. Visual Comput. Graphics 29(5), 2732\u20132742 (2023)","journal-title":"IEEE Trans. Visual Comput. Graphics"},{"key":"2212_CR35","doi-asserted-by":"crossref","unstructured":"Fang, J., Yi, T., Wang, X., Xie, L., Zhang, X., Liu, W., Nie\u00dfner, M., Tian, Q.: Fast dynamic radiance fields with time-aware neural voxels. In: SIGGRAPH Asia 2022 Conference Papers, pp. 1\u20139 (2022)","DOI":"10.1145\/3550469.3555383"},{"key":"2212_CR36","doi-asserted-by":"crossref","unstructured":"Park, K., Sinha, U., Barron, J.T., Bouaziz, S., Goldman, D.B., Seitz, S.M., Martin-Brualla, R.: Nerfies: deformable neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5865\u20135874 (2021)","DOI":"10.1109\/ICCV48922.2021.00581"},{"key":"2212_CR37","unstructured":"Wang, J., Xie, J., Li, X., Xu, F., Pun, C.-M., Gao, H.: Gaussianhead: impressive head avatars with learnable gaussian diffusion, CoRR (2023)"},{"key":"2212_CR38","doi-asserted-by":"crossref","unstructured":"Shao, Z., Wang, Z., Li, Z., Wang, D., Lin, X., Zhang, Y., Fan, M., Wang, Z.: Splattingavatar: realistic real-time human avatars with mesh-embedded gaussian splatting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1606\u20131616 (2024)","DOI":"10.1109\/CVPR52733.2024.00159"},{"issue":"6","key":"2212_CR39","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1145\/3130800.3130813","volume":"36","author":"T Li","year":"2017","unstructured":"Li, T., Bolkart, T., Black, M.J., Li, H., Romero, J.: Learning a model of facial shape and expression from 4d scans. ACM Trans. Graph. 36(6), 194\u20131 (2017)","journal-title":"ACM Trans. Graph."},{"key":"2212_CR40","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis, T., Robinson, P., Morency, L.-P.: Openface: an open source facial behavior analysis toolkit. In: 2016 IEEE Winter Conference on Applications of Computer Vision (WACV). IEEE, pp. 1\u201310 (2016)","DOI":"10.1109\/WACV.2016.7477553"},{"issue":"4","key":"2212_CR41","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"2212_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2212_CR43","doi-asserted-by":"crossref","unstructured":"Chen, L., Li, Z., Maddox, R.K., Duan, Z., Xu, C.: Lip movements generation at a glance. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 520\u2013535 (2018)","DOI":"10.1007\/978-3-030-01234-2_32"},{"key":"2212_CR44","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: Computer Vision\u2014ACCV 2016 Workshops: ACCV 2016 International Workshops, Taipei, Taiwan, November 20\u201324, 2016, Revised Selected Papers, Part II 13. Springer (2017), pp. 251\u2013263","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"2212_CR45","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Process. Syst. 30 (2017)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02212-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-026-02212-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02212-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:37:42Z","timestamp":1775129862000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-026-02212-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,6]]},"references-count":45,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["2212"],"URL":"https:\/\/doi.org\/10.1007\/s00530-026-02212-1","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,6]]},"assertion":[{"value":"21 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}}],"article-number":"147"}}