{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T20:01:38Z","timestamp":1778788898635,"version":"3.51.4"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726835","type":"print"},{"value":"9783031726842","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72684-2_8","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T19:02:45Z","timestamp":1730574165000},"page":"127-145","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":36,"title":["TalkingGaussian: Structure-Persistent 3D Talking Head Synthesis via\u00a0Gaussian Splatting"],"prefix":"10.1007","author":[{"given":"Jiahe","family":"Li","sequence":"first","affiliation":[]},{"given":"Jiawei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xiao","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Jin","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Ning","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Lin","family":"Gu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis, T., Mahmoud, M., Robinson, P.: Cross-dataset learning and person-specific normalisation for automatic action unit detection. In: 2015 11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), vol.\u00a06, pp.\u00a01\u20136. IEEE (2015)","DOI":"10.1109\/FG.2015.7284869"},{"key":"8_CR2","doi-asserted-by":"crossref","unstructured":"Baltrusaitis, T., Zadeh, A., Lim, Y.C., Morency, L.P.: OpenFace 2.0: facial behavior analysis toolkit. In: 2018 13th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2018), pp. 59\u201366. IEEE (2018)","DOI":"10.1109\/FG.2018.00019"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et al.: Efficient geometry-aware 3D generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16123\u201316133 (2022)","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Chatziagapi, A., Athar, S., Jain, A., Rohith, M., Bhat, V., Samaras, D.: LipNeRF: what is the right feature space to lip-sync a NeRF? In: 2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG), pp.\u00a01\u20138. IEEE (2023)","DOI":"10.1109\/FG57933.2023.10042567"},{"key":"8_CR5","unstructured":"Chen, G., Wang, W.: A survey on 3D Gaussian splatting. arXiv preprint arXiv:2401.03890 (2024)"},{"key":"8_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"538","DOI":"10.1007\/978-3-030-01234-2_32","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L Chen","year":"2018","unstructured":"Chen, L., Li, Z., Maddox, R.K., Duan, Z., Xu, C.: Lip movements generation at a glance. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018, Part VII. LNCS, vol. 11211, pp. 538\u2013553. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_32"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Chen, L., Maddox, R.K., Duan, Z., Xu, C.: Hierarchical cross-modal talking face generation with dynamic pixel-wise loss. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7832\u20137841 (2019)","DOI":"10.1109\/CVPR.2019.00802"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: MonoGaussianAvatar: monocular Gaussian point-based head avatar. arXiv preprint arXiv:2312.04558 (2023)","DOI":"10.1145\/3641519.3657499"},{"key":"8_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-319-54184-6_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Lip reading in the wild. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016, Part II. LNCS, vol. 10112, pp. 87\u2013103. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54184-6_6"},{"key":"8_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1007\/978-3-319-54427-4_19","volume-title":"Computer Vision \u2013 ACCV 2016 Workshops","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: Chen, C.-S., Lu, J., Ma, K.-K. (eds.) ACCV 2016, Part II. LNCS, vol. 10117, pp. 251\u2013263. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54427-4_19"},{"key":"8_CR11","volume-title":"Facial Action Coding System: Manual","author":"P Ekman","year":"1978","unstructured":"Ekman, P., Friesen, W.V.: Facial Action Coding System: Manual. Consulting Psychologists Press, Palo Alto (1978)"},{"issue":"3","key":"8_CR12","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1145\/566654.566594","volume":"21","author":"T Ezzat","year":"2002","unstructured":"Ezzat, T., Geiger, G., Poggio, T.: Trainable videorealistic speech animation. ACM Trans. Graph. (TOG) 21(3), 388\u2013398 (2002)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"Fang, J., et al.: Fast dynamic radiance fields with time-aware neural voxels. In: SIGGRAPH Asia 2022 Conference Papers, pp.\u00a01\u20139 (2022)","DOI":"10.1145\/3550469.3555383"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Guo, X., et al.: Forward flow for novel view synthesis of dynamic scenes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16022\u201316033 (2023)","DOI":"10.1109\/ICCV51070.2023.01468"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, K., Liang, S., Liu, Y.J., Bao, H., Zhang, J.: AD-NeRF: audio driven neural radiance fields for talking head synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5784\u20135794 (2021)","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"8_CR16","unstructured":"Hannun, A., et al.: Deep speech: scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)"},{"key":"8_CR17","doi-asserted-by":"publisher","first-page":"1767","DOI":"10.1007\/s11263-019-01150-y","volume":"127","author":"A Jamaludin","year":"2019","unstructured":"Jamaludin, A., Chung, J.S., Zisserman, A.: You said that?: Synthesising talking faces from audio. Int. J. Comput. Vis. 127, 1767\u20131779 (2019)","journal-title":"Int. J. Comput. Vis."},{"issue":"4","key":"8_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592433","volume":"42","author":"B Kerbl","year":"2023","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D Gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42(4), 1\u201314 (2023)","journal-title":"ACM Trans. Graph."},{"key":"8_CR19","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Kratimenos, A., Lei, J., Daniilidis, K.: DynMF: neural motion factorization for real-time dynamic view synthesis with 3D Gaussian splatting. arXiv preprint arXiv:2312.00112 (2023)","DOI":"10.1007\/978-3-031-72986-7_15"},{"key":"8_CR21","unstructured":"Kvanchiani, K., Petrova, E., Efremyan, K., Sautin, A., Kapitanov, A.: EasyPortrait\u2013face parsing and portrait segmentation dataset. arXiv preprint arXiv:2304.13509 (2023)"},{"key":"8_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102456","volume":"110","author":"J Li","year":"2024","unstructured":"Li, J., Zhang, J., Bai, X., Zheng, J., Zhou, J., Gu, L.: ER-NeRF++: efficient region-aware neural radiance fields for high-fidelity talking portrait synthesis. Inf. Fusion 110, 102456 (2024)","journal-title":"Inf. Fusion"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, J., Bai, X., Zhou, J., Gu, L.: Efficient region-aware neural radiance fields for high-fidelity talking portrait synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7568\u20137578 (2023)","DOI":"10.1109\/ICCV51070.2023.00696"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Li, W., et al.: One-shot high-fidelity talking-head synthesis with deformable neural radiance field. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17969\u201317978 (2023)","DOI":"10.1109\/CVPR52729.2023.01723"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Lin, Y., Dai, Z., Zhu, S., Yao, Y.: Gaussian-Flow: 4D reconstruction with dynamic 3D Gaussian particle. arXiv preprint arXiv:2312.03431 (2023)","DOI":"10.1109\/CVPR52733.2024.01997"},{"key":"8_CR26","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1007\/978-3-031-19836-6_7","volume-title":"Computer Vision-ECCV 2022, Part XXXVII","author":"X Liu","year":"2022","unstructured":"Liu, X., Xu, Y., Wu, Q., Zhou, H., Wu, W., Zhou, B.: Semantic-aware implicit neural audio-driven video portrait generation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XXXVII. LNCS, vol. 13697, pp. 106\u2013125. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19836-6_7"},{"key":"8_CR27","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2018)"},{"issue":"6","key":"8_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480484","volume":"40","author":"Y Lu","year":"2021","unstructured":"Lu, Y., Chai, J., Cao, X.: Live speech portraits: real-time photorealistic talking-head animation. ACM Trans. Graph. (TOG) 40(6), 1\u201317 (2021)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Luiten, J., Kopanas, G., Leibe, B., Ramanan, D.: Dynamic 3D Gaussians: tracking by persistent dynamic view synthesis. arXiv preprint arXiv:2308.09713 (2023)","DOI":"10.1109\/3DV62453.2024.00044"},{"key":"8_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-030-58452-8_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Mildenhall","year":"2020","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 405\u2013421. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24"},{"issue":"4","key":"8_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530127","volume":"41","author":"T M\u00fcller","year":"2022","unstructured":"M\u00fcller, T., Evans, A., Schied, C., Keller, A.: Instant neural graphics primitives with a multiresolution hash encoding. ACM Trans. Graph. (ToG) 41(4), 1\u201315 (2022)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Park, K., et al.: Nerfies: deformable neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5865\u20135874 (2021)","DOI":"10.1109\/ICCV48922.2021.00581"},{"key":"8_CR33","doi-asserted-by":"crossref","unstructured":"Park, K., et al.: HyperNeRF: a higher-dimensional representation for topologically varying neural radiance fields. arXiv preprint arXiv:2106.13228 (2021)","DOI":"10.1145\/3478513.3480487"},{"key":"8_CR34","doi-asserted-by":"crossref","unstructured":"Paysan, P., Knothe, R., Amberg, B., Romdhani, S., Vetter, T.: A 3D face model for pose and illumination invariant face recognition. In: 2009 Sixth IEEE International Conference on Advanced Video and Signal Based Surveillance, pp. 296\u2013301. IEEE (2009)","DOI":"10.1109\/AVSS.2009.58"},{"key":"8_CR35","doi-asserted-by":"crossref","unstructured":"Peng, Z., et al.: SyncTalk: the devil is in the synchronization for talking head synthesis. arXiv preprint arXiv:2311.17590 (2023)","DOI":"10.1109\/CVPR52733.2024.00070"},{"key":"8_CR36","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"8_CR37","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Corona, E., Pons-Moll, G., Moreno-Noguer, F.: D-NeRF: neural radiance fields for dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10318\u201310327 (2021)","DOI":"10.1109\/CVPR46437.2021.01018"},{"key":"8_CR38","doi-asserted-by":"crossref","unstructured":"Qian, S., Kirschstein, T., Schoneveld, L., Davoli, D., Giebenhain, S., Nie\u00dfner, M.: GaussianAvatars: photorealistic head avatars with rigged 3D Gaussians. arXiv preprint arXiv:2312.02069 (2023)","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"8_CR39","doi-asserted-by":"publisher","first-page":"666","DOI":"10.1007\/978-3-031-19775-8_39","volume-title":"Computer Vision-ECCV 2022, Part XII","author":"S Shen","year":"2022","unstructured":"Shen, S., Li, W., Zhu, Z., Duan, Y., Zhou, J., Lu, J.: Learning dynamic facial radiance fields for few-shot talking head synthesis. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XII. LNCS, vol. 13672, pp. 666\u2013682. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19775-8_39"},{"key":"8_CR40","doi-asserted-by":"crossref","unstructured":"Song, L., et al.: NeRFPlayer: a streamable dynamic scene representation with decomposed neural radiance fields. arXiv preprint arXiv:2210.15947 (2022)","DOI":"10.1109\/TVCG.2023.3247082"},{"issue":"4","key":"8_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S.M., Kemelmacher-Shlizerman, I.: Synthesizing Obama: learning lip sync from audio. ACM Trans. Graph. (ToG) 36(4), 1\u201313 (2017)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"8_CR42","unstructured":"Tang, J., et al.: Real-time neural radiance talking portrait synthesis via audio-spatial decomposition. arXiv preprint arXiv:2211.12368 (2022)"},{"key":"8_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1007\/978-3-030-58517-4_42","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Thies","year":"2020","unstructured":"Thies, J., Elgharib, M., Tewari, A., Theobalt, C., Nie\u00dfner, M.: Neural voice puppetry: audio-driven facial reenactment. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020, Part XVI. LNCS, vol. 12361, pp. 716\u2013731. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58517-4_42"},{"key":"8_CR44","unstructured":"Wang, J., Xie, J.C., Li, X., Xu, F., Pun, C.M., Gao, H.: GaussianHead: high-fidelity head avatars with learnable Gaussian derivation (2024)"},{"key":"8_CR45","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1007\/978-3-030-58589-1_42","volume-title":"Computer Vision \u2013 ECCV 2020","author":"K Wang","year":"2020","unstructured":"Wang, K., et al.: MEAD: a large-scale audio-visual dataset for emotional talking-face generation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020, Part XXI. LNCS, vol. 12366, pp. 700\u2013717. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58589-1_42"},{"key":"8_CR46","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2021.102102","volume":"70","author":"X Wang","year":"2021","unstructured":"Wang, X., et al.: Multi-view stereo in the deep learning era: a comprehensive review. Displays 70, 102102 (2021)","journal-title":"Displays"},{"issue":"4","key":"8_CR47","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"8_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2024.102672","volume":"83","author":"Z Wang","year":"2024","unstructured":"Wang, Z., Luo, H., Wang, X., Zheng, J., Ning, X., Bai, X.: A contrastive learning based unsupervised multi-view stereo with multi-stage self-training strategy. Displays 83, 102672 (2024)","journal-title":"Displays"},{"key":"8_CR49","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"690","DOI":"10.1007\/978-3-030-01261-8_41","volume-title":"Computer Vision \u2013 ECCV 2018","author":"O Wiles","year":"2018","unstructured":"Wiles, O., Koepke, A.S., Zisserman, A.: X2Face: a network for controlling face generation using images, audio, and pose codes. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018, Part XIII. LNCS, vol. 11217, pp. 690\u2013706. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_41"},{"key":"8_CR50","doi-asserted-by":"crossref","unstructured":"Wu, G., et al.: 4D Gaussian splatting for real-time dynamic scene rendering. arXiv preprint arXiv:2310.08528 (2023)","DOI":"10.1109\/CVPR52733.2024.01920"},{"key":"8_CR51","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Gaussian head avatar: ultra high-fidelity head avatar via dynamic Gaussians. arXiv preprint arXiv:2312.03029 (2023)","DOI":"10.1109\/CVPR52733.2024.00189"},{"key":"8_CR52","doi-asserted-by":"crossref","unstructured":"Yang, Z., Gao, X., Zhou, W., Jiao, S., Zhang, Y., Jin, X.: Deformable 3D Gaussians for high-fidelity monocular dynamic scene reconstruction. arXiv preprint arXiv:2309.13101 (2023)","DOI":"10.1109\/CVPR52733.2024.01922"},{"key":"8_CR53","unstructured":"Ye, Z., Jiang, Z., Ren, Y., Liu, J., He, J., Zhao, Z.: GeneFace: generalized and high-fidelity audio-driven 3D talking face synthesis. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"8_CR54","unstructured":"Ye, Z., et al.: Real3D-Portrait: one-shot realistic 3D talking portrait synthesis. arXiv preprint arXiv:2401.08503 (2024)"},{"key":"8_CR55","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: BiSeNet: bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018)","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"8_CR56","doi-asserted-by":"crossref","unstructured":"Zhang, C., et al.: FACIAL: synthesizing dynamic talking face with implicit attribute learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3867\u20133876 (2021)","DOI":"10.1109\/ICCV48922.2021.00384"},{"key":"8_CR57","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: CoR-GS: sparse-view 3D Gaussian splatting via co-regularization. arXiv preprint arXiv:2405.12110 (2024)","DOI":"10.1007\/978-3-031-73232-4_19"},{"key":"8_CR58","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"8_CR59","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Hu, Z., Deng, W., Fan, C., Lv, T., Ding, Y.: DINet: deformation inpainting network for realistic face visually dubbing on high resolution video. arXiv preprint arXiv:2303.03988 (2023)","DOI":"10.1609\/aaai.v37i3.25464"},{"key":"8_CR60","doi-asserted-by":"crossref","unstructured":"Zhong, W., et al.: Identity-preserving talking face generation with landmark and appearance priors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2023)","DOI":"10.1109\/CVPR52729.2023.00938"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72684-2_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T19:05:07Z","timestamp":1730574307000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72684-2_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9783031726835","9783031726842"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72684-2_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}