{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T12:29:39Z","timestamp":1771244979446,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":45,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819569496","type":"print"},{"value":"9789819569502","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-6950-2_35","type":"book-chapter","created":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T11:58:52Z","timestamp":1771243132000},"page":"494-507","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Splat-Portrait: Generalizing Talking Heads with\u00a0Gaussian Splatting"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5913-4095","authenticated-orcid":false,"given":"Tong","family":"Shi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4851-0949","authenticated-orcid":false,"given":"Melonie","family":"de Almeida","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3710-7413","authenticated-orcid":false,"given":"Daniela","family":"Ivanova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3455-6280","authenticated-orcid":false,"given":"Nicolas","family":"Pugeault","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5198-7445","authenticated-orcid":false,"given":"Paul","family":"Henderson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,17]]},"reference":[{"key":"35_CR1","unstructured":"Aneja, S., Sevastopolsky, A., Kirschstein, T., Thies, J., Dai, A., Nie\u00dfner, M.: Gaussianspeech: audio-driven gaussian avatars. arXiv preprint arXiv:2411.18675 (2024)"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et\u00a0al.: Efficient geometry-aware 3D generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16123\u201316133 (2022)","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"35_CR3","doi-asserted-by":"crossref","unstructured":"Chu, X., Goswami, N., Cui, Z., Wang, H., Harada, T.: Artalk: speech-driven 3D head animation via autoregressive model (2025). https:\/\/arxiv.org\/abs\/2502.20323","DOI":"10.1145\/3757377.3763955"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Chu, X., Harada, T.: Generalizable and animatable gaussian head avatar. In: The Thirty-Eighth Annual Conference on Neural Information Processing Systems (2024). https:\/\/openreview.net\/forum?id=gVM2AZ5xA6","DOI":"10.52202\/079017-1838"},{"key":"35_CR5","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: Workshop on Multi-view Lip-Reading, ACCV (2016)","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"35_CR6","doi-asserted-by":"crossref","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised cross-lingual representation learning for speech recognition. arXiv preprint arXiv:2006.13979 (2020)","DOI":"10.21437\/Interspeech.2021-329"},{"key":"35_CR7","doi-asserted-by":"crossref","unstructured":"Dhamo, H., et al.: Headgas: real-time animatable head avatars via 3D gaussian splatting. In: European Conference on Computer Vision, pp. 459\u2013476. Springer, Cham (2024)","DOI":"10.1007\/978-3-031-72627-9_26"},{"key":"35_CR8","doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, K., Liang, S., Liu, Y.J., Bao, H., Zhang, J.: Ad-nerf: audio driven neural radiance fields for talking head synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5784\u20135794 (2021)","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"35_CR9","unstructured":"He, T., et\u00a0al.: Gaia: zero-shot talking avatar generation. arXiv preprint arXiv:2311.15230 (2023)"},{"key":"35_CR10","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134 (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"35_CR11","first-page":"26565","volume":"35","author":"T Karras","year":"2022","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. Adv. Neural. Inf. Process. Syst. 35, 26565\u201326577 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"35_CR13","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., Drettakis, G.: 3D gaussian splatting for real-time radiance field rendering. ACM Trans. Graph. 42","DOI":"10.1145\/3592433"},{"key":"35_CR14","unstructured":"Kim, G., Seo, K., Cha, S., Noh, J.: Nerffacespeech: one-shot audio-driven 3D talking head synthesis via generative prior. arXiv preprint arXiv:2405.05749 (2024)"},{"key":"35_CR15","doi-asserted-by":"crossref","unstructured":"Li, T., Bolkart, T., Black, M.J., Li, H., Romero, J.: Learning a model of facial shape and expression from 4D scans. ACM Trans. Graph. 36(6), 194\u20131 (2017)","DOI":"10.1145\/3130800.3130813"},{"key":"35_CR16","doi-asserted-by":"crossref","unstructured":"Li, W., et al.: One-shot high-fidelity talking-head synthesis with deformable neural radiance field. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17969\u201317978 (2023)","DOI":"10.1109\/CVPR52729.2023.01723"},{"key":"35_CR17","unstructured":"Li, X., De\u00a0Mello, S., Liu, S., Nagano, K., Iqbal, U., Kautz, J.: Generalizable one-shot 3D neural head avatar. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"35_CR18","unstructured":"Liu, C.: An analysis of the current and future state of 3d facial animation techniques and systems (2009)"},{"key":"35_CR19","unstructured":"Liu, M., et al.: One-2-3-45: any single image to 3d mesh in 45 seconds without per-shape optimization. In: Advances in Neural Information Processing Systems, vol. 36, pp. 22226\u201322246 (2023)"},{"key":"35_CR20","doi-asserted-by":"crossref","unstructured":"Liu, T., et al.: Vqtalker: towards multilingual talking avatars through facial motion tokenization. arXiv preprint arXiv:2412.09892 (2024)","DOI":"10.1609\/aaai.v39i6.32595"},{"key":"35_CR21","doi-asserted-by":"crossref","unstructured":"Ma, Z., Zhu, X., Qi, G.J., Lei, Z., Zhang, L.: Otavatar: one-shot talking face avatar with controllable tri-plane rendering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16901\u201316910 (2023)","DOI":"10.1109\/CVPR52729.2023.01621"},{"issue":"1","key":"35_CR22","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"35_CR23","doi-asserted-by":"crossref","unstructured":"M\u00fcller, N., Siddiqui, Y., Porzi, L., Bulo, S.R., Kontschieder, P., Nie\u00dfner, M.: Diffrf: rendering-guided 3D radiance field diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4328\u20134338 (2023)","DOI":"10.1109\/CVPR52729.2023.00421"},{"key":"35_CR24","doi-asserted-by":"crossref","unstructured":"Peng, Z., et al.: Synctalk: the devil is in the synchronization for talking head synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 666\u2013676 (2024)","DOI":"10.1109\/CVPR52733.2024.00070"},{"key":"35_CR25","doi-asserted-by":"crossref","unstructured":"Perez, E., Strub, F., De\u00a0Vries, H., Dumoulin, V., Courville, A.: Film: visual reasoning with a general conditioning layer. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"35_CR26","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: Dreamfusion: text-to-3D using 2D diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"35_CR27","doi-asserted-by":"crossref","unstructured":"Rivero, A., Athar, S., Shu, Z., Samaras, D.: Rig3dgs: creating controllable portraits from casual monocular videos. arXiv preprint arXiv:2402.03723 (2024)","DOI":"10.1109\/3DV66043.2025.00144"},{"key":"35_CR28","unstructured":"Saunders, J., Namboodiri, V.: Dubbing for everyone: data-efficient visual dubbing using neural rendering priors. arXiv preprint arXiv:2401.06126 (2024)"},{"key":"35_CR29","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"35_CR30","doi-asserted-by":"crossref","unstructured":"Stypu\u0142kowski, M., Vougioukas, K., He, S., Zi\u0119ba, M., Petridis, S., Pantic, M.: Diffused heads: diffusion models beat GANs on talking-face generation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5091\u20135100 (2024)","DOI":"10.1109\/WACV57701.2024.00502"},{"key":"35_CR31","doi-asserted-by":"crossref","unstructured":"Szymanowicz, S., Rupprecht, C., Vedaldi, A.: Splatter image: ultra-fast single-view 3D reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10208\u201310217 (2024)","DOI":"10.1109\/CVPR52733.2024.00972"},{"key":"35_CR32","doi-asserted-by":"crossref","unstructured":"Taubner, F., Zhang, R., Tuli, M., Lindell, D.B.: Cap4D: creating animatable 4D portrait avatars with morphable multi-view diffusion models. arXiv preprint arXiv:2412.12093 (2024)","DOI":"10.1109\/CVPR52734.2025.00501"},{"key":"35_CR33","first-page":"12349","volume":"36","author":"A Tewari","year":"2023","unstructured":"Tewari, A., et al.: Diffusion with forward models: solving stochastic inverse problems without direct supervision. Adv. Neural. Inf. Process. Syst. 36, 12349\u201312362 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"35_CR34","doi-asserted-by":"crossref","unstructured":"Thies, J., Zollhofer, M., Stamminger, M., Theobalt, C., Nie\u00dfner, M.: Face2face: real-time face capture and reenactment of RGB videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2387\u20132395 (2016)","DOI":"10.1109\/CVPR.2016.262"},{"key":"35_CR35","unstructured":"Wang, J., Xie, J.C., Li, X., Xu, F., Pun, C.M., Gao, H.: Gaussianhead: impressive head avatars with learnable gaussian diffusion. arXiv preprint arXiv:2312.01632 (2023)"},{"key":"35_CR36","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Mallya, A., Liu, M.Y.: One-shot free-view neural talking-head synthesis for video conferencing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10039\u201310049 (2021)","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"35_CR37","unstructured":"Watson, D., Chan, W., Martin-Brualla, R., Ho, J., Tagliasacchi, A., Norouzi, M.: Novel view synthesis with diffusion models. arXiv preprint arXiv:2210.04628 (2022)"},{"key":"35_CR38","unstructured":"Xu, M., et al.: Hallo: hierarchical audio-driven visual synthesis for portrait image animation. arXiv preprint arXiv:2406.08801 (2024)"},{"key":"35_CR39","doi-asserted-by":"crossref","unstructured":"Ye, T., et al.: Perceiving and modeling density for image dehazing. In: European Conference on Computer Vision, pp. 130\u2013145. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-19800-7_8"},{"key":"35_CR40","unstructured":"Ye, Z., et\u00a0al.: Real3d-portrait: one-shot realistic 3D talking portrait synthesis. arXiv preprint arXiv:2401.08503 (2024)"},{"key":"35_CR41","doi-asserted-by":"crossref","unstructured":"Yu, H., et\u00a0al.: Gaussiantalker: speaker-specific talking head synthesis via 3D gaussian splatting. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 3548\u20133557 (2024)","DOI":"10.1145\/3664647.3681675"},{"key":"35_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"35_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Sadtalker: learning realistic 3D motion coefficients for stylized audio-driven single image talking face animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8652\u20138661 (2023)","DOI":"10.1109\/CVPR52729.2023.00836"},{"key":"35_CR44","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Li, L., Ding, Y., Fan, C.: Flow-guided one-shot talking face generation with a high-resolution audio-visual dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3661\u20133670 (2021)","DOI":"10.1109\/CVPR46437.2021.00366"},{"key":"35_CR45","doi-asserted-by":"crossref","unstructured":"Zhu, X., Lei, Z., Liu, X., Shi, H., Li, S.Z.: Face alignment across large poses: a 3D solution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 146\u2013155 (2016)","DOI":"10.1109\/CVPR.2016.23"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-6950-2_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T11:59:01Z","timestamp":1771243141000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-6950-2_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819569496","9789819569502"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-6950-2_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"17 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 January 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2026.cz\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}