{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T06:41:38Z","timestamp":1776494498531,"version":"3.51.2"},"reference-count":98,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s11263-025-02725-8","type":"journal-article","created":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T10:20:22Z","timestamp":1772792422000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Interactive Conversational 3D Virtual Human"],"prefix":"10.1007","volume":"134","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3141-5562","authenticated-orcid":false,"given":"Richard","family":"Shaw","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youngkyoon","family":"Jang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Athanasios","family":"Papaioannou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Arthur","family":"Moreau","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Helisa","family":"Dhamo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhensong","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eduardo","family":"P\u00e9rez-Pellitero","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,3,6]]},"reference":[{"key":"2725_CR1","unstructured":"ARKit. https:\/\/developer.apple.com\/augmented-reality\/arkit"},{"key":"2725_CR2","volume-title":"Voice2Face: Audio-Driven Facial and Tongue Rig Animations with cVAEs","author":"MV Aylagas","year":"2022","unstructured":"Aylagas, M. V., Leon, H. A., Teye, M., et al. (2022). Voice2Face: Audio-Driven Facial and Tongue Rig Animations with cVAEs. Forum: Comput. Graph."},{"key":"2725_CR3","doi-asserted-by":"crossref","unstructured":"Bagautdinov, T., Wu, C., Simon, T., Prada, F., Shiratori, T., Wei, S. E., & Saragih, J. (2021). Driving-Signal Aware Full-Body Avatars. ACM ToG.","DOI":"10.1145\/3450626.3459850"},{"key":"2725_CR4","unstructured":"Bai, J., Bai, S., Chu, Y., Cui, Z., Dang, K., Deng, X., & Zhu, T. (2023). Qwen Technical Report. arXiv preprint arXiv:2309.16609."},{"key":"2725_CR5","unstructured":"Brown, P. F., Della Pietra, V. J., Desouza, P. V., Lai, J. C., & Mercer, R. L. (1992). Class-Based n-gram Models of Natural Language. Computational Linguistics (1992)"},{"key":"2725_CR6","doi-asserted-by":"publisher","DOI":"10.1145\/3641289","volume-title":"A Survey on Evaluation of Large Language Models","author":"Y Chang","year":"2024","unstructured":"Chang, Y., Wang, X., Wang, J., et al. (2024). A Survey on Evaluation of Large Language Models. Technol: ACM Trans. Intell. Syst."},{"key":"2725_CR7","doi-asserted-by":"crossref","unstructured":"Chen, J., Hu, J., Wang, G., Jiang, Z., Zhou, T., Chen, Z., & Lv, C. (2025). Taoavatar: Real-time lifelike full-body talking avatars for augmented reality via 3d gaussian splatting. In: CVPR.","DOI":"10.1109\/CVPR52734.2025.01002"},{"key":"2725_CR8","doi-asserted-by":"crossref","unstructured":"Chen, L., Li, Z., Maddox, R. K., Duan, Z., & Xu, C. (2018). Lip Movements Generation at a Glance. ECCV, 520\u2013535.","DOI":"10.1007\/978-3-030-01234-2_32"},{"key":"2725_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, W., Chen, R., Fan, S., Yin, W., Chen, K., Cai, Z., & Lin, K. Y. (2023). DNA-Rendering: A Diverse Neural Actor Repository for High-Fidelity Human-Centric Rendering. ICCV.","DOI":"10.1109\/ICCV51070.2023.01829"},{"key":"2725_CR10","doi-asserted-by":"crossref","unstructured":"Chhatre, K., Danecek, R., Athanasiou, N., Becherini, G., Peters, C., Black, M.J., & Bolkart, T. (2023). Emotional speech-driven 3d body animation via disentangled latent diffusion. CVPR, 1942\u20131953.","DOI":"10.1109\/CVPR52733.2024.00190"},{"key":"2725_CR11","doi-asserted-by":"crossref","unstructured":"Cho, K., Lee, J., Yoon, H., Hong, Y., Ko, J., Ahn, S., & Kim, S. (2024). GaussianTalker: Real-Time High-Fidelity Talking Head Synthesis with Audio-Driven 3D Gaussian Splatting.","DOI":"10.1145\/3664647.3681627"},{"key":"2725_CR12","doi-asserted-by":"crossref","unstructured":"Chung, J. S., & Zisserman, A. (2016, November). Out of Time: Automated Lip Sync in the Wild. ACCV 2016 International Workshops, 251\u2013263.","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"2725_CR13","unstructured":"Devlin, J., Chang, M. W., Lee, K., & Toutanova, K. (2019, June). BERT: Pre-Training of Deep Bidirectional Transformers for Language Understanding. In: Conf. of the North American Chapter of the Association for Computational Linguistics."},{"key":"2725_CR14","doi-asserted-by":"crossref","unstructured":"Dhamo, H., Nie, Y., Moreau, A., Song, J., Shaw, R., Zhou, Y., & P\u00e9rez-Pellitero, E. (2024, September). Headgas: Real-time animatable head avatars via 3d gaussian splatting. In European Conference on Computer Vision (pp. 459-476). Cham: Springer Nature Switzerland.","DOI":"10.1007\/978-3-031-72627-9_26"},{"key":"2725_CR15","volume-title":"GPT-3: Its Nature","author":"L Floridi","year":"2020","unstructured":"Floridi, L., & Chiriatti, M. (2020). GPT-3: Its Nature. Scope: Limits, and Consequences. Minds Mach."},{"key":"2725_CR16","doi-asserted-by":"crossref","unstructured":"Fridovich-Keil, S., Meanti, G., Warburg, F. R., Recht, B., & Kanazawa, A. (2023). K-Planes: Explicit Radiance Fields in Space, Time, and Appearance. CVPR.","DOI":"10.1109\/CVPR52729.2023.01201"},{"key":"2725_CR17","doi-asserted-by":"crossref","unstructured":"Gao, J., & Lin, C.-Y. (2004). Introduction to the Special Issue on Statistical Language Modeling. ACM Transactions on Asian Language Information Processing","DOI":"10.1145\/1034780.1034781"},{"key":"2725_CR18","doi-asserted-by":"crossref","unstructured":"Gao, X., Zhong, C., Xiang, J., Hong, Y., Guo, Y., & Zhang, J. (2022). Reconstructing Personalized Semantic Facial NeRF Models from Monocular Video. ACM ToG.","DOI":"10.1145\/3550454.3555501"},{"key":"2725_CR19","doi-asserted-by":"crossref","unstructured":"Grassal, P. W., Prinzler, M., Leistner, T., Rother, C., Nie\u00dfner, M., & Thies, J. (2021). Neural Head Avatars from Monocular RGB Videos. CVPR.","DOI":"10.1109\/CVPR52688.2022.01810"},{"key":"2725_CR20","doi-asserted-by":"crossref","unstructured":"Habibie, I., Elgharib, M., Sarkar, K., Abdullah, A., Nyatsanga, S., Neff, M., & Theobalt, C. (2022, July). A Motion Matching-Based Framework for Controllable Gesture Synthesis from Speech. SIGGRAPH.","DOI":"10.1145\/3528233.3530750"},{"key":"2725_CR21","doi-asserted-by":"crossref","unstructured":"Hong, Y., Peng, B., Xiao, H., Liu, L., & Zhang, J. (2021). HeadNeRF: A Realtime NeRF-based Parametric Head Model. CVPR","DOI":"10.1109\/CVPR52688.2022.01973"},{"key":"2725_CR22","doi-asserted-by":"crossref","unstructured":"Hu, L., Zhang, H., Zhang, Y., Zhou, B., Liu, B., Zhang, S., & Nie, L. (2024). GaussianAvatar: Towards Realistic Human Avatar Modeling from a Single Video via Animatable 3D Gaussians. CVPR.","DOI":"10.1109\/CVPR52733.2024.00067"},{"key":"2725_CR23","doi-asserted-by":"crossref","unstructured":"Huang, Y. H., Sun, Y. T., Yang, Z., Lyu, X., Cao, Y. P., & Qi, X. (2024). SC-GS: Sparse-Controlled Gaussian Splatting for Editable Dynamic Scenes. CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00404"},{"issue":"4","key":"2725_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592415","volume":"42","author":"M I\u015f\u0131k","year":"2023","unstructured":"I\u015f\u0131k, M., R\u00fcnz, M., Georgopoulos, M., Khakhulin, T., Starck, J., Agapito, L., & Nie\u00dfner, M. (2023). Humanrf: High-fidelity neural radiance fields for humans in motion. ACM Transactions on Graphics (TOG),42(4), 1\u201312.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2725_CR25","doi-asserted-by":"crossref","unstructured":"Jang, Y., Zheng, J., Song, J., Dhamo, H., P\u00e9rez-Pellitero, E., Tanay, T., & Zafeiriou, S. (2023). VSCHH 2023: A Benchmark for the View Synthesis Challenge of Human Heads. ICCVW.","DOI":"10.1109\/ICCVW60793.2023.00120"},{"key":"2725_CR26","doi-asserted-by":"crossref","unstructured":"Jiang, B., Hong, Y., Bao, H., & Zhang, J. (2022). SelfRecon: Self Reconstruction Your Digital Avatar From Monocular Video. CVPR.","DOI":"10.1109\/CVPR52688.2022.00552"},{"key":"2725_CR27","doi-asserted-by":"crossref","unstructured":"Johnson, J., Alahi, A., & Fei-Fei, L. (2016). Perceptual Losses for Real-Time Style Transfer and Super-Resolution. ECCV.","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"2725_CR28","unstructured":"Jung, H., Brasch, N., Song, J., Perez-Pellitero, E., Zhou, Y., Li, Z., & Busam, B. (2023). Deformable 3D Gaussian Splatting for Animatable Human Avatars. arXiv preprint arXiv:2312.15059"},{"key":"2725_CR29","doi-asserted-by":"crossref","unstructured":"Kavan, L., Collins, S., \u017d\u00e1ra, J., & O\u2019Sullivan, C. (2007, April). Skinning with Dual Quaternions. In: Symposium on Interactive 3D Graphics and Games.","DOI":"10.1145\/1230100.1230107"},{"key":"2725_CR30","doi-asserted-by":"crossref","unstructured":"Kerbl, B., Kopanas, G., Leimk\u00fchler, T., & Drettakis, G. (2023). 3D Gaussian Splatting for Real-Time Radiance Field Rendering. ACM ToG (2023)","DOI":"10.1145\/3592433"},{"key":"2725_CR31","doi-asserted-by":"crossref","unstructured":"Khakhulin, T., Sklyarova, V., Lempitsky, V.S., & Zakharov, E. (2022). Realistic one-shot mesh-based head avatars. ECCV.","DOI":"10.1007\/978-3-031-20086-1_20"},{"key":"2725_CR32","doi-asserted-by":"crossref","unstructured":"Kim, H., Garrido, P., Tewari, A., Xu, W., Thies, J., Niessner, M., & Theobalt, C. (2018). Deep Video Portraits. ACM ToG.","DOI":"10.1145\/3197517.3201283"},{"key":"2725_CR33","doi-asserted-by":"crossref","unstructured":"Kirschstein, T., Qian, S., Giebenhain, S., Walter, T., & Nie\u00dfner, M. (2023). NeRSemble: Multi-View Radiance Field Reconstruction of Human Heads. ACM ToG.","DOI":"10.1145\/3592455"},{"key":"2725_CR34","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Chang, J. H. R., Gabriel, J., Tuzel, O., & Ranjan, A. (2024). HUGS: Human Gaussian Splatting. CVPR.","DOI":"10.1109\/CVPR52733.2024.00055"},{"key":"2725_CR35","unstructured":"Kulh\u00e1nek, J., Peng, S., K\u00fakelov\u00e1, Z., Pollefeys, M., & Sattler, T. (2024). Wildgaussians: 3d gaussian splatting in the wild. NeurIPS."},{"key":"2725_CR36","doi-asserted-by":"crossref","unstructured":"Li, Z., Chen, Z., Li, Z., & Xu, Y. (2024). Spacetime Gaussian Feature Splatting for Real-Time Dynamic View Synthesis. CVPR.","DOI":"10.1109\/CVPR52733.2024.00813"},{"key":"2725_CR37","doi-asserted-by":"crossref","unstructured":"Li, Z., Chen, Z., Li, Z., & Xu, Y. (2024). Spacetime Gaussian Feature Splatting for Real-Time Dynamic View Synthesis. CVPR.","DOI":"10.1109\/CVPR52733.2024.00813"},{"key":"2725_CR38","doi-asserted-by":"crossref","unstructured":"Li, Z., Niklaus, S., Snavely, N., & Wang, O. (2021). Neural scene flow fields for space-time view synthesis of dynamic scenes. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 6498-6508).","DOI":"10.1109\/CVPR46437.2021.00643"},{"key":"2725_CR39","doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, J., Bai, X., Zheng, J., Ning, X., Zhou, J., & Gu, L. (2024, September). TalkingGaussian: Structure-Persistent 3D Talking Head Synthesis via Gaussian Splatting. ECCV, 127\u2013145.","DOI":"10.1007\/978-3-031-72684-2_8"},{"key":"2725_CR40","doi-asserted-by":"crossref","unstructured":"Li, J., Zhang, J., Bai, X., Zhou, J., & Gu, L. (2023). Efficient Region-Aware Neural Radiance Fields for High-Fidelity Talking Portrait Synthesis. ICCV, 7568\u20137578.","DOI":"10.1109\/ICCV51070.2023.00696"},{"key":"2725_CR41","doi-asserted-by":"crossref","unstructured":"Li, Z., Zheng, Z., Wang, L., & Liu, Y. (2024). Animatable Gaussians: Learning Pose-Dependent Gaussian Maps for High-Fidelity Human Avatar Modeling. CVPR.","DOI":"10.1109\/CVPR52733.2024.01864"},{"issue":"6","key":"2725_CR42","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1145\/3130800.3130813","volume":"36","author":"T Li","year":"2017","unstructured":"Li, T., Bolkart, T., Black, M. J., Li, H., & Romero, J. (2017). Learning a model of facial shape and expression from 4D scans. ACM Trans. Graph.,36(6), 194\u20131.","journal-title":"ACM Trans. Graph."},{"key":"2725_CR43","doi-asserted-by":"crossref","unstructured":"Lin, Y., Dai, Z., Zhu, S., & Yao, Y. (2024). Gaussian-Flow: 4D Reconstruction with Dynamic 3D Gaussian Particle. CVPR.","DOI":"10.1109\/CVPR52733.2024.01997"},{"key":"2725_CR44","doi-asserted-by":"crossref","unstructured":"Lin, J., Tang, J., Tang, H., Yang, S., Chen, W. M., Wang, W. C., & Han, S. (2024). AWQ: Activation-Aware Weight Quantization for On-Device LLM Compression and Acceleration. Proceedings of Machine Learning and Systems.","DOI":"10.1145\/3714983.3714987"},{"key":"2725_CR45","doi-asserted-by":"crossref","unstructured":"Lombardi, S., Saragih, J., Simon, T., & Sheikh, Y. (2018). Deep Appearance Models for Face Rendering. ACM ToG","DOI":"10.1145\/3197517.3201401"},{"key":"2725_CR46","doi-asserted-by":"crossref","unstructured":"Lombardi, S., Simon, T., Schwartz, G., Zollhoefer, M., Sheikh, Y., & Saragih, J. (2021). Mixture of Volumetric Primitives for Efficient Neural Rendering. ACM ToG.","DOI":"10.1145\/3450626.3459863"},{"key":"2725_CR47","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., & Black, M. J. (2023). SMPL: A skinned multi-person linear model. In Seminal Graphics Papers: Pushing the Boundaries, Volume 2 (pp. 851-866).","DOI":"10.1145\/3596711.3596800"},{"key":"2725_CR48","doi-asserted-by":"crossref","unstructured":"Luiten, J., Kopanas, G., Leibe, B., & Ramanan, D. (2024, March). Dynamic 3D Gaussians: Tracking by Persistent Dynamic View Synthesis. 3DV.","DOI":"10.1109\/3DV62453.2024.00044"},{"key":"2725_CR49","doi-asserted-by":"crossref","unstructured":"Maggioni, M., Tanay, T., Babiloni, F., McDonagh, S., & Leonardis, A. (2023). Tunable Convolutions with Parametric Multi-Loss Optimization. CVPR.","DOI":"10.1109\/CVPR52729.2023.01937"},{"key":"2725_CR50","doi-asserted-by":"crossref","unstructured":"Merry, B., Marais, P., & Gain, J. (2006). Animation Space: A Truly Linear Framework for Character Animation. ACM ToG.","DOI":"10.1145\/1183287.1183294"},{"key":"2725_CR51","doi-asserted-by":"crossref","unstructured":"Mihajlovic, M., Bansal, A., Zollhoefer, M., Tang, S., & Saito, S. (2022, October). KeypointNeRF: Generalizing Image-Based Volumetric Avatars using Relative Spatial Encoding of Keypoints. arXiv preprint arXiv:2205.04992.","DOI":"10.1007\/978-3-031-19784-0_11"},{"key":"2725_CR52","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Srinivasan, P. P., Tancik, M., Barron, J. T., Ramamoorthi, R., & Ng, R. (2020). NeRF. ECCV","DOI":"10.1145\/3503250"},{"key":"2725_CR53","doi-asserted-by":"crossref","unstructured":"Mir, A., Puig, X., Kanazawa, A., & Pons-Moll, G. (2024, March). Generating Continual Human Motion in Diverse 3D Scenes. 3DV.","DOI":"10.1109\/3DV62453.2024.00061"},{"key":"2725_CR54","doi-asserted-by":"crossref","unstructured":"Moon, G., Shiratori, T., & Saito, S. (2024). Expressive whole-body 3D gaussian avatar. In: ECCV.","DOI":"10.1007\/978-3-031-72940-9_2"},{"key":"2725_CR55","unstructured":"Moreau, A., Brahimi, M., Shaw, R., Papaioannou, A., Tanay, T., Zhang, Z., & P\u00e9rez-Pellitero, E. (2025). Better together: Unified motion capture and 3d avatar reconstruction. ArXiv abs\/2503.09293."},{"key":"2725_CR56","doi-asserted-by":"crossref","unstructured":"Moreau, A., Song, J., Dhamo, H., Shaw, R., Zhou, Y., & P\u00e9rez-Pellitero, E. (2024). Human Gaussian Splatting: Real-Time Rendering of Animatable Avatars. CVPR.","DOI":"10.1109\/CVPR52733.2024.00081"},{"key":"2725_CR57","doi-asserted-by":"crossref","unstructured":"M\u00fcller, T., Evans, A., Schied, C., & Keller, A. (2022). Instant Neural Graphics Primitives with a Multiresolution Hash Encoding. ACM ToG","DOI":"10.1145\/3528223.3530127"},{"key":"2725_CR58","unstructured":"OpenAI, et al.: GPT-4 Technical Report. arXiv preprint arXiv:2303.08774 (2024)"},{"key":"2725_CR59","unstructured":"Ouyang, L., Wu, J., Jiang, X., Almeida, D., Wainwright, C., Mishkin, P., & Lowe, R. (2024). Training Language Models to Follow Instructions with Human Feedback. NeurIPS."},{"key":"2725_CR60","unstructured":"Pan, D., Zhuo, L., Piao, J., Luo, H., Cheng, W., Wang, Y., & Lin, K. Y. (2023). RenderMe-360: Large Digital Asset Library and Benchmark Towards High-Fidelity Head Avatars. In: 37th Conf. on Neural Information Processing Systems Datasets and Benchmarks Track."},{"key":"2725_CR61","doi-asserted-by":"crossref","unstructured":"Pang, H., Zhu, H., Kortylewski, A., Theobalt, C., & Habermann, M. (2024). ASH: Animatable Gaussian Splats for Efficient and Photoreal Human Rendering. CVPR.","DOI":"10.1109\/CVPR52733.2024.00117"},{"key":"2725_CR62","doi-asserted-by":"crossref","unstructured":"Park, K., Sinha, U., Barron, J. T., Bouaziz, S., Goldman, D. B., Seitz, S. M., & Martin-Brualla, R. (2021). Nerfies: Deformable neural radiance fields. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 5865-5874).","DOI":"10.1109\/ICCV48922.2021.00581"},{"key":"2725_CR63","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Choutas, V., Ghorbani, N., Bolkart, T., Osman, A. A., Tzionas, D., & Black, M. J. (2019). Expressive body capture: 3d hands, face, and body from a single image. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10975-10985).","DOI":"10.1109\/CVPR.2019.01123"},{"key":"2725_CR64","doi-asserted-by":"crossref","unstructured":"Peng, Z., Hu, W., Shi, Y., Zhu, X., Zhang, X., Zhao, H., & Fan, Z. (2024). SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis. CVPR.","DOI":"10.1109\/CVPR52733.2024.00070"},{"key":"2725_CR65","doi-asserted-by":"crossref","unstructured":"Peng, S., Yan, Y., Shuai, Q., Bao, H., & Zhou, X. (2023). Representing volumetric videos as dynamic mlp maps. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp. 4252-4262).","DOI":"10.1109\/CVPR52729.2023.00414"},{"key":"2725_CR66","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Corona, E., Pons-Moll, G., & Moreno-Noguer, F. (2020). D-NeRF: Neural Radiance Fields for Dynamic Scenes. CVPR","DOI":"10.1109\/CVPR46437.2021.01018"},{"key":"2725_CR67","doi-asserted-by":"crossref","unstructured":"Qian, S., Kirschstein, T., Schoneveld, L., Davoli, D., Giebenhain, S., & Nie\u00dfner, M. (2024). Gaussianavatars: Photorealistic head avatars with rigged 3d gaussians. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp. 20299-20309).","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"2725_CR68","doi-asserted-by":"crossref","unstructured":"Qian, Z., Wang, S., Mihajlovic, M., Geiger, A., & Tang, S. (2024). 3DGS-Avatar: Animatable Avatars via Deformable 3D Gaussian Splatting. CVPR.","DOI":"10.1109\/CVPR52733.2024.00480"},{"key":"2725_CR69","unstructured":"Qin, Z., Zhao, W., Yu, X., & Sun, X. (2023). OpenVoice: Versatile Instant Voice Cloning. arXiv preprint arXiv:2312.01479."},{"key":"2725_CR70","doi-asserted-by":"crossref","unstructured":"Saito, S., Schwartz, G., Simon, T., Li, J., & Nam, G. (2024). Relightable Gaussian Codec Avatars. CVPR.","DOI":"10.1109\/CVPR52733.2024.00021"},{"key":"2725_CR71","doi-asserted-by":"crossref","unstructured":"Saito, S., Simon, T., Saragih, J., & Joo, H. (2020). PIFuHD: Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Human Digitization. CVPR.","DOI":"10.1109\/CVPR42600.2020.00016"},{"key":"2725_CR72","unstructured":"Schmidt, J., Giebenhain, S., & Nie\u00dfner, M. (2025). Becominglit: Relightable gaussian avatars with hybrid neural shading. ArXiv abs\/2506.06271"},{"key":"2725_CR73","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., & Frahm, J.-M. (2016). Structure-from-Motion Revisited. CVPR","DOI":"10.1109\/CVPR.2016.445"},{"key":"2725_CR74","doi-asserted-by":"crossref","unstructured":"Shaw, R., Nazarczuk, M., Song, J., Moreau, A., Catley-Chandar, S., Dhamo, H., & P\u00e9rez-Pellitero, E. (2024, September). Swings: sliding windows for dynamic 3d gaussian splatting. In European Conference on Computer Vision (pp. 37-54). Cham: Springer Nature Switzerland.","DOI":"10.1007\/978-3-031-73001-6_3"},{"key":"2725_CR75","doi-asserted-by":"crossref","unstructured":"Son Chung, J., Senior, A., Vinyals, O., & Zisserman, A. (2017). Lip Reading Sentences in the Wild. CVPR, 6447\u20136456.","DOI":"10.1109\/CVPR.2017.367"},{"key":"2725_CR76","doi-asserted-by":"crossref","unstructured":"Sun, J., Jiao, H., Li, G., Zhang, Z., Zhao, L., & Xing, W. (2024). 3DGStream: On-the-fly Training of 3D Gaussians for Efficient Streaming of Photo-Realistic Free-Viewpoint Videos. CVPR.","DOI":"10.1109\/CVPR52733.2024.01954"},{"issue":"4","key":"2725_CR77","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S. M., & Kemelmacher-Shlizerman, I. (2017). Synthesizing Obama: Learning Lip Sync from Audio. ACM ToG,36(4), 1\u201313.","journal-title":"ACM ToG"},{"key":"2725_CR78","doi-asserted-by":"crossref","unstructured":"Takikawa, T., Litalien, J., Yin, K., Kreis, K., Loop, C., Nowrouzezahrai, D., & Fidler, S. (2021). Neural Geometric Level of Detail: Real-Time Rendering with Implicit 3D Shapes. CVPR.","DOI":"10.1109\/CVPR46437.2021.01120"},{"key":"2725_CR79","unstructured":"Tang, J., Wang, K., Zhou, H., Chen, X., He, D., Hu, T., & Wang, J. (2022). Real-time Neural Radiance Talking Portrait Synthesis via Audio-spatial Decomposition. arXiv preprint arXiv:2211.12368."},{"key":"2725_CR80","doi-asserted-by":"crossref","unstructured":"Teed, Z., & Deng, J. (2020). RAFT: Recurrent All-Pairs Field Transforms for Optical Flow. ECCV.","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"2725_CR81","doi-asserted-by":"crossref","unstructured":"Thies, J., Zollhofer, M., Stamminger, M., Theobalt, C., & Nie\u00dfner, M. (2016). Face2Face: Real-Time Face Capture and Reenactment of RGB Videos. CVPR (2016)","DOI":"10.1109\/CVPR.2016.262"},{"key":"2725_CR82","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., & Polosukhin, I. (2017). Attention is All You Need. In: 31st International Conf. on Neural Information Processing Systems."},{"key":"2725_CR83","unstructured":"Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image Quality Assessment: From Error Visibility to Structural Similarity. IEEE Trans. on Image Processing."},{"key":"2725_CR84","doi-asserted-by":"crossref","unstructured":"Wang, D., Chandran, P., Zoss, G., Bradley, D., & Gotardo, P. (2022, July). MoRF: Morphable Radiance Fields for Multiview Neural Head Modeling. SIGGRAPH.","DOI":"10.1145\/3528233.3530753"},{"key":"2725_CR85","unstructured":"Wang, P., Liu, L., Liu, Y., Theobalt, C., Komura, T., & Wang, W. (2021). NeuS: Learning Neural Implicit Surfaces by Volume Rendering for Multi-View Reconstruction. arXiv preprint arXiv:2106.10689"},{"key":"2725_CR86","first-page":"1","volume":"43","author":"Y Wang","year":"2024","unstructured":"Wang, Y., Wang, C., Gong, B., & Xue, T. (2024). Bilateral guided radiance field processing. ACM ToG,43, 1\u201313.","journal-title":"ACM ToG"},{"key":"2725_CR87","doi-asserted-by":"crossref","unstructured":"Wen, J., Zhao, X., Ren, Z., Schwing, A. G., & Wang, S. (2024). GoMAvatar: Efficient Animatable Human Modeling from Monocular Video Using Gaussians-on-Mesh. CVPR.","DOI":"10.1109\/CVPR52733.2024.00201"},{"key":"2725_CR88","doi-asserted-by":"crossref","unstructured":"Wu, G., Yi, T., Fang, J., Xie, L., Zhang, X., Wei, W., & Wang, X. (2024). 4D Gaussian Splatting for Real-Time Dynamic Scene Rendering. CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01920"},{"key":"2725_CR89","doi-asserted-by":"crossref","unstructured":"Xiang, J., Gao, X., Guo, Y., & Zhang, J. (2024). Flashavatar: High-fidelity head avatar with efficient gaussian embedding. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp. 1802-1812).","DOI":"10.1109\/CVPR52733.2024.00177"},{"key":"2725_CR90","doi-asserted-by":"crossref","unstructured":"Xu, Y., Chen, B., Li, Z., Zhang, H., Wang, L., Zheng, Z., & Liu, Y. (2024). Gaussian head avatar: Ultra high-fidelity head avatar via dynamic gaussians. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 1931-1941).","DOI":"10.1109\/CVPR52733.2024.00189"},{"key":"2725_CR91","doi-asserted-by":"crossref","unstructured":"Yang, Z., Gao, X., Zhou, W., Jiao, S., Zhang, Y., & Jin, X. (2024). Deformable 3D Gaussians for High-Fidelity Monocular Dynamic Scene Reconstruction. CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01922"},{"key":"2725_CR92","unstructured":"Yang, Z., Yang, H., Pan, Z., & Zhang, L. (2023). Real-Time Photorealistic Dynamic Scene Representation and Rendering with 4D Gaussian Splatting. ICLR"},{"key":"2725_CR93","doi-asserted-by":"crossref","unstructured":"Yu, A., Li, R., Tancik, M., Li, H., Ng, R., & Kanazawa, A. (2021). Plenoctrees for real-time rendering of neural radiance fields. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 5752-5761).","DOI":"10.1109\/ICCV48922.2021.00570"},{"key":"2725_CR94","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Geiger, J., Pohjalainen, J., et al. (2018). Deep Learning for Environmentally Robust Speech Recognition: An Overview of Recent Developments. Technol: ACM Trans. Intell. Syst.","DOI":"10.1145\/3178115"},{"key":"2725_CR95","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A. A., Shechtman, E., & Wang, O. (2018). The Unreasonable Effectiveness of Deep Features as a Perceptual Metric. CVPR.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2725_CR96","unstructured":"Zhao, W. X., Zhou, K., Li, J., Tang, T., Wang, X., Hou, Y., & Wen, J. R. (2023). A Survey of Large Language Models. arXiv preprint arXiv:2303.18223."},{"key":"2725_CR97","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Yifan, W., Wetzstein, G., Black, M. J., & Hilliges, O. (2022). PointAvatar: Deformable Point-Based Head Avatars from Videos. CVPR .","DOI":"10.1109\/CVPR52729.2023.02017"},{"key":"2725_CR98","doi-asserted-by":"crossref","unstructured":"Zielonka, W., Bolkart, T., & Thies, J. Instant Volumetric Head Avatars. CVPR (2022).","DOI":"10.1109\/CVPR52729.2023.00444"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02725-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02725-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02725-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T05:46:44Z","timestamp":1776491204000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02725-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,6]]},"references-count":98,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["2725"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02725-8","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,6]]},"assertion":[{"value":"14 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"161"}}