{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T20:39:56Z","timestamp":1769632796720,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,15]]},"DOI":"10.1145\/3757377.3763939","type":"proceedings-article","created":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T16:27:29Z","timestamp":1765211249000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Audio Driven Universal Gaussian Head Avatars"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-6985-7159","authenticated-orcid":false,"given":"Kartik","family":"Teotia","sequence":"first","affiliation":[{"name":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2692-0801","authenticated-orcid":false,"given":"Helge","family":"Rhodin","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5577-157X","authenticated-orcid":false,"given":"Mohit","family":"Mendiratta","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0858-0882","authenticated-orcid":false,"given":"Hyeongwoo","family":"Kim","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3899-7515","authenticated-orcid":false,"given":"Marc","family":"Habermann","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6104-6625","authenticated-orcid":false,"given":"Christian","family":"Theobalt","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Informatics, Saarbr\u00fccken, Germany and Saarland Informatics Campus, Saarbr\u00fccken, Germany"}]}],"member":"320","published-online":{"date-parts":[[2025,12,14]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Shivangi Aneja Artem Sevastopolsky Tobias Kirschstein Justus Thies Angela Dai and Matthias Nie\u00dfner. 2024a. GaussianSpeech: Audio-Driven Gaussian Avatars. arxiv:https:\/\/arXiv.org\/abs\/2411.18675\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2411.18675"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02009"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Shivangi Aneja Sebastian Weiss Irene Baeza Prashanth Chandran Gaspard Zoss Matthias Nie\u00dfner and Derek Bradley. 2025. ScaffoldAvatar: High-Fidelity Gaussian Avatars with Patch Expressions. arxiv:https:\/\/arXiv.org\/abs\/2507.10542\u00a0[cs.GR] https:\/\/arxiv.org\/abs\/2507.10542","DOI":"10.1145\/3721238.3730729"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Monica\u00a0Villanueva Aylagas Hector\u00a0Anadon Leon Mattias Teye and Konrad Tollmar. 2022. Voice2face: Audio-driven facial and tongue rig animations with cvaes.","DOI":"10.1111\/cgf.14640"},{"key":"e_1_3_3_2_6_1","unstructured":"Alexei Baevski Henry Zhou Abdelrahman Mohamed and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. arxiv:https:\/\/arXiv.org\/abs\/2006.11477\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2006.11477"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/311535.311556"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"crossref","unstructured":"Chen Cao Tomas Simon Jin\u00a0Kyu Kim Gabe Schwartz Michael Zollh\u00f6fer Shunsuke Saito Stephen Lombardi Shih-En Wei Danielle Belko Shoou-I Yu Yaser Sheikh and Jason\u00a0M. Saragih. 2022. Authentic volumetric avatars from a phone scan. ACM Trans. Graph. 41 4 (2022) 163:1\u2013163:19.","DOI":"10.1145\/3528223.3530143"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_32"},{"key":"e_1_3_3_2_10_1","volume-title":"Workshop on Multi-view Lip-reading, ACCV","author":"Chung J.\u00a0S.","year":"2016","unstructured":"J.\u00a0S. Chung and A. Zisserman. 2016. Out of time: automated lip sync in the wild. In Workshop on Multi-view Lip-reading, ACCV."},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01967"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01828"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01821"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687689"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01810"},{"key":"e_1_3_3_2_16_1","unstructured":"Jiazhi Guan Zhanwang Zhang Hang Zhou Tianshu Hu Kaisiyuan Wang Dongliang He Haocheng Feng Jingtuo Liu Errui Ding Ziwei Liu and Jingdong Wang. 2023. StyleSync: High-Fidelity Generalized and Personalized Lip Sync in Style-based Generator. arxiv:https:\/\/arXiv.org\/abs\/2305.05445\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2305.05445"},{"key":"e_1_3_3_2_17_1","unstructured":"Jianzhu Guo Dingyun Zhang Xiaoqiang Liu Zhizhou Zhong Yuan Zhang Pengfei Wan and Di Zhang. 2024. LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.03168 (2024)."},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00572"},{"key":"e_1_3_3_2_19_1","volume-title":"SIGGRAPH 2024 Conference Proceedings","author":"Haotian Yang","year":"2024","unstructured":"Yang Haotian, Zheng Mingwu, Ma ChongYang, Lai Yu-Kun, Wan Pengfei, and Huang Haibin. 2024. VRMM: A Volumetric Relightable Morphable Head Model. In SIGGRAPH 2024 Conference Proceedings."},{"key":"e_1_3_3_2_20_1","first-page":"6840","volume-title":"Advances in Neural Information Processing Systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems , H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 6840\u20136851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_3_2_21_1","unstructured":"Cheng hsin Wuu Ningyuan Zheng Scott Ardisson Rohan Bali Danielle Belko Eric Brockmeyer Lucas Evans Timothy Godisart Hyowon Ha Xuhua Huang Alexander Hypes Taylor Koska Steven Krenn Stephen Lombardi Xiaomin Luo Kevyn McPhail Laura Millerschoen Michal Perdoch Mark Pitts Alexander Richard Jason Saragih Junko Saragih Takaaki Shiratori Tomas Simon Matt Stewart Autumn Trimble Xinshuo Weng David Whitewolf Chenglei Wu Shoou-I Yu and Yaser Sheikh. 2023. Multiface: A Dataset for Neural Face Rendering. arxiv:https:\/\/arXiv.org\/abs\/2207.11243\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2207.11243"},{"key":"e_1_3_3_2_22_1","unstructured":"Justin Johnson Alexandre Alahi and Li Fei-Fei. 2016. Perceptual Losses for Real-Time Style Transfer and Super-Resolution. CoRR abs\/1603.08155 (2016). arXiv:https:\/\/arXiv.org\/abs\/1603.08155http:\/\/arxiv.org\/abs\/1603.08155"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"crossref","unstructured":"Tero Karras Timo Aila Samuli Laine Antti Herva and Jaakko Lehtinen. 2017. Audio-driven facial animation by joint end-to-end learning of pose and emotion.","DOI":"10.1145\/3072959.3073658"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimkuehler and George Drettakis. 2023b. 3D Gaussian Splatting for Real-Time Radiance Field Rendering. ACM Trans. Graph. 42 4 Article 139 (jul 2023) 14\u00a0pages. 10.1145\/3592433","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_2_26_1","unstructured":"Tobias Kirschstein Javier Romero Artem Sevastopolsky Matthias Nie\u00dfner and Shunsuke Saito. 2025. Avat3r: Large Animatable Gaussian Reconstruction Model for High-fidelity 3D Head Avatars. arxiv:https:\/\/arXiv.org\/abs\/2502.20220\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2502.20220"},{"key":"e_1_3_3_2_27_1","volume-title":"IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Li Junxuan","year":"2023","unstructured":"Junxuan Li, Chen Cao, Gabriel Schwartz, Rawal Khirodkar, Christian Richardt, Tomas Simon, Yaser Sheikh, and Shunsuke Saito. 2023. ER\u2011NeRF: Efficient Region\u2011Aware Neural Radiance Fields for High\u2011Fidelity Talking Portrait Synthesis. In IEEE\/CVF International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687653"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01004"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"crossref","unstructured":"Tianye Li Timo Bolkart Michael\u00a0J. Black Hao Li and Javier Romero. 2017. Learning a model of facial shape and expression from 4D scans. ACM Trans. Graph. 36 6 (2017) 194:1\u2013194:17.","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_3_2_31_1","unstructured":"Xuanchen Li Jianyu Wang Yuhao Cheng Yikun Zeng Xingyu Ren Wenhan Zhu Weiming Zhao and Yichao Yan. 2025b. Towards High-fidelity 3D Talking Avatar with Personalized Dynamic Texture. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.00495 (2025)."},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657462"},{"key":"e_1_3_3_2_33_1","unstructured":"Julieta Martinez Emily Kim Javier Romero Timur Bagautdinov Shunsuke Saito Shoou-I Yu Stuart Anderson Michael Zollh\u00f6fer Te-Li Wang Shaojie Bai Chenghui Li Shih-En Wei Rohan Joshi Wyatt Borsos Tomas Simon Jason Saragih Paul Theodosis Alexander Greene Anjani Josyula Silvio\u00a0Mano Maeta Andrew\u00a0I. Jewett Simon Venshtain Christopher Heilman Yueh-Tung Chen Sidi Fu Mohamed Ezzeldin\u00a0A. Elshaer Tingfang Du Longhua Wu Shen-Chi Chen Kai Kang Michael Wu Youssef Emad Steven Longay Ashley Brewer Hitesh Shah James Booth Taylor Koska Kayla Haidle Matt Andromalos Joanna Hsu Thomas Dauer Peter Selednik Tim Godisart Scott Ardisson Matthew Cipperly Ben Humberston Lon Farr Bob Hansen Peihong Guo Dave Braun Steven Krenn He Wen Lucas Evans Natalia Fadeeva Matthew Stewart Gabriel Schwartz Divam Gupta Gyeongsik Moon Kaiwen Guo Yuan Dong Yichen Xu Takaaki Shiratori Fabian Prada Bernardo\u00a0R. Pires Bo Peng Julia Buffalini Autumn Trimble Kevyn McPhail Melissa Schoeller and Yaser Sheikh. 2024. Codec Avatar Studio: Paired Human Captures for Complete Driveable and Generalizable Avatars. NeurIPS Track on Datasets and Benchmarks (2024)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_3_2_35_1","unstructured":"Evonne Ng Javier Romero Timur Bagautdinov Shaojie Bai Trevor Darrell Angjoo Kanazawa and Alexander Richard. 2024. From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations. arxiv:https:\/\/arXiv.org\/abs\/2401.01885\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2401.01885"},{"key":"e_1_3_3_2_36_1","unstructured":"Dongwei Pan Long Zhuo Jingtan Piao Huiwen Luo Wei Cheng Yuxin Wang Siming Fan Shengqi Liu Lei Yang Bo Dai Ziwei Liu Chen\u00a0Change Loy Chen Qian Wayne Wu Dahua Lin and Kwan-Yee Lin. 2024. RenderMe-360: A Large Digital Asset Library and Benchmarks Towards High-fidelity Head Avatars. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"crossref","unstructured":"Ziqiao Peng Haoyu Wu Zhenbo Song Hao Xu Xiangyu Zhu Jun He Hongyan Liu and Zhaoxin Fan. 2023. Emotalk: Speech-driven emotional disentanglement for 3d face animation.","DOI":"10.1109\/ICCV51070.2023.01891"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00009"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00121"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00021"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"crossref","unstructured":"Stefan Stan Kazi\u00a0Injamamul Haque and Zerrin Yumak. 2023a. FaceDiffuser: Speech-Driven 3D Facial Animation Synthesis Using Diffusion. arxiv:https:\/\/arXiv.org\/abs\/2309.11306\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2309.11306","DOI":"10.1145\/3623264.3624447"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"crossref","unstructured":"Stefan Stan Kazi\u00a0Injamamul Haque and Zerrin Yumak. 2023b. FaceDiffuser: Speech-Driven 3D Facial Animation Synthesis Using Diffusion. arxiv:https:\/\/arXiv.org\/abs\/2309.11306\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2309.11306","DOI":"10.1145\/3623264.3624447"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","unstructured":"Zhiyao Sun Tian Lv Sheng Ye Matthieu Lin Jenny Sheng Yu-Hui Wen Minjing Yu and Yong-Jin Liu. 2024a. DiffPoseTalk: Speech-Driven Stylistic 3D Facial Animation and Head Pose Generation via Diffusion Models. ACM Transactions on Graphics (2024). 10.1145\/3679561Proceedings of SIGGRAPH 2024.","DOI":"10.1145\/3679561"},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"publisher","unstructured":"Zhiyao Sun Tian Lv Sheng Ye Matthieu Lin Jenny Sheng Yu-Hui Wen Minjing Yu and Yong-Jin Liu. 2024b. DiffPoseTalk: Speech-Driven Stylistic 3D Facial Animation and Head Pose Generation via Diffusion Models. ACM Transactions on Graphics (TOG) 43 4 Article 46 (2024) 9\u00a0pages. 10.1145\/3658221","DOI":"10.1145\/3658221"},{"key":"e_1_3_3_2_48_1","unstructured":"Jiaxiang Tang Kaisiyuan Wang Hang Zhou Xiaokang Chen Dongliang He Jingtuo Liu Tianshu Hu Gang Zeng and Jingdong Wang. 2022. RAD\u2011NeRF: Real\u2011Time Neural Radiance Talking Portrait Synthesis via Audio\u2011Spatial Decomposition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.12368 (2022)."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","unstructured":"Sarah Taylor Taehwan Kim Yisong Yue Moshe Mahler James Krahe Anastasio\u00a0Garcia Rodriguez Jessica Hodgins and Iain Matthews. 2017. A Deep Learning Approach for Generalized Speech Animation. ACM Transactions on Graphics 36 4 (2017) 93:1\u201393:12. 10.1145\/3072959.3073699","DOI":"10.1145\/3072959.3073699"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","unstructured":"Kartik Teotia Hyeongwoo Kim Pablo Garrido Marc Habermann Mohamed Elgharib and Christian Theobalt. 2024. GaussianHeads: End-to-End Learning of Drivable Gaussian Head Avatars from Coarse-to-fine Representations. ACM Trans. Graph. 43 6 Article 264 (Nov. 2024) 12\u00a0pages. 10.1145\/3687927","DOI":"10.1145\/3687927"},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"crossref","unstructured":"Kartik Teotia Mallikarjun\u00a0B R Xingang Pan Hyeongwoo Kim Pablo Garrido Mohamed Elgharib and Christian Theobalt. 2023. HQ3DAvatar: High Quality Controllable 3D Head Avatar. arxiv:https:\/\/arXiv.org\/abs\/2303.14471\u00a0[cs.CV]","DOI":"10.1145\/3649889"},{"key":"e_1_3_3_2_52_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N. Gomez Lukasz Kaiser and Illia Polosukhin. 2023. Attention Is All You Need. arxiv:https:\/\/arXiv.org\/abs\/1706.03762\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1706.03762"},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"crossref","unstructured":"Zhou Wang Alan\u00a0C. Bovik Hamid\u00a0R. Sheikh and Eero\u00a0P. Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13 4 (2004) 600\u2013612.","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","unstructured":"Sebastian Winberg Gaspard Zoss Prashanth Chandran Paulo Gotardo and Derek Bradley. 2022. Facial hair tracking for high fidelity performance capture. ACM Trans. Graph. 41 4 Article 165 (July 2022) 12\u00a0pages. 10.1145\/3528223.3530116","DOI":"10.1145\/3528223.3530116"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"crossref","unstructured":"Jinbo Xing Menghan Xia Yuechen Zhang Xiaodong Cun Jue Wang and Tien-Tsin Wong. 2023. Codetalker: Speech-driven 3d facial animation with discrete motion prior.","DOI":"10.1109\/CVPR52729.2023.01229"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"crossref","unstructured":"Sicheng Xu Guojun Chen Yu-Xiao Guo Jiaolong Yang Chong Li Zhenyu Zang Yizhong Zhang Xin Tong and Baining Guo. 2024a. VASA-1: Lifelike Audio-Driven Talking Faces Generated in Real Time. arxiv:https:\/\/arXiv.org\/abs\/2404.10667\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2404.10667","DOI":"10.52202\/079017-0021"},{"key":"e_1_3_3_2_57_1","volume-title":"Proceedings of the European Conference on Computer Vision (ECCV)","author":"Xu Yuelang","year":"2024","unstructured":"Yuelang Xu, Lizhen Wang, Zerong Zheng, Zhaoqi Su, and Yebin Liu. 2024b. 3D Gaussian Parametric Head Model. In Proceedings of the European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_3_2_58_1","unstructured":"Zhenhui Ye Ziyue Jiang Yi Ren Jinglin Liu Jinzheng He and Zhou Zhao. 2023. GeneFace: Generalized and High\u2011Fidelity Audio\u2011Driven 3D Talking Face Synthesis. International Conference on Learning Representations (ICLR) (2023)."},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"publisher","unstructured":"Qingcheng Zhao Pengyu Long Qixuan Zhang et\u00a0al. 2024a. Media2Face: Co-speech Facial Animation Generation with Multi-Modality Guidance. ACM Transactions on Graphics (2024). 10.1145\/3641519.3657413Proceedings of SIGGRAPH 2024.","DOI":"10.1145\/3641519.3657413"},{"key":"e_1_3_3_2_61_1","unstructured":"Qingcheng Zhao Pengyu Long Qixuan Zhang Dafei Qin Han Liang Longwen Zhang Yingliang Zhang Jingyi Yu and Lan Xu. 2024b. Media2face: Co-speech facial animation generation with multi-modality guidance."},{"key":"e_1_3_3_2_62_1","unstructured":"Xiaozheng Zheng Chao Wen Zhaohu Li Weiyi Zhang Zhuo Su Xu Chang Yang Zhao Zheng Lv Xiaoyuan Zhang Yongjie Zhang Guidong Wang and Xu Lan. 2024. HeadGAP: Few-shot 3D Head Avatar via Generalizable Gaussian Priors. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.06019 (2024)."},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00444"}],"event":{"name":"SA Conference Papers '25: SIGGRAPH Asia 2025 Conference Papers","location":"Hong Kong Hong Kong","acronym":"SA Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the SIGGRAPH Asia 2025 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3757377.3763939","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T03:20:17Z","timestamp":1765250417000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757377.3763939"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":62,"alternative-id":["10.1145\/3757377.3763939","10.1145\/3757377"],"URL":"https:\/\/doi.org\/10.1145\/3757377.3763939","relation":{},"subject":[],"published":{"date-parts":[[2025,12,14]]},"assertion":[{"value":"2025-12-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}