{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T13:02:20Z","timestamp":1771074140125,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,19]]},"DOI":"10.1145\/3788731.3788741","type":"proceedings-article","created":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T11:49:28Z","timestamp":1771069768000},"page":"65-70","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TalkPersonaDiff: High-Fidelity Speech-Driven 3D Facial Animation Generation via Unified Multimodal Synergistic Encoding and Dual-Style Modulation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5522-1941","authenticated-orcid":false,"given":"Peng","family":"Ouyang","sequence":"first","affiliation":[{"name":"School of Information and Communication Engineering, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0285-8176","authenticated-orcid":false,"given":"Zhuoyuan","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4263-3650","authenticated-orcid":false,"given":"Tong","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3813-3970","authenticated-orcid":false,"given":"Zhiqian","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, Beijing University of Posts and Telecommunications, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2026,2,14]]},"reference":[{"key":"e_1_3_3_1_1_2","doi-asserted-by":"crossref","unstructured":"Jason Jerald. 2015. The VR Book: Human-Centered Design for Virtual Reality. ACM and Morgan & Claypool.","DOI":"10.1145\/2792790"},{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01034"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073658"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"M. M. Cohen and D. W. Massaro. 1993. Modeling coarticulation in synthetic visual speech. In Models and Techniques in Computer Animation. Springer 139\u2013156.","DOI":"10.1007\/978-4-431-66911-1_13"},{"key":"e_1_3_3_1_5_2","volume-title":"Proc. ACM SIGGRAPH. 21\u201328","year":"1999","unstructured":"Matthew Brand. 1999. Voice puppetry. In Proc. ACM SIGGRAPH. 21\u201328."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925984"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073699"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01821"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01229"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3658221"},{"key":"e_1_3_3_1_11_2","volume-title":"Proc. ACM SIGGRAPH Motion Interact. Games (MIG). Art. no. 13","author":"Stan S.","unstructured":"S. Stan, K. I. Haque, and Z. Yumak. 2023. 
FaceDiffuser: Speech-driven 3D facial animation synthesis using diffusion. In Proc. ACM SIGGRAPH Motion Interact. Games (MIG). Art. no. 13."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2010.2052239"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"W. Song et al. 2024. TalkingStyle: Personalized speech-driven 3D facial animation with style preservation. IEEE Trans. Vis. Comput. Graph. (2024).","DOI":"10.1109\/TVCG.2024.3409568"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01885"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01891"},{"key":"e_1_3_3_1_18_2","volume-title":"Proc. ACM SIGGRAPH Motion Interact. Games (MIG). 1\u201312","author":"Wu S.","unstructured":"S. Wu, K. I. Haque, and Z. Yumak. 2024. ProbTalk3D: Non-deterministic emotion controllable speech-driven 3D facial animation synthesis using VQ-VAE. In Proc. ACM SIGGRAPH Motion Interact. Games (MIG). 1\u201312."}],"event":{"name":"EILM 2025: 2025 International Conference on Embodied Intelligence and Large Models","location":"Chengdu China","acronym":"EILM 2025"},"container-title":["Proceedings of the 2025 International Conference on Embodied Intelligence and Large Models"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3788731.3788741","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T12:09:04Z","timestamp":1771070944000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3788731.3788741"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,19]]},"references-count":18,"alternative-id":["10.1145\/3788731.3788741","10.1145\/3788731"],"URL":"https:\/\/doi.org\/10.1145\/3788731.3788741","relation":{},"subject":[],"published":{"date-parts":[[2025,12,19]]},"assertion":[{"value":"2026-02-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}