{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:40Z","timestamp":1750309540541,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:00:00Z","timestamp":1740096000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,21]]},"DOI":"10.1145\/3728725.3728752","type":"proceedings-article","created":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T21:00:02Z","timestamp":1749070802000},"page":"171-177","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Audio-driven Talking-face Synthesis based on 3D Gaussian"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5902-2423","authenticated-orcid":false,"given":"Botao","family":"Xiong","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,3]]},"reference":[{"key":"e_1_3_3_1_1_2","doi-asserted-by":"publisher","DOI":"10.1145\/258734.258880"},{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","unstructured":"WILES O KOEPKE A S ZISSERMAN A. X2face: A network for controlling face generation using images audio and pose codes[M\/OL]. 2018: 690\u2013706. 10.1007\/978-3-030-01261-8_41.","DOI":"10.1007\/978-3-030-01261-8_41"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2020.2973374"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01251-8"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.3156\/jsoft.29.5_177_2"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2017.632"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","unstructured":"MILDENHALL B SRINIVASAN P P TANCIK M et al. Nerf: Representing scenes as neural radiance fields for view synthesis[M\/OL]. 2020: 405\u2013421. 10.1007\/978-3-030-58452-8_24.","DOI":"10.1007\/978-3-030-58452-8_24"},{"volume-title":"Live Speech Portraits: Real-Time Photorealistic Talking-Head Animation","year":"2021","key":"e_1_3_3_1_10_2","unstructured":"Lu, Yuanxun, et al. \u201cLive Speech Portraits: Real-Time Photorealistic Talking-Head Animation.\u201d Cornell University - arXiv, Cornell University - arXiv, Sept. 2021."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","unstructured":"WANG K WU Q SONG L et al. Mead: A large-scale audio-visual dataset for emotional talking-face generation[M\/OL]. 2020: 700\u2013717. 10.1007\/978-3-030-58589-1_42.","DOI":"10.1007\/978-3-030-58589-1_42"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"volume-title":"IEEE","year":"2024","key":"e_1_3_3_1_13_2","unstructured":"Luiten, Jonathon, et al. \"Dynamic 3d gaussians: Tracking by persistent dynamic view synthesis.\"\u00a02024 International Conference on 3D Vision (3DV). 
IEEE, 2024."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.00573"},{"volume-title":"Real-Time Neural Radiance Talking Portrait Synthesis via Audio-Spatial Decomposition","year":"2022","key":"e_1_3_3_1_16_2","unstructured":"Tang, Jiaxiang, et al. Real-Time Neural Radiance Talking Portrait Synthesis via Audio-Spatial Decomposition. Nov. 2022."},{"volume-title":"Gaussianhead: Impressive head avatars with learnable gaussian diffusion[J]. arXiv preprint arXiv:2312.01632","year":"2023","key":"e_1_3_3_1_17_2","unstructured":"Wang J, Xie J C, Li X, et al. Gaussianhead: Impressive head avatars with learnable gaussian diffusion[J]. arXiv preprint arXiv:2312.01632, 2023."},{"key":"e_1_3_3_1_18_2","first-page":"1931","volume":"2024","unstructured":"Xu Y, Chen B, Li Z, et al. Gaussian head avatar: Ultra high-fidelity head avatar via dynamic gaussians[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024: 1931-1941.","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.01821"},{"key":"e_1_3_3_1_20_2","volume-title":"Emotion driven monocular face capture and animation.\"\u00a0Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"\u011b\u010d","year":"2022","unstructured":"Dan\u011b\u010dek, Radek, Michael J. Black, and Timo Bolkart. \"Emoca: Emotion driven monocular face capture and animation.\"\u00a0Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022."},{"key":"e_1_3_3_1_21_2","volume-title":"GeneFace++: Generalized and Stable Real-Time Audio-Driven 3D Talking Face Generation","author":"Z.","year":"2023","unstructured":"Ye, Z., He, J., Jiang, Z., Huang, R., Huang, J., Liu, J., \u2026 Zhao, Z. (2023). GeneFace++: Generalized and Stable Real-Time Audio-Driven 3D Talking Face Generation."},{"key":"e_1_3_3_1_22_2","volume-title":"Emotion driven monocular face capture and animation.\"\u00a0Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"\u011b\u010d","year":"2022","unstructured":"Dan\u011b\u010dek, Radek, Michael J. Black, and Timo Bolkart. \"Emoca: Emotion driven monocular face capture and animation.\"\u00a0Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.00573"},{"volume-title":"Real-Time Neural Radiance Talking Portrait Synthesis via Audio-Spatial Decomposition","year":"2022","key":"e_1_3_3_1_24_2","unstructured":"Tang, Jiaxiang, et al. Real-Time Neural Radiance Talking Portrait Synthesis via Audio-Spatial Decomposition. Nov. 2022."},{"volume-title":"Efficient region-aware neural radiance fields for high-fidelity talking portrait synthesis.\"\u00a0Proceedings of the IEEE\/CVF International Conference on Computer Vision","year":"2023","key":"e_1_3_3_1_25_2","unstructured":"Li, Jiahe, et al. \"Efficient region-aware neural radiance fields for high-fidelity talking portrait synthesis.\"\u00a0Proceedings of the IEEE\/CVF International Conference on Computer Vision. 
2023."}],"event":{"name":"GAIIS 2025: 2025 2nd International Conference on Generative Artificial Intelligence and Information Security","acronym":"GAIIS 2025","location":"Hangzhou China"},"container-title":["Proceedings of the 2025 2nd International Conference on Generative Artificial Intelligence and Information Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3728725.3728752","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3728725.3728752","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:36Z","timestamp":1750295916000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3728725.3728752"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,21]]},"references-count":25,"alternative-id":["10.1145\/3728725.3728752","10.1145\/3728725"],"URL":"https:\/\/doi.org\/10.1145\/3728725.3728752","relation":{},"subject":[],"published":{"date-parts":[[2025,2,21]]},"assertion":[{"value":"2025-06-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}