{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:03:31Z","timestamp":1780635811604,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":89,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,13]],"date-time":"2024-07-13T00:00:00Z","timestamp":1720828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"STCSM","award":["2015F0203-000-06"],"award-info":[{"award-number":["2015F0203-000-06"]}]},{"name":"National Key R&D Program of China","award":["2022YFF0902301"],"award-info":[{"award-number":["2022YFF0902301"]}]},{"name":"NSFC programs","award":["61976138, 61977047"],"award-info":[{"award-number":["61976138, 61977047"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100007836","name":"Shanghai Frontiers Science Center of Human-centered Artificial Intelligence","doi-asserted-by":"publisher","id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100007836","id-type":"DOI","asserted-by":"publisher"}]},{"name":"SHMEC","award":["2019-01-07-00-01-E00003"],"award-info":[{"award-number":["2019-01-07-00-01-E00003"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,13]]},"DOI":"10.1145\/3641519.3657413","type":"proceedings-article","created":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T10:39:28Z","timestamp":1720780768000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":40,"title":["Media2Face: Co-speech Facial Animation Generation With Multi-Modality Guidance"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8968-8341","authenticated-orcid":false,"given":"Qingcheng","family":"Zhao","sequence":"first","affiliation":[{"name":"ShanghaiTech University, China and Deemos Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8490-2652","authenticated-orcid":false,"given":"Pengyu","family":"Long","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, China and Deemos Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4837-7152","authenticated-orcid":false,"given":"Qixuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, China and Deemos Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4992-4760","authenticated-orcid":false,"given":"Dafei","family":"Qin","sequence":"additional","affiliation":[{"name":"University of Hong Kong, China and Deemos Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2444-9685","authenticated-orcid":false,"given":"Han","family":"Liang","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8508-3359","authenticated-orcid":false,"given":"Longwen","family":"Zhang","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, China and Deemos Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0594-7549","authenticated-orcid":false,"given":"Yingliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"DGene Digital Technology Co., Ltd., China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9198-6853","authenticated-orcid":false,"given":"Jingyi","family":"Yu","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8807-7787","authenticated-orcid":false,"given":"Lan","family":"Xu","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,7,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592458"},{"key":"e_1_3_2_2_2_1","unstructured":"Shivangi Aneja Justus Thies Angela Dai and Matthias Nie\u00dfner. 2023. FaceTalk: Audio-Driven Motion Diffusion for Neural Parametric Head Models. arxiv:2312.08459\u00a0[cs.CV]"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","unstructured":"Tenglong Ao Zeyi Zhang and Libin Liu. 2023. GestureDiffuCLIP: Gesture Diffusion Model with CLIP Latents. ACM Trans. Graph. (2023) 18\u00a0pages. https:\/\/doi.org\/10.1145\/3592097","DOI":"10.1145\/3592097"},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: a framework for self-supervised learning of speech representations. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS\u201920). Curran Associates Inc., Red Hook, NY, USA, Article 1044, 12\u00a0pages."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/311535.311556"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.580"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00731"},{"key":"e_1_3_2_2_8_1","volume-title":"Crema-d: Crowd-sourced emotional multimodal actors dataset","author":"Cao Houwei","year":"2014","unstructured":"Houwei Cao, David\u00a0G Cooper, Michael\u00a0K Keutmann, Ruben\u00a0C Gur, Ani Nenkova, and Ragini Verma. 2014. Crema-d: Crowd-sourced emotional multimodal actors dataset. IEEE transactions on affective computing 5, 4 (2014), 377\u2013390."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1095878.1095881"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_22"},{"key":"e_1_3_2_2_11_1","volume-title":"Semantic Deep Face Models. 2020 International Conference on 3D Vision (3DV)","author":"Chandran Prashanth","year":"2020","unstructured":"Prashanth Chandran, Derek Bradley, Markus\u00a0H. Gross, and Thabo Beeler. 2020. Semantic Deep Face Models. 2020 International Conference on 3D Vision (3DV) (2020), 345\u2013354. https:\/\/api.semanticscholar.org\/CorpusID:228102289"},{"key":"e_1_3_2_2_12_1","volume-title":"Facial Animation with Disentangled Identity and Motion using Transformers. Computer Graphics Forum 41","author":"Chandran Prashanth","year":"2022","unstructured":"Prashanth Chandran, Gaspard Zoss, Markus\u00a0H. Gross, Paulo F.\u00a0U. Gotardo, and Derek Bradley. 2022. Facial Animation with Disentangled Identity and Motion using Transformers. Computer Graphics Forum 41 (2022). https:\/\/api.semanticscholar.org\/CorpusID:253164199"},{"key":"e_1_3_2_2_13_1","unstructured":"Peng Chen Xiaobao Wei Ming Lu Yitong Zhu Naiming Yao Xingyu Xiao and Hui Chen. 2023. DiffusionTalker: Personalization and Acceleration for Speech-Driven 3D Face Diffuser. arxiv:2311.16565\u00a0[cs.CV]"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01296"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1185657.1185861"},{"key":"e_1_3_2_2_16_1","volume-title":"CorrTalk: Correlation Between Hierarchical Speech and Facial Activity Variances for 3D Animation","author":"Chu Zhaojie","year":"2024","unstructured":"Zhaojie Chu, Kailing Guo, Xiaofen Xing, Yilin Lan, Bolun Cai, and Xiangmin Xu. 2024. CorrTalk: Correlation Between Hierarchical Speech and Facial Activity Variances for 3D Animation. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01034"},{"key":"e_1_3_2_2_18_1","volume-title":"EMOCA: Emotion Driven Monocular Face Capture and Animation. In Conference on Computer Vision and Pattern Recognition (CVPR). 20311\u201320322","author":"Danecek Radek","year":"2022","unstructured":"Radek Danecek, Michael\u00a0J. Black, and Timo Bolkart. 2022. EMOCA: Emotion Driven Monocular Face Capture and Animation. In Conference on Computer Vision and Pattern Recognition (CVPR). 20311\u201320322."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618183"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925984"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3395208"},{"key":"e_1_3_2_2_22_1","volume-title":"Facial action coding system. Environmental Psychology & Nonverbal Behavior","author":"Ekman Paul","year":"1978","unstructured":"Paul Ekman and Wallace\u00a0V Friesen. 1978. Facial action coding system. Environmental Psychology & Nonverbal Behavior (1978)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CA.1998.681913"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01821"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3522615"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2010.2052239"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459936"},{"key":"e_1_3_2_2_28_1","unstructured":"Rinon Gal Yuval Alaluf Yuval Atzmon Or Patashnik Amit\u00a0H. Bermano Gal Chechik and Daniel Cohen-Or. 2022. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. arxiv:2208.01618\u00a0[cs.CV]"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02012"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3614157"},{"key":"e_1_3_2_2_31_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems 33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020), 6840\u20136851."},{"key":"e_1_3_2_2_32_1","volume-title":"Classifier-Free Diffusion Guidance. In NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications.","author":"Ho Jonathan","year":"2021","unstructured":"Jonathan Ho and Tim Salimans. 2021. Classifier-Free Diffusion Guidance. In NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267935.3267950"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.5220\/0005669500790086"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383652.3423911"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CA.2001.982373"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073658"},{"key":"e_1_3_2_2_38_1","unstructured":"Akio Kodaira Chenfeng Xu Toshiki Hazama Takanori Yoshimoto Kohei Ohno Shogo Mitsuhori Soichi Sugano Hanying Cho Zhijian Liu and Kurt Keutzer. 2023. StreamDiffusion: A Pipeline-level Solution for Real-time Interactive Generation. (2023). arxiv:2312.12491\u00a0[cs.CV]"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.09.006"},{"key":"e_1_3_2_2_40_1","volume-title":"Practice and theory of blendshape facial models.Eurographics (State of the Art Reports) 1, 8","author":"Lewis P","year":"2014","unstructured":"John\u00a0P Lewis, Ken Anjyo, Taehyun Rhee, Mengjie Zhang, Frederic\u00a0H Pighin, and Zhigang Deng. 2014. Practice and theory of blendshape facial models.Eurographics (State of the Art Reports) 1, 8 (2014), 2."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417817"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00347"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","unstructured":"Tianye Li Timo Bolkart Michael.\u00a0J. Black Hao Li and Javier Romero. 2017b. Learning a model of facial shape and expression from 4D scans. ACM Transactions on Graphics (Proc. SIGGRAPH Asia) 36 6 194:1\u2013194:17. https:\/\/doi.org\/10.1145\/3130800.3130813","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_2_45_1","volume-title":"OMG: Towards Open-vocabulary Motion Generation via Mixture of Controllers. arXiv preprint arXiv:2312.08985","author":"Liang Han","year":"2023","unstructured":"Han Liang, Jiacheng Bao, Ruichi Zhang, Sihan Ren, Yuecheng Xu, Sibei Yang, Xin Chen, Jingyi Yu, and Lan Xu. 2023. OMG: Towards Open-vocabulary Motion Generation via Mixture of Controllers. arXiv preprint arXiv:2312.08985 (2023)."},{"key":"e_1_3_2_2_46_1","volume-title":"BEAT: A Large-Scale Semantic and\u00a0Emotional Multi-modal Dataset for\u00a0Conversational Gestures Synthesis. In Computer Vision \u2013 ECCV","author":"Liu Haiyang","year":"2022","unstructured":"Haiyang Liu, Zihao Zhu, Naoya Iwamoto, Yichen Peng, Zhengqing Li, You Zhou, Elif Bozkurt, and Bo Zheng. 2022. BEAT: A Large-Scale Semantic and\u00a0Emotional Multi-modal Dataset for\u00a0Conversational Gestures Synthesis. In Computer Vision \u2013 ECCV 2022, Shai Avidan, Gabriel Brostow, Moustapha Ciss\u00e9, Giovanni\u00a0Maria Farinella, and Tal Hassner (Eds.). Springer Nature Switzerland, Cham, 612\u2013630."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_2_49_1","volume-title":"32nd British Machine Vision Conference, BMVC","author":"Montesinos F","year":"2021","unstructured":"Juan\u00a0F Montesinos, Venkatesh\u00a0S Kadandale, and Gloria Haro. 2021. A cappella: Audio-visual Singing Voice Separation. In 32nd British Machine Vision Conference, BMVC 2021."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01975"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611734"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01891"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3243017"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591556"},{"key":"e_1_3_2_2_55_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00121"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_58_1","unstructured":"Tim Salimans and Jonathan Ho. 2022. Progressive Distillation for Fast Sampling of Diffusion Models. arxiv:2202.00512\u00a0[cs.LG]"},{"key":"e_1_3_2_2_59_1","volume-title":"Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418","author":"Shafir Yonatan","year":"2023","unstructured":"Yonatan Shafir, Guy Tevet, Roy Kapon, and Amit\u00a0H Bermano. 2023. Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418 (2023)."},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","unstructured":"Il-Kyu Shin A.\u00a0Cengiz \u00d6ztireli Hyeon-Joong Kim Thabo Beeler Markus Gross and Soo-Mi Choi. 2014. Extraction and Transfer of Facial Expression Wrinkles for Facial Performance Enhancement. In Pacific Graphics Short Papers John Keyser Young\u00a0J. Kim and Peter Wonka (Eds.). The Eurographics Association. https:\/\/doi.org\/10.2312\/pgs.20141262","DOI":"10.2312\/pgs.20141262"},{"key":"e_1_3_2_2_61_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a037)","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep Unsupervised Learning using Nonequilibrium Thermodynamics. In Proceedings of the 32nd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a037), Francis Bach and David Blei (Eds.). PMLR, Lille, France, 2256\u20132265. https:\/\/proceedings.mlr.press\/v37\/sohl-dickstein15.html"},{"key":"e_1_3_2_2_62_1","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2022. Denoising Diffusion Implicit Models. arxiv:2010.02502\u00a0[cs.LG]"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3623264.3624447"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"crossref","unstructured":"Zhiyao Sun Tian Lv Sheng Ye Matthieu\u00a0Gaetan Lin Jenny Sheng Yu-Hui Wen Minjing Yu and Yong jin Liu. 2023. DiffPoseTalk: Speech-Driven Stylistic 3D Facial Animation and Head Pose Generation via Diffusion Models. arxiv:2310.00434\u00a0[cs.CV]","DOI":"10.1145\/3658221"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073699"},{"key":"e_1_3_2_2_66_1","unstructured":"Guy Tevet Sigal Raab Brian Gordon Yonatan Shafir Daniel Cohen-Or and Amit\u00a0H Bermano. 2022. Human motion diffusion model. arXiv preprint arXiv:2209.14916."},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"crossref","unstructured":"Balamurugan Thambiraja Sadegh Aliakbarian Darren Cosker and Justus Thies. 2023a. 3DiFACE: Diffusion-based Speech-driven 3D Facial Animation and Editing. arxiv:2312.00870\u00a0[cs.CV]","DOI":"10.1109\/ICCV51070.2023.01885"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01885"},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_42"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00122"},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00767"},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2927975"},{"key":"e_1_3_2_2_73_1","volume-title":"Neural discrete representation learning. Advances in neural information processing systems 30","author":"Den\u00a0Oord Aaron Van","year":"2017","unstructured":"Aaron Van Den\u00a0Oord, Oriol Vinyals, 2017. Neural discrete representation learning. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14640"},{"key":"e_1_3_2_2_75_1","volume-title":"MEAD: A Large-Scale Audio-Visual Dataset for Emotional Talking-Face Generation. In Computer Vision \u2013 ECCV","author":"Wang Kaisiyuan","year":"2020","unstructured":"Kaisiyuan Wang, Qianyi Wu, Linsen Song, Zhuoqian Yang, Wayne Wu, Chen Qian, Ran He, Yu Qiao, and Chen\u00a0Change Loy. 2020. MEAD: A Large-Scale Audio-Visual Dataset for Emotional Talking-Face Generation. In Computer Vision \u2013 ECCV 2020, Andrea Vedaldi, Horst Bischof, Thomas Brox, and Jan-Michael Frahm (Eds.). Springer International Publishing, Cham, 700\u2013717."},{"key":"e_1_3_2_2_76_1","volume-title":"Multiface: A dataset for neural face rendering. arXiv preprint arXiv:2207.11243","author":"Zheng Ningyuan","year":"2022","unstructured":"Cheng-hsin Wuu, Ningyuan Zheng, Scott Ardisson, Rohan Bali, Danielle Belko, Eric Brockmeyer, Lucas Evans, Timothy Godisart, Hyowon Ha, Xuhua Huang, 2022. Multiface: A dataset for neural face rendering. arXiv preprint arXiv:2207.11243 (2022)."},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01229"},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626235"},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01261"},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00053"},{"key":"e_1_3_2_2_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00053"},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2021.3117484"},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3618342"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555469"},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"crossref","unstructured":"Wenxuan Zhang Xiaodong Cun Xuan Wang Yong Zhang Xi Shen Yu Guo Ying Shan and Fei Wang. 2022a. SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation. arXiv:2211.12194","DOI":"10.1109\/CVPR52729.2023.00836"},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00366"},{"key":"e_1_3_2_2_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01970"},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201292"}],"event":{"name":"SIGGRAPH '24: Special Interest Group on Computer Graphics and Interactive Techniques Conference","location":"Denver CO USA","acronym":"SIGGRAPH '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657413","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3641519.3657413","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:35Z","timestamp":1750295375000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657413"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,13]]},"references-count":89,"alternative-id":["10.1145\/3641519.3657413","10.1145\/3641519"],"URL":"https:\/\/doi.org\/10.1145\/3641519.3657413","relation":{},"subject":[],"published":{"date-parts":[[2024,7,13]]},"assertion":[{"value":"2024-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}