{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T11:28:01Z","timestamp":1742988481695,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031820205"},{"type":"electronic","value":"9783031820212"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-82021-2_17","type":"book-chapter","created":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T09:46:02Z","timestamp":1740822362000},"page":"245-257","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Expression Fusion to\u00a0Enhance Video and\u00a0Speech-Driven 3D Facial Animation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8615-4088","authenticated-orcid":false,"given":"Yangyue","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8505-5910","authenticated-orcid":false,"given":"Yong","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8509-9393","authenticated-orcid":false,"given":"Xukun","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,1]]},"reference":[{"issue":"3","key":"17_CR1","doi-asserted-by":"publisher","first-page":"1075","DOI":"10.1109\/TASL.2006.885910","volume":"15","author":"C Busso","year":"2007","unstructured":"Busso, C., Deng, Z., Grimm, M., Neumann, U., Narayanan, S.: Rigid head motion in expressive speech animation: Analysis and synthesis. IEEE Trans. Audio Speech Lang. Process. 15(3), 1075\u20131086 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"3\u20134","key":"17_CR2","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1002\/cav.80","volume":"16","author":"C Busso","year":"2005","unstructured":"Busso, C., Deng, Z., Neumann, U., Narayanan, S.: Natural head motion synthesis driven by acoustic prosodic features. Comput. Animation Virtual Worlds 16(3\u20134), 283\u2013290 (2005)","journal-title":"Comput. Animation Virtual Worlds"},{"doi-asserted-by":"crossref","unstructured":"Chai, Z., et al.: HiFace: high-fidelity 3D face reconstruction by learning static and dynamic details. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9087\u20139098 (2023)","key":"17_CR3","DOI":"10.1109\/ICCV51070.2023.00834"},{"doi-asserted-by":"crossref","unstructured":"Cudeiro, D., Bolkart, T., Laidlaw, C., Ranjan, A., Black, M.J.: Capture, learning, and synthesis of 3D speaking styles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10101\u201310111 (2019)","key":"17_CR4","DOI":"10.1109\/CVPR.2019.01034"},{"doi-asserted-by":"crossref","unstructured":"Dan\u011b\u010dek, R., Black, M.J., Bolkart, T.: EMOCA: emotion driven monocular face capture and animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20311\u201320322 (2022)","key":"17_CR5","DOI":"10.1109\/CVPR52688.2022.01967"},{"doi-asserted-by":"crossref","unstructured":"Doukas, M.C., Ververas, E., Sharmanska, V., Zafeiriou, S.: Free-HeadGAN: neural talking head synthesis with explicit gaze control. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","key":"17_CR6","DOI":"10.1109\/TPAMI.2023.3253243"},{"doi-asserted-by":"crossref","unstructured":"Fan, Y., Lin, Z., Saito, J., Wang, W., Komura, T.: FaceFormer: speech-driven 3D facial animation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18770\u201318780 (2022)","key":"17_CR7","DOI":"10.1109\/CVPR52688.2022.01821"},{"issue":"4","key":"17_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459936","volume":"40","author":"Y Feng","year":"2021","unstructured":"Feng, Y., Feng, H., Black, M.J., Bolkart, T.: Learning an animatable detailed 3D face model from in-the-wild images. ACM Trans. Graph. (ToG) 40(4), 1\u201313 (2021)","journal-title":"ACM Trans. Graph. (ToG)"},{"doi-asserted-by":"crossref","unstructured":"Filntisis, P.P., Retsinas, G., Paraperas-Papantoniou, F., Katsamanis, A., Roussos, A., Maragos, P.: Visual speech-aware perceptual 3D facial expression reconstruction from videos. arXiv preprint arXiv:2207.11094 (2022)","key":"17_CR9","DOI":"10.1109\/CVPRW59228.2023.00609"},{"doi-asserted-by":"crossref","unstructured":"Furukawa, Y., Hern\u00e1ndez, C., et\u00a0al.: Multi-view stereo: a tutorial. Found. Trends\u00ae Comput. Graph. Vis. 9(1-2), 1\u2013148 (2015)","key":"17_CR10","DOI":"10.1561\/0600000052"},{"doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, K., Liang, S., Liu, Y.J., Bao, H., Zhang, J.: AD-NeRF: audio driven neural radiance fields for talking head synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5784\u20135794 (2021)","key":"17_CR11","DOI":"10.1109\/ICCV48922.2021.00573"},{"doi-asserted-by":"crossref","unstructured":"Jung, H., Oh, M.S., Lee, S.W.: Learning free-form deformation for 3D face reconstruction from in-the-wild images. In: 2021 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 2737\u20132742. IEEE (2021)","key":"17_CR12","DOI":"10.1109\/SMC52423.2021.9659124"},{"issue":"4","key":"17_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073658","volume":"36","author":"T Karras","year":"2017","unstructured":"Karras, T., Aila, T., Laine, S., Herva, A., Lehtinen, J.: Audio-driven facial animation by joint end-to-end learning of pose and emotion. ACM Trans. Graph. (TOG) 36(4), 1\u201312 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"doi-asserted-by":"crossref","unstructured":"Lahiri, A., Kwatra, V., Frueh, C., Lewis, J., Bregler, C.: LipSync3D: data-efficient learning of personalized 3D talking faces from video using pose and lighting normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2755\u20132764 (2021)","key":"17_CR14","DOI":"10.1109\/CVPR46437.2021.00278"},{"doi-asserted-by":"crossref","unstructured":"Li, T., Bolkart, T., Black, M.J., Li, H., Romero, J.: Learning a model of facial shape and expression from 4D scans. ACM Trans. Graph. 36(6), 1\u201317, 194 (2017)","key":"17_CR15","DOI":"10.1145\/3130800.3130813"},{"doi-asserted-by":"crossref","unstructured":"Ma, L., Deng, Z.: Real-time hierarchical facial performance capture. In: Proceedings of the ACM SIGGRAPH Symposium on Interactive 3D Graphics and Games, pp. 1\u201310 (2019)","key":"17_CR16","DOI":"10.1145\/3306131.3317016"},{"doi-asserted-by":"crossref","unstructured":"Paysan, P., Knothe, R., Amberg, B., Romdhani, S., Vetter, T.: A 3D face model for pose and illumination invariant face recognition. In: 2009 Sixth IEEE International Conference on Advanced Video and Signal Based Surveillance, pp. 296\u2013301. IEEE (2009)","key":"17_CR17","DOI":"10.1109\/AVSS.2009.58"},{"doi-asserted-by":"crossref","unstructured":"Richard, A., Lea, C., Ma, S., Gall, J., De\u00a0la Torre, F., Sheikh, Y.: Audio-and gaze-driven facial animation of codec avatars. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 41\u201350 (2021)","key":"17_CR18","DOI":"10.1109\/WACV48630.2021.00009"},{"doi-asserted-by":"crossref","unstructured":"Richard, A., Zollh\u00f6fer, M., Wen, Y., De\u00a0la Torre, F., Sheikh, Y.: MeshTalk: 3D face animation from speech using cross-modality disentanglement. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1173\u20131182 (2021)","key":"17_CR19","DOI":"10.1109\/ICCV48922.2021.00121"},{"key":"17_CR20","doi-asserted-by":"publisher","first-page":"5793","DOI":"10.1109\/TIP.2021.3087397","volume":"30","author":"Z Ruan","year":"2021","unstructured":"Ruan, Z., Zou, C., Wu, L., Wu, G., Wang, L.: SADRNet: self-aligned dual face regression networks for robust 3D dense face alignment and reconstruction. IEEE Trans. Image Process. 30, 5793\u20135806 (2021)","journal-title":"IEEE Trans. Image Process."},{"doi-asserted-by":"crossref","unstructured":"Sanyal, S., Bolkart, T., Feng, H., Black, M.J.: Learning to regress 3D face shape and expression from an image without 3D supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7763\u20137772 (2019)","key":"17_CR21","DOI":"10.1109\/CVPR.2019.00795"},{"doi-asserted-by":"crossref","unstructured":"Schneider, S., Baevski, A., Collobert, R., Auli, M.: wav2vec: unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862 (2019)","key":"17_CR22","DOI":"10.21437\/Interspeech.2019-1873"},{"issue":"6","key":"17_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2661229.2661290","volume":"33","author":"F Shi","year":"2014","unstructured":"Shi, F., Wu, H.T., Tong, X., Chai, J.: Automatic acquisition of high-fidelity facial performances using monocular videos. ACM Trans. Graph. (TOG) 33(6), 1\u201313 (2014)","journal-title":"ACM Trans. Graph. (TOG)"},{"doi-asserted-by":"crossref","unstructured":"Thambiraja, B., Habibie, I., Aliakbarian, S., Cosker, D., Theobalt, C., Thies, J.: Imitator: Personalized speech-driven 3D facial animation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20621\u201320631 (2023)","key":"17_CR24","DOI":"10.1109\/ICCV51070.2023.01885"},{"unstructured":"Yao, S., Zhong, R., Yan, Y., Zhai, G., Yang, X.: DFA-NeRF: personalized talking head generation via disentangled face attributes neural rendering. arXiv preprint arXiv:2201.00791 (2022)","key":"17_CR25"},{"unstructured":"Zhang, C., et al.: 3D talking face with personalized pose dynamics. IEEE Trans. Vis. Comput. Graph. (2021)","key":"17_CR26"},{"doi-asserted-by":"crossref","unstructured":"Zhang, C., et al.: Facial: synthesizing dynamic talking face with implicit attribute learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3867\u20133876 (2021)","key":"17_CR27","DOI":"10.1109\/ICCV48922.2021.00384"},{"doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: SadTalker: learning realistic 3D motion coefficients for stylized audio-driven single image talking face animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8652\u20138661 (2023)","key":"17_CR28","DOI":"10.1109\/CVPR52729.2023.00836"},{"doi-asserted-by":"crossref","unstructured":"Zhou, H., Sun, Y., Wu, W., Loy, C.C., Wang, X., Liu, Z.: Pose-controllable talking face generation by implicitly modularized audio-visual representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4176\u20134186 (2021)","key":"17_CR29","DOI":"10.1109\/CVPR46437.2021.00416"},{"key":"17_CR30","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1007\/978-3-031-19778-9_15","volume-title":"Computer Vision \u2013 ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XIII","author":"W Zielonka","year":"2022","unstructured":"Zielonka, W., Bolkart, T., Thies, J.: Towards metrical reconstruction of\u00a0human faces. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XIII, pp. 250\u2013269. Springer Nature Switzerland, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19778-9_15"}],"container-title":["Lecture Notes in Computer Science","Advances in Computer Graphics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-82021-2_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T09:46:17Z","timestamp":1740822377000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-82021-2_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031820205","9783031820212"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-82021-2_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CGI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Computer Graphics International Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Geneva","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"41","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cgi2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.cgs-network.org\/cgi24\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}