{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:55:48Z","timestamp":1776117348208,"version":"3.50.1"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031197741","type":"print"},{"value":"9783031197758","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19775-8_39","type":"book-chapter","created":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T12:12:59Z","timestamp":1666440779000},"page":"666-682","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":87,"title":["Learning Dynamic Facial Radiance Fields for\u00a0Few-Shot Talking Head Synthesis"],"prefix":"10.1007","author":[{"given":"Shuai","family":"Shen","sequence":"first","affiliation":[]},{"given":"Wanhua","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yueqi","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,23]]},"reference":[{"key":"39_CR1","unstructured":"Andrew, A.M.: Multiple view geometry in computer vision. Kybernetes (2001)"},{"key":"39_CR2","doi-asserted-by":"crossref","unstructured":"Blanz, V., Vetter, T.: A morphable model for the synthesis of 3D faces. In: Annual Conference on Computer Graphics and Interactive Techniques (1999)","DOI":"10.1145\/311535.311556"},{"key":"39_CR3","doi-asserted-by":"crossref","unstructured":"Chan, E.R., Monteiro, M., Kellnhofer, P., Wu, J., Wetzstein, G.: pi-GAN: periodic implicit generative adversarial networks for 3D-aware image synthesis. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00574"},{"key":"39_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/978-3-030-58545-7_3","volume-title":"Computer Vision \u2013 ECCV 2020","author":"L Chen","year":"2020","unstructured":"Chen, L., et al.: Talking-head generation with rhythmic head motion. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 35\u201351. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_3"},{"key":"39_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., Maddox, R.K., Duan, Z., Xu, C.: Hierarchical cross-modal talking face generation with dynamic pixel-wise loss. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00802"},{"key":"39_CR6","unstructured":"Christos Doukas, M., Zafeiriou, S., Sharmanska, V.: HeadGAN: video-and-audio-driven talking head synthesis. arXiv (2020)"},{"key":"39_CR7","unstructured":"Chung, J.S., Jamaludin, A., Zisserman, A.: You said that? In: BMVC (2017)"},{"key":"39_CR8","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: ACCV (2016)","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"39_CR9","doi-asserted-by":"crossref","unstructured":"Cudeiro, D., Bolkart, T., Laidlaw, C., Ranjan, A., Black, M.J.: Capture, learning, and synthesis of 3D speaking styles. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01034"},{"key":"39_CR10","doi-asserted-by":"crossref","unstructured":"Curless, B., Levoy, M.: A volumetric method for building complex models from range images. In: Annual Conference on Computer Graphics and Interactive Techniques (1996)","DOI":"10.1145\/237170.237269"},{"key":"39_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1007\/978-3-030-58577-8_25","volume-title":"Computer Vision \u2013 ECCV 2020","author":"D Das","year":"2020","unstructured":"Das, D., Biswas, S., Sinha, S., Bhowmick, B.: Speech-driven facial animation using cascaded GANs for learning of motion and texture. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 408\u2013424. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_25"},{"key":"39_CR12","first-page":"3480","volume":"24","author":"SE Eskimez","year":"2021","unstructured":"Eskimez, S.E., Zhang, Y., Duan, Z.: Speech driven talking face generation from a single image and an emotion condition. TMM 24, 3480\u20133490 (2021)","journal-title":"TMM"},{"key":"39_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3323028","volume":"38","author":"O Fried","year":"2019","unstructured":"Fried, O., et al.: Text-based editing of talking-head video. TOG 38, 1\u201314 (2019)","journal-title":"TOG"},{"key":"39_CR14","doi-asserted-by":"crossref","unstructured":"Gafni, G., Thies, J., Zollhofer, M., Nie\u00dfner, M.: Dynamic neural radiance fields for monocular 4D facial avatar reconstruction. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00854"},{"key":"39_CR15","unstructured":"Gao, C., Shih, Y., Lai, W.S., Liang, C.K., Huang, J.B.: Portrait neural radiance fields from a single image. arXiv (2020)"},{"key":"39_CR16","doi-asserted-by":"crossref","unstructured":"Gu, K., Zhou, Y., Huang, T.: FLNet: landmark driven fetching and learning network for faithful talking facial animation synthesis. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6717"},{"key":"39_CR17","doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, K., Liang, S., Liu, Y., Bao, H., Zhang, J.: AD-NeRF: audio driven neural radiance fields for talking head synthesis. In: ECCV (2021)","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"39_CR18","unstructured":"Hannun, A., et al.: Deep speech: Scaling up end-to-end speech recognition. arXiv (2014)"},{"key":"39_CR19","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al.: Spatial transformer networks. In: NeurIPS (2015)"},{"key":"39_CR20","doi-asserted-by":"crossref","unstructured":"Ji, X., et al.: Audio-driven emotional video portraits. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01386"},{"key":"39_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073658","volume":"36","author":"T Karras","year":"2017","unstructured":"Karras, T., Aila, T., Laine, S., Herva, A., Lehtinen, J.: Audio-driven facial animation by joint end-to-end learning of pose and emotion. TOG 36, 1\u201312 (2017)","journal-title":"TOG"},{"key":"39_CR22","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv (2014)"},{"key":"39_CR23","doi-asserted-by":"crossref","unstructured":"Kumar, N., Goel, S., Narang, A., Hasan, M.: Robust one shot audio to video generation. In: CVPRW (2020)","DOI":"10.1109\/CVPRW50498.2020.00393"},{"key":"39_CR24","unstructured":"Locatello, F., et al.: Object-centric learning with slot attention. arXiv (2020)"},{"key":"39_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3478513.3480484","volume":"40","author":"Y Lu","year":"2021","unstructured":"Lu, Y., Chai, J., Cao, X.: Live speech portraits: real-time photorealistic talking-head animation. TOG 40, 1\u201317 (2021)","journal-title":"TOG"},{"key":"39_CR26","doi-asserted-by":"crossref","unstructured":"Meshry, M., Suri, S., Davis, L.S., Shrivastava, A.: Learned spatial representations for few-shot talking-head synthesis. arXiv (2021)","DOI":"10.1109\/ICCV48922.2021.01357"},{"key":"39_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-030-58452-8_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Mildenhall","year":"2020","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 405\u2013421. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24"},{"key":"39_CR28","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Mescheder, L., Oechsle, M., Geiger, A.: Differentiable volumetric rendering: learning implicit 3D representations without 3d supervision. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00356"},{"key":"39_CR29","doi-asserted-by":"crossref","unstructured":"Park, K., et al.: Nerfies: deformable neural radiance fields. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00581"},{"key":"39_CR30","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library. In: NeurIPS (2019)"},{"key":"39_CR31","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: ACM MM (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"39_CR32","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Corona, E., Pons-Moll, G., Moreno-Noguer, F.: D-NeRF: neural radiance fields for dynamic scenes. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01018"},{"key":"39_CR33","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1023\/A:1008176507526","volume":"35","author":"SM Seitz","year":"1999","unstructured":"Seitz, S.M., Dyer, C.R.: Photorealistic scene reconstruction by voxel coloring. IJCV 35, 151\u2013173 (1999). https:\/\/doi.org\/10.1023\/A:1008176507526","journal-title":"IJCV"},{"key":"39_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/978-3-030-58555-6_4","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Shang","year":"2020","unstructured":"Shang, J., Shen, T., Li, S., Zhou, L., Zhen, M., Fang, T., Quan, L.: Self-supervised monocular 3D face reconstruction by occlusion-aware multi-view geometry consistency. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12360, pp. 53\u201370. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58555-6_4"},{"key":"39_CR35","unstructured":"Sitzmann, V., Zollh\u00f6fer, M., Wetzstein, G.: Scene representation networks: Continuous 3D-structure-aware neural scene representations. arXiv (2019)"},{"key":"39_CR36","unstructured":"Song, L., Wu, W., Qian, C., He, R., Loy, C.C.: Everybody\u2019s talkin\u2019: let me talk as you want. arXiv (2020)"},{"key":"39_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S.M., Kemelmacher-Shlizerman, I.: Synthesizing Obama: learning lip sync from audio. TOG 36, 1\u201313 (2017)","journal-title":"TOG"},{"key":"39_CR38","doi-asserted-by":"crossref","unstructured":"Tewari, A., et al.: State of the art on neural rendering. In: Computer Graphics Forum (2020)","DOI":"10.1111\/cgf.14022"},{"key":"39_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1007\/978-3-030-58517-4_42","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Thies","year":"2020","unstructured":"Thies, J., Elgharib, M., Tewari, A., Theobalt, C., Nie\u00dfner, M.: Neural voice puppetry: audio-driven facial reenactment. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12361, pp. 716\u2013731. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58517-4_42"},{"key":"39_CR40","doi-asserted-by":"crossref","unstructured":"Thies, J., Zollhofer, M., Stamminger, M., Theobalt, C., Nie\u00dfner, M.: Face2Face: real-time face capture and reenactment of RGB videos. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.262"},{"key":"39_CR41","doi-asserted-by":"crossref","unstructured":"Tretschk, E., Tewari, A., Golyanik, V., Zollhofer, M., Lassner, C., Theobalt, C.: Non-rigid neural radiance fields: reconstruction and novel view synthesis of a dynamic scene from monocular video. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01272"},{"key":"39_CR42","doi-asserted-by":"crossref","unstructured":"Trevithick, A., Yang, B.: GRF: learning a general radiance field for 3D representation and rendering. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01490"},{"key":"39_CR43","doi-asserted-by":"crossref","unstructured":"Wang, Q., et al.: IBRNet: learning multi-view image-based rendering. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00466"},{"key":"39_CR44","doi-asserted-by":"crossref","unstructured":"Wang, S., Li, L., Ding, Y., Fan, C., Yu, X.: Audio2Head: audio-driven one-shot talking-head generation with natural head motion. arXiv (2021)","DOI":"10.24963\/ijcai.2021\/152"},{"key":"39_CR45","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Mallya, A., Liu, M.Y.: One-shot free-view neural talking-head synthesis for video conferencing. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"39_CR46","first-page":"600","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. TIP 13, 600\u2013612 (2004)","journal-title":"TIP"},{"key":"39_CR47","unstructured":"Yao, S., Zhong, R., Yan, Y., Zhai, G., Yang, X.: DFA-NeRF: personalized talking head generation via disentangled face attributes neural rendering. arXiv (2022)"},{"key":"39_CR48","doi-asserted-by":"crossref","unstructured":"Yenamandra, T., et al.: i3DMM: deep implicit 3D morphable model of human heads. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01261"},{"key":"39_CR49","unstructured":"Yi, R., Ye, Z., Zhang, J., Bao, H., Liu, Y.J.: Audio-driven talking face video generation with learning-based personalized head pose. arXiv (2020)"},{"key":"39_CR50","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: pixelNeRF: neural radiance fields from one or few images. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"39_CR51","doi-asserted-by":"crossref","unstructured":"Zakharov, E., Shysheya, A., Burkov, E., Lempitsky, V.: Few-shot adversarial learning of realistic neural talking head models. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00955"},{"key":"39_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"39_CR53","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wu, X., Zhai, X., Ben, X., Tu, C.: DAVD-Net: deep audio-aided video decompression of talking heads. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01235"},{"key":"39_CR54","doi-asserted-by":"crossref","unstructured":"Zhou, H., Liu, Y., Liu, Z., Luo, P., Wang, X.: Talking face generation by adversarially disentangled audio-visual representation. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"39_CR55","first-page":"1","volume":"39","author":"Y Zhou","year":"2020","unstructured":"Zhou, Y., Han, X., Shechtman, E., Echevarria, J., Kalogerakis, E., Li, D.: MakeltTalk: speaker-aware talking-head animation. TOG 39, 1\u201315 (2020)","journal-title":"TOG"},{"key":"39_CR56","doi-asserted-by":"crossref","unstructured":"Zhu, H., Huang, H., Li, Y., Zheng, A., He, R.: Arbitrary talking face generation via attentional audio-visual coherence learning. In: IJCAI (2020)","DOI":"10.24963\/ijcai.2020\/327"},{"key":"39_CR57","doi-asserted-by":"crossref","unstructured":"Zollh\u00f6fer, M., et al.: State of the art on monocular 3D face reconstruction, tracking, and applications. In: Computer Graphics Forum (2018)","DOI":"10.1111\/cgf.13382"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19775-8_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T10:20:36Z","timestamp":1728210036000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19775-8_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031197741","9783031197758"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19775-8_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"23 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}