{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:11:55Z","timestamp":1771956715832,"version":"3.50.1"},"publisher-location":"Cham","reference-count":55,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726422","type":"print"},{"value":"9783031726439","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72643-9_19","type":"book-chapter","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T20:48:34Z","timestamp":1732222114000},"page":"316-333","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Portrait4D-V2: Pseudo Multi-view Data Creates Better 4D Head Synthesizer"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7241-8519","authenticated-orcid":false,"given":"Yu","family":"Deng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9507-6741","authenticated-orcid":false,"given":"Duomin","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8268-7517","authenticated-orcid":false,"given":"Baoyuan","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Blanz, V., Vetter, T.: A morphable model for the synthesis of 3D faces. In: 26th Annual Conference on Computer Graphics and Interactive Techniques (SIGGRAPH 1999), pp. 187\u2013194. ACM Press (1999)","DOI":"10.1145\/311535.311556"},{"key":"19_CR2","unstructured":"Bolkart, T.: BFM to FLAME. https:\/\/github.com\/TimoBolkart\/BFM_to_FLAME (2020)"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Burkov, E., Pasechnik, I., Grigorev, A., Lempitsky, V.: Neural head reenactment with latent pose descriptors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13786\u201313795 (2020)","DOI":"10.1109\/CVPR42600.2020.01380"},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et\u00a0al.: Efficient geometry-aware 3D generative adversarial networks. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"19_CR5","unstructured":"Chu, X., Li, Y., Zeng, A., Yang, T., Lin, L., Liu, Y., Harada, T.: GPAvatar: generalizable and precise head avatar from image (s). arXiv preprint arXiv:2401.10215 (2024)"},{"key":"19_CR6","doi-asserted-by":"publisher","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: Computer Vision\u2013ACCV 2016 Workshops: ACCV 2016 International Workshops, Taipei, Taiwan, November 20-24, 2016, Revised Selected Papers, Part II 13, pp. 251\u2013263. Springer (2017). https:\/\/doi.org\/10.1007\/978-3-319-54427-4_19","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Dan\u011b\u010dek, R., Black, M.J., Bolkart, T.: EMOCA: emotion driven monocular face capture and animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20311\u201320322 (2022)","DOI":"10.1109\/CVPR52688.2022.01967"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., Zafeiriou, S.: ArcFace: additive angular margin loss for deep face recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4690\u20134699 (2019)","DOI":"10.1109\/CVPR.2019.00482"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Deng, Y., Wang, D., Ren, X., Chen, X., Wang, B.: Learning one-shot 4D head avatar synthesis using synthetic data. arXiv preprint arXiv:2311.18729 (2023)","DOI":"10.1109\/CVPR52733.2024.00680"},{"key":"19_CR10","doi-asserted-by":"crossref","unstructured":"Deng, Y., Yang, J., Xu, S., Chen, D., Jia, Y., Tong, X.: Accurate 3D face reconstruction with weakly-supervised learning: from single image to image set. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp.\u00a00\u20130 (2019)","DOI":"10.1109\/CVPRW.2019.00038"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Drobyshev, N., et al.: MegaPortraits: one-shot megapixel neural head avatars. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 2663\u20132671 (2022)","DOI":"10.1145\/3503161.3547838"},{"issue":"4","key":"19_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459936","volume":"40","author":"Y Feng","year":"2021","unstructured":"Feng, Y., Feng, H., Black, M.J., Bolkart, T.: Learning an animatable detailed 3D face model from in-the-wild images. ACM Trans. Graph. (ToG) 40(4), 1\u201313 (2021)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"19_CR13","unstructured":"He, T., et\u00a0al.: Gaia: zero-shot talking avatar generation. arXiv preprint arXiv:2311.15230 (2023)"},{"key":"19_CR14","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems, pp. 6626\u20136637 (2017)"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Hong, F.T., Zhang, L., Shen, L., Xu, D.: Depth-aware generative adversarial network for talking head video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3397\u20133406 (2022)","DOI":"10.1109\/CVPR52688.2022.00339"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Hong, Y., Peng, B., Xiao, H., Liu, L., Zhang, J.: HeadNeRF: a real-time nerf-based parametric head model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20374\u201320384 (2022)","DOI":"10.1109\/CVPR52688.2022.01973"},{"key":"19_CR17","unstructured":"Hong, Y., et al.: LRM: Large reconstruction model for single image to 3D. arXiv preprint arXiv:2311.04400 (2023)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Huang, X., Belongie, S.: Arbitrary style transfer in real-time with adaptive instance normalization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1501\u20131510 (2017)","DOI":"10.1109\/ICCV.2017.167"},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134 (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Jiang, Z.H., Wu, Q., Chen, K., Zhang, J.: Disentangled representation learning for 3D face shape. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11957\u201311966 (2019)","DOI":"10.1109\/CVPR.2019.01223"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Kajiya, J.T., Von Herzen, B.P.: Ray tracing volume densities. ACM SIGGRAPH 18(3), 165\u2013174 (1984)","DOI":"10.1145\/964965.808594"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of styleGAN. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8110\u20138119 (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"19_CR24","doi-asserted-by":"publisher","unstructured":"Khakhulin, T., Sklyarova, V., Lempitsky, V., Zakharov, E.: Realistic one-shot mesh-based head avatars. In: European Conference on Computer Vision, pp. 345\u2013362. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20086-1_20","DOI":"10.1007\/978-3-031-20086-1_20"},{"key":"19_CR25","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: International Conference on Learning Representations (2015)"},{"key":"19_CR26","unstructured":"Li, J., et al.: Instant3D: fast text-to-3D with sparse-view generation and large reconstruction model. arXiv preprint arXiv:2311.06214 (2023)"},{"issue":"6","key":"19_CR27","first-page":"1","volume":"36","author":"T Li","year":"2017","unstructured":"Li, T., Bolkart, T., Black, M.J., Li, H., Romero, J.: Learning a model of facial shape and expression from 4D scans. ACM Trans. Graph. 36(6), 1\u201317 (2017)","journal-title":"ACM Trans. Graph."},{"key":"19_CR28","doi-asserted-by":"crossref","unstructured":"Li, W., et al.: One-shot high-fidelity talking-head synthesis with deformable neural radiance field. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17969\u201317978 (2023)","DOI":"10.1109\/CVPR52729.2023.01723"},{"key":"19_CR29","unstructured":"Li, X., De\u00a0Mello, S., Liu, S., Nagano, K., Iqbal, U., Kautz, J.: Generalizable one-shot neural head avatar. arXiv preprint arXiv:2306.08768 (2023)"},{"key":"19_CR30","unstructured":"Lin, C.Z., Lindell, D.B., Chan, E.R., Wetzstein, G.: 3D GAN inversion for controllable portrait image animation. arXiv preprint arXiv:2203.13441 (2022)"},{"issue":"4","key":"19_CR31","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1145\/37402.37422","volume":"21","author":"WE Lorensen","year":"1987","unstructured":"Lorensen, W.E., Cline, H.E.: Marching cubes: a high resolution 3D surface construction algorithm. ACM SIGGRAPH 21(4), 163\u2013169 (1987)","journal-title":"ACM SIGGRAPH"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Ma, Z., Zhu, X., Qi, G.J., Lei, Z., Zhang, L.: OTAvatar: one-shot talking face avatar with controllable tri-plane rendering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16901\u201316910 (2023)","DOI":"10.1109\/CVPR52729.2023.01621"},{"key":"19_CR33","doi-asserted-by":"publisher","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: European Conference on Computer Vision. pp. 405\u2013421. Springer (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J.S., Zisserman, A.: VoxCeleb: a large-scale speaker identification dataset. arXiv preprint arXiv:1706.08612 (2017)","DOI":"10.21437\/Interspeech.2017-950"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Paysan, P., Knothe, R., Amberg, B., Romdhani, S., Vetter, T.: A 3D face model for pose and illumination invariant face recognition. In: IEEE International Conference on Advanced Video and Signal Based Surveillance, pp. 296\u2013301 (2009)","DOI":"10.1109\/AVSS.2009.58"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Ren, Y., Li, G., Chen, Y., Li, T.H., Liu, S.: PIRenderer: controllable portrait image generation via semantic neural rendering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13759\u201313768 (2021)","DOI":"10.1109\/ICCV48922.2021.01350"},{"key":"19_CR37","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., Sebe, N.: First order motion model for image animation. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"19_CR38","doi-asserted-by":"crossref","unstructured":"Tran, P., Zakharov, E., Ho, L.N., Tran, A.T., Hu, L., Li, H.: VOODOO 3D: volumetric portrait disentanglement for one-shot 3D head reenactment. arXiv preprint arXiv:2312.04651 (2023)","DOI":"10.1109\/CVPR52733.2024.00984"},{"issue":"4","key":"19_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592460","volume":"42","author":"A Trevithick","year":"2023","unstructured":"Trevithick, A., et al.: Real-time radiance fields for single-image portrait view synthesis. ACM Trans. Graph. (TOG) 42(4), 1\u201315 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"19_CR40","doi-asserted-by":"crossref","unstructured":"Wang, D., Deng, Y., Yin, Z., Shum, H.Y., Wang, B.: Progressive disentangled representation learning for fine-grained controllable talking head synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17979\u201317989 (2023)","DOI":"10.1109\/CVPR52729.2023.01724"},{"key":"19_CR41","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Mallya, A., Liu, M.Y.: One-shot free-view neural talking-head synthesis for video conferencing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10039\u201310049 (2021)","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"19_CR42","doi-asserted-by":"crossref","unstructured":"Wang, X., Li, Y., Zhang, H., Shan, Y.: Towards real-world blind face restoration with generative facial prior. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9168\u20139178 (2021)","DOI":"10.1109\/CVPR46437.2021.00905"},{"key":"19_CR43","doi-asserted-by":"crossref","unstructured":"Xie, L., Wang, X., Zhang, H., Dong, C., Shan, Y.: VFHQ: a high-quality dataset and benchmark for video face super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 657\u2013666 (2022)","DOI":"10.1109\/CVPRW56347.2022.00081"},{"key":"19_CR44","doi-asserted-by":"crossref","unstructured":"Xu, S., et al.: Deep 3D portrait from a single image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7710\u20137720 (2020)","DOI":"10.1109\/CVPR42600.2020.00773"},{"key":"19_CR45","unstructured":"Ye, Z., et\u00a0al.: Real3D-portrait: one-shot realistic 3D talking portrait synthesis. arXiv preprint arXiv:2401.08503 (2024)"},{"key":"19_CR46","doi-asserted-by":"crossref","unstructured":"Yin, F., et al.: StyleHEAT: one-shot high-resolution editable talking face generation via pretrained stylegan. arXiv preprint arXiv:2203.04036 (2022)","DOI":"10.1007\/978-3-031-19790-1_6"},{"key":"19_CR47","doi-asserted-by":"crossref","unstructured":"Yu, J., Zhu, H., Jiang, L., Loy, C.C., Cai, W., Wu, W.: CelebV-text: a large-scale facial text-video dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14805\u201314814 (2023)","DOI":"10.1109\/CVPR52729.2023.01422"},{"key":"19_CR48","doi-asserted-by":"crossref","unstructured":"Yu, W., et\u00a0al.: NOFA: NeRF-based one-shot facial avatar reconstruction. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201312 (2023)","DOI":"10.1145\/3588432.3591555"},{"key":"19_CR49","doi-asserted-by":"crossref","unstructured":"Zakharov, E., Shysheya, A., Burkov, E., Lempitsky, V.: Few-shot adversarial learning of realistic neural talking head models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9459\u20139468 (2019)","DOI":"10.1109\/ICCV.2019.00955"},{"key":"19_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, B., et al.: MetaPortrait: identity-preserving talking head generation with fast personalized adaptation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22096\u201322105 (2023)","DOI":"10.1109\/CVPR52729.2023.02116"},{"key":"19_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"19_CR52","doi-asserted-by":"crossref","unstructured":"Zhou, H., Sun, Y., Wu, W., Loy, C.C., Wang, X., Liu, Z.: Pose-controllable talking face generation by implicitly modularized audio-visual representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4176\u20134186 (2021)","DOI":"10.1109\/CVPR46437.2021.00416"},{"key":"19_CR53","doi-asserted-by":"publisher","unstructured":"Zhu, H., et al.: CelebV-HQ: a large-scale video facial attributes dataset. In: European conference on computer vision, pp. 650\u2013667. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20071-7_3","DOI":"10.1007\/978-3-031-20071-7_3"},{"key":"19_CR54","doi-asserted-by":"publisher","unstructured":"Zhuang, Y., Zhu, H., Sun, X., Cao, X.: MoFaNeRF: morphable facial neural radiance field. In: European Conference on Computer Vision, pp. 268\u2013285. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20062-5_16","DOI":"10.1007\/978-3-031-20062-5_16"},{"key":"19_CR55","doi-asserted-by":"crossref","unstructured":"Zou, Z.X., et al.: Triplane meets gaussian splatting: fast and generalizable single-view 3D reconstruction with transformers. arXiv preprint arXiv:2312.09147 (2023)","DOI":"10.1109\/CVPR52733.2024.00983"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72643-9_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T21:27:28Z","timestamp":1732224448000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72643-9_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"ISBN":["9783031726422","9783031726439"],"references-count":55,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72643-9_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"22 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}