{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T16:36:30Z","timestamp":1758040590172,"version":"3.44.0"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032045546"},{"type":"electronic","value":"9783032045553"}],"license":[{"start":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T00:00:00Z","timestamp":1757635200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T00:00:00Z","timestamp":1757635200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04555-3_30","type":"book-chapter","created":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T08:56:14Z","timestamp":1757580974000},"page":"366-377","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DA-NeRF: High-Fidelity Talking Face Generation From Speech With Neural Radiance Fields"],"prefix":"10.1007","author":[{"given":"Yali","family":"Cai","sequence":"first","affiliation":[]},{"given":"Peng","family":"Qiao","sequence":"additional","affiliation":[]},{"given":"Dongsheng","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,12]]},"reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Bai, Y., et al.: High-fidelity facial avatar reconstruction from monocular video with generative priors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4541\u20134551 (2023)","DOI":"10.1109\/CVPR52729.2023.00441"},{"key":"30_CR2","doi-asserted-by":"publisher","unstructured":"Bi, C., Liu, X., Liu, Z.: NeRF-AD: neural radiance field with attention-based disentanglement for talking face synthesis. In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3490\u20133494. IEEE (2024). https:\/\/doi.org\/10.1109\/icassp48485.2024.10446195","DOI":"10.1109\/icassp48485.2024.10446195"},{"key":"30_CR3","doi-asserted-by":"crossref","unstructured":"Bigioi, D., Basak, S., Jordan, H., McDonnell, R., Corcoran, P.: Speech driven video editing via an audio-conditioned diffusion model. arXiv preprint arXiv:2301.04474 (2023)","DOI":"10.1016\/j.imavis.2024.104911"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Cai, C., et al.: Speak: speech-driven pose and emotion-adjustable talking head generation. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135. IEEE (2025)","DOI":"10.1109\/ICASSP49660.2025.10887862"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., et al.: Talking-head generation with rhythmic head motion. In: European Conference on Computer Vision, pp. 35\u201351. Springer (2020)","DOI":"10.1007\/978-3-030-58545-7_3"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Chen, L., Li, Z., Maddox, R.K., Duan, Z., Xu, C.: Lip movements generation at a glance. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 520\u2013535 (2018)","DOI":"10.1007\/978-3-030-01234-2_32"},{"key":"30_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1007\/978-3-030-58577-8_25","volume-title":"Computer Vision \u2013 ECCV 2020","author":"D Das","year":"2020","unstructured":"Das, D., Biswas, S., Sinha, S., Bhowmick, B.: Speech-driven facial animation using cascaded GANs for learning of motion and texture. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 408\u2013424. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_25"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Deng, K., et al.: Degstalk: decomposed per-embedding gaussian fields for hair-preserving talking face synthesis. arXiv:2412.20148 (2024)","DOI":"10.1109\/ICASSP49660.2025.10890278"},{"key":"30_CR9","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3828\u20133838 (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Gu, K., Zhou, Y., Huang, T.: FLNet: landmark driven fetching and learning network for faithful talking facial animation synthesis. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 10861\u201310868 (2020)","DOI":"10.1609\/aaai.v34i07.6717"},{"key":"30_CR11","doi-asserted-by":"crossref","unstructured":"Guo, Y., Chen, K., Liang, S., Liu, Y.J., Bao, H., Zhang, J.: AD-NeRF: audio driven neural radiance fields for talking head synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5784\u20135794 (2021)","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Hong, F.T., Zhang, L., Shen, L., Xu, D.: Depth-aware generative adversarial network for talking head video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3397\u20133406 (2022)","DOI":"10.1109\/CVPR52688.2022.00339"},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Li, D., Zhao, K., Wang, W., Peng, B., Zhang, Y., Dong, J., Tan, T.: Ae-NeRF: audio enhanced neural radiance field for few shot talking head synthesis. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 3037\u20133045 (2024)","DOI":"10.1609\/aaai.v38i4.28086"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Li, J., et al.: Talkinggaussian: structure-persistent 3d talking head synthesis via gaussian splatting. In: European Conference on Computer Vision, pp. 127\u2013145. Springer (2024)","DOI":"10.1007\/978-3-031-72684-2_8"},{"key":"30_CR15","unstructured":"Li, T., et al.: Lokitalk: learning fine-grained and generalizable correspondences to enhance nerf-based talking head synthesis. arXiv:2411.19525 (2024)"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Liu, X., Liu, Z., Bi, C.: Nerf-3dtalker: neural radiance field with 3D prior aided audio disentanglement for talking head synthesis. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.\u00a01\u20135. IEEE (2025)","DOI":"10.1109\/ICASSP49660.2025.10889538"},{"issue":"1","key":"30_CR17","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"30_CR18","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"30_CR19","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Corona, E., Pons-Moll, G., Moreno-Noguer, F.: D-NeRF: neural radiance fields for dynamic scenes (2020)","DOI":"10.1109\/CVPR46437.2021.01018"},{"key":"30_CR20","unstructured":"Sanderson, C.: The vidtimit database. IDIAP Commun. (2004)"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Shen, S., Li, W., Zhu, Z., Duan, Y., Zhou, J., Lu, J.: Learning dynamic facial radiance fields for few-shot talking head synthesis (2022)","DOI":"10.1007\/978-3-031-19775-8_39"},{"key":"30_CR22","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., Sebe, N.: First order motion model for image animation. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Stypu\u0142kowski, M., Vougioukas, K., He, S., Zi\u0119ba, M., Petridis, S., Pantic, M.: Diffused heads: diffusion models beat GANs on talking-face generation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5091\u20135100 (2024)","DOI":"10.1109\/WACV57701.2024.00502"},{"key":"30_CR24","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Latentavatar: learning latent expression code for expressive neural head avatar. arXiv preprint arXiv:2305.01190 (2023)","DOI":"10.1145\/3588432.3591545"},{"key":"30_CR25","doi-asserted-by":"crossref","unstructured":"Ye, Y., Yi, R., Gao, Z., Zhu, C., Cai, Z., Xu, K.: NeRF: neural edge fields for 3D parametric curve reconstruction from multi-view images. arXiv:2303.07653 (2023)","DOI":"10.1109\/CVPR52729.2023.00820"},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Li, L., Ding, Y., Fan, C.: Flow-guided one-shot talking face generation with a high-resolution audio-visual dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3661\u20133670 (2021)","DOI":"10.1109\/CVPR46437.2021.00366"},{"key":"30_CR27","doi-asserted-by":"crossref","unstructured":"Zhong, W., et al.: Identity-preserving talking face generation with landmark and appearance priors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2023)","DOI":"10.1109\/CVPR52729.2023.00938"},{"issue":"6","key":"30_CR28","first-page":"1","volume":"39","author":"Y Zhou","year":"2020","unstructured":"Zhou, Y., Han, X., Shechtman, E., Echevarria, J., Kalogerakis, E., Li, D.: Makelttalk: speaker-aware talking-head animation. ACM Trans. Graph. (TOG) 39(6), 1\u201315 (2020)","journal-title":"ACM Trans. Graph. (TOG)"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04555-3_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T08:56:24Z","timestamp":1757580984000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04555-3_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,12]]},"ISBN":["9783032045546","9783032045553"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04555-3_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,12]]},"assertion":[{"value":"12 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kaunas","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}