{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T11:07:20Z","timestamp":1772449640477,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819699131","type":"print"},{"value":"9789819699148","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9914-8_42","type":"book-chapter","created":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T14:24:03Z","timestamp":1752675843000},"page":"499-510","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["JoyLive: Efficient Audio-Driven Portrait Animation by 3D Implict Keypoints"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5346-5220","authenticated-orcid":false,"given":"Siyuan","family":"Jin","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8103-0321","authenticated-orcid":false,"given":"Sirui","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2676-6613","authenticated-orcid":false,"given":"Yifan","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6515-9886","authenticated-orcid":false,"given":"Shifeng","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9501-7875","authenticated-orcid":false,"given":"Mengduo","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4246-5386","authenticated-orcid":false,"given":"Tong","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,17]]},"reference":[{"key":"42_CR1","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: Wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Advances in Neural Information Processing Systems, vol. 33, pp. 12449\u201312460 (2020)"},{"key":"42_CR2","doi-asserted-by":"crossref","unstructured":"Wu, M., Zhao, S., Wu, T., Xu, Y., Xu, T., Chen, E.: AVF-LIP: high-fidelity talking face generation via audio-visual fusion. In: 2024 7th International Conference on Pattern Recognition and Artificial Intelligence (PRAI), pp. 491\u2013499. IEEE (2024)","DOI":"10.1109\/PRAI62207.2024.10827447"},{"issue":"1","key":"42_CR3","first-page":"53","volume":"18","author":"A Bozkurt","year":"2023","unstructured":"Bozkurt, A., et al.: Speculative futures on chatgpt and generative artificial intelligence (ai): a collective reflection from the educational landscape. Asian J. Distance Educ. 18(1), 53\u2013130 (2023)","journal-title":"Asian J. Distance Educ."},{"key":"42_CR4","unstructured":"Cao, X., et al.: JoyVASA: portrait and ani-mal image animation with diffusion-based audio-driven facial dynamics and head motion generation. arXiv preprint arXiv:2411.09209 (2024)"},{"key":"42_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., Maddox, R.K., Duan, Z., Xu, C.: Hierarchical cross-modal talking face generation with dynamic pixel-wise loss. In: CVPR, pp. 7832\u20137841 (2019)","DOI":"10.1109\/CVPR.2019.00802"},{"key":"42_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., Cao, J., Chen, Z., Li, Y., Ma, C.: Echomimic: lifelike audio-driven portrait animations through editable landmark conditioning (2024)","DOI":"10.1609\/aaai.v39i3.32241"},{"key":"42_CR7","unstructured":"Zhao, Y., et al.: ChatAnything: facetime chat with llm-enhanced personas. arXiv preprint arXiv:2311.06772 (2023)"},{"key":"42_CR8","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Li, L., Ding, Y., Fan, C.: Flow-guided one-shot talking face generation with a high-resolution audio-visual dataset. In: Proceedings of The IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3661\u20133670 (2021)","DOI":"10.1109\/CVPR46437.2021.00366"},{"key":"42_CR9","unstructured":"Guo, J., et al.: LivePortrait: efficient portrait animation with stitching and retargeting control. arXiv preprint arXiv:2407.03168 (2024)"},{"key":"42_CR10","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"42_CR11","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"42_CR12","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"42_CR13","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: VBench: comprehensive benchmark suite for video generative models. In: Pro-ceedings of The IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21807\u201321818 (2024)","DOI":"10.1109\/CVPR52733.2024.02060"},{"key":"42_CR14","unstructured":"Ma, Y., Zhang, S., Wang, J., Wang, X., Zhang, Y., Deng, Z.: DreamTalk: when expressive talking head generation meets diffusion probabilistic models. arXiv preprint arXiv:2312.09767 (2023)"},{"key":"42_CR15","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. PMLR (2021)"},{"key":"42_CR16","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"42_CR17","doi-asserted-by":"publisher","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution Image Synthesis with Latent Diffusion Models. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022). https:\/\/doi.org\/10.1109\/cvpr52688.2022.01042","DOI":"10.1109\/cvpr52688.2022.01042"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Chong, E., Rehg, J.M.: Fine-grained head pose estimation without keypoints. In: Proceedings of The IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 2074\u20132083 (2018)","DOI":"10.1109\/CVPRW.2018.00281"},{"key":"42_CR19","unstructured":"Shi, S., Cao, X., Zhao, J., Wang, G.: Joyhallo: digital human model for mandarin. ArXiv preprint arXiv:2409.13268 (2024)"},{"key":"42_CR20","doi-asserted-by":"crossref","unstructured":"Song, H.K., et al.: Talking face generation with multilingual TTS. In: Proceedings of The IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21425\u201321430 (2022)","DOI":"10.1109\/CVPR52688.2022.02074"},{"key":"42_CR21","doi-asserted-by":"publisher","unstructured":"Thies, J., Elgharib, M., Tewari, A., Theobalt, C., Nie\u00dfner, M.: Neural Voice Puppetry: Audio-driven Facial Reenactment. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, JM. (eds.) Computer Vision\u2013ECCV 2020, pp. 716\u2013731. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58517-4_42","DOI":"10.1007\/978-3-030-58517-4_42"},{"key":"42_CR22","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Sadtalker: learning realistic 3D motion coefficients for stylized audio-driven single image talking face animation. In: CVPR, pp. 8652\u20138661 (2023)","DOI":"10.1109\/CVPR52729.2023.00836"},{"key":"42_CR23","unstructured":"Unterthiner, T., van Steenkiste, S., Kurach, K., Marinier, R., Michalski, M., Gelly, S.: FVD: a new metric for video generation. In: Deep Generative Models for Highly Structured Data, ICLR 2019 Workshop, New Orleans, Louisiana, United States, May 6, 2019. OpenReview.net (2019). https:\/\/openreview.net\/forum?id=rylgEULtdN"},{"key":"42_CR24","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Mallya, A., Liu, M.Y.: One-shot free-view neural talking-head synthesis for video conferencing. In: Proceedings of The IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10039\u201310049 (2021)","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"42_CR25","unstructured":"Wei, H., Yang, Z., Wang, Z.: Aniportrait: audio-driven synthesis of photorealistic portrait animation. arXiv preprint arXiv:2403.17694 (2024)"},{"key":"42_CR26","doi-asserted-by":"crossref","unstructured":"Woo, S., et al.: Convnext v2: co-designing and scaling convnets with masked autoencoders. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01548"},{"key":"42_CR27","unstructured":"Xu, M., et al.: Hallo: hierarchical audio-driven visual synthesis for portrait image animation (2024)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9914-8_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T10:38:41Z","timestamp":1772447921000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9914-8_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819699131","9789819699148"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9914-8_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"17 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}