{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T04:18:29Z","timestamp":1745554709446,"version":"3.40.4"},"reference-count":19,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T00:00:00Z","timestamp":1727568000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T00:00:00Z","timestamp":1727568000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s00371-024-03667-x","type":"journal-article","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T18:01:58Z","timestamp":1727632918000},"page":"4395-4403","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["High-definition multi-scale voice-driven facial animation: enhancing lip-sync clarity and image detail"],"prefix":"10.1007","volume":"41","author":[{"given":"Long","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"QingHua","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuai","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunxiang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,29]]},"reference":[{"issue":"6","key":"3667_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2816795.2818130","volume":"34","author":"Y Liu","year":"2015","unstructured":"Liu, Y., Xu, F., Chai, J., Tong, X., Wang, L., Huo, Q.: Video-audio driven real-time facial animation. ACM Trans. Graph. (ToG) 34(6), 1\u201310 (2015)","journal-title":"ACM Trans. Graph. (ToG)"},{"issue":"1","key":"3667_CR2","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1038\/s41746-024-01204-7","volume":"7","author":"Z Qi","year":"2024","unstructured":"Qi, Z., et al.: A deep learning system for myopia onset prediction and intervention effectiveness evaluation in children. npj Digit. Med. 7(1), 206 (2024)","journal-title":"npj Digit. Med."},{"key":"3667_CR3","first-page":"1","volume":"19","author":"J Li","year":"2024","unstructured":"Li, J., et al.: Integrated image-based deep learning and language models for primary diabetes care. Nat. Med. 19, 1\u201311 (2024)","journal-title":"Nat. Med."},{"key":"3667_CR4","doi-asserted-by":"publisher","first-page":"101213","DOI":"10.1016\/j.xcrm.2023.101213","volume":"4","author":"Z Guan","year":"2023","unstructured":"Guan, Z., et al.: Artificial intelligence in diabetes management: advancements, opportunities, and challenges. Cell Rep. Med. 4, 101213 (2023)","journal-title":"Cell Rep. Med."},{"issue":"8","key":"3667_CR5","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1016\/S2213-8587(24)00154-2","volume":"12","author":"B Sheng","year":"2024","unstructured":"Sheng, B., et al.: Artificial intelligence for diabetes care: current and future prospects. Lancet Diabetes Endocrinol. 12(8), 569\u2013595 (2024)","journal-title":"Lancet Diabetes Endocrinol."},{"issue":"4","key":"3667_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S.M., Kemelmacher-Shlizerman, I.: Synthesizing obama: learning lip sync from audio. ACM Trans. Graph. (ToG) 36(4), 1\u201313 (2017)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"3667_CR7","unstructured":"Radford, A., Metz, L., Chintala, S.: Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434 (2015)"},{"key":"3667_CR8","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1075\/sibil.28.16mon","volume-title":"First Language Attrition: Interdisciplinary Perspectives on Methodological","author":"S Montrul","year":"2004","unstructured":"Montrul, S.: Convergent outcomes in L2 acquisition and L1 loss. In: First Language Attrition: Interdisciplinary Perspectives on Methodological, pp. 259\u2013279. John Benjamins Publishing Company, Amsterdam (2004)"},{"key":"3667_CR9","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"3667_CR10","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Zisserman, A.: Out of time: automated lip sync in the wild. In: Computer Vision\u2013ACCV 2016 Workshops: ACCV 2016 International Workshops, Taipei, Taiwan, November 20\u201324, 2016, Revised Selected Papers, Part II 13, pp. 251\u2013263. Springer (2017)","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"3667_CR11","unstructured":"Wang, Z., Simoncelli, E.P., Bovik, A.C.: Multiscale structural similarity for image quality assessment. In: The Thrity-Seventh Asilomar Conference on Signals, Systems & Computers, vol. 2, pp. 1398\u20131402. IEEE (2003)"},{"key":"3667_CR12","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"3667_CR13","doi-asserted-by":"crossref","unstructured":"Korhonen, J., You, J.: Peak signal-to-noise ratio revisited: Is simple beautiful? In: 2012 Fourth International Workshop on Quality of Multimedia Experience, pp. 37\u201338. IEEE (2012)","DOI":"10.1109\/QoMEX.2012.6263880"},{"key":"3667_CR14","doi-asserted-by":"crossref","unstructured":"Ephrat, A., et al.: Looking to listen at the cocktail party: A speaker-independent audio-visual model for speech separation. arXiv preprint arXiv:1804.03619 (2018)","DOI":"10.1145\/3197517.3201357"},{"key":"3667_CR15","unstructured":"Karen, S.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"3667_CR16","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, vol. 25 (2012)"},{"key":"3667_CR17","unstructured":"KR, P., Mukhopadhyay, R., Philip, J., Jha, A., Namboodiri, V., Jawahar, C.: Towards automatic face-to-face translation. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1428\u20131436 (2019)"},{"issue":"4","key":"3667_CR18","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"3667_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03667-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03667-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03667-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T10:01:21Z","timestamp":1745488881000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03667-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,29]]},"references-count":19,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["3667"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03667-x","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"type":"print","value":"0178-2789"},{"type":"electronic","value":"1432-2315"}],"subject":[],"published":{"date-parts":[[2024,9,29]]},"assertion":[{"value":"24 September 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 September 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}