{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T04:04:34Z","timestamp":1751083474029,"version":"3.41.0"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2024,12,22]],"date-time":"2024-12-22T00:00:00Z","timestamp":1734825600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,22]],"date-time":"2024-12-22T00:00:00Z","timestamp":1734825600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2020A1515110523"],"award-info":[{"award-number":["2020A1515110523"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["QTZX22079"],"award-info":[{"award-number":["QTZX22079"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100020766","name":"Key Laboratory of Leather Chemistry and Engineering of Ministry of Education","doi-asserted-by":"publisher","award":["KX202045"],"award-info":[{"award-number":["KX202045"]}],"id":[{"id":"10.13039\/100020766","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00521-024-10608-1","type":"journal-article","created":{"date-parts":[[2024,12,22]],"date-time":"2024-12-22T10:08:54Z","timestamp":1734862134000},"page":"13267-13282","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A video course enhancement technique utilizing generated talking heads"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2743-2017","authenticated-orcid":false,"given":"Zixiang","family":"Lu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bujia","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ping","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiguang","family":"Miao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kun","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruyi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yining","family":"Quan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,22]]},"reference":[{"key":"10608_CR1","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.compedu.2016.01.013","volume":"96","author":"W Watson","year":"2016","unstructured":"Watson W, Kim W, Watson S (2016) Learning outcomes of a MOOC designed for attitudinal change: a case study of an animal behavior and welfare MOOC. Comput Educ 96:83\u201393","journal-title":"Comput Educ"},{"key":"10608_CR2","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1016\/j.compedu.2017.01.015","volume":"108","author":"G Alexandron","year":"2017","unstructured":"Alexandron G, Ruiperez-Valiente JA, Chen Z, Munoz-Merino PJ, Pritchard DE (2017) Copying@scale: using harvesting accounts for collecting correct answers in a MOOC. Comput Educ 108:96\u2013114","journal-title":"Comput Educ"},{"key":"10608_CR3","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.compedu.2018.05.002","volume":"123","author":"M Ib\u00e1\u00f1ez","year":"2018","unstructured":"Ib\u00e1\u00f1ez M, Delgado-Kloos C (2018) Augmented reality for STEM learning: a systematic review. Comput Educ 123:109\u2013123","journal-title":"Comput Educ"},{"key":"10608_CR4","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"10608_CR5","doi-asserted-by":"crossref","unstructured":"Chen L, Cui G, Liu C, Li Z, Kou Z, Xu Y, Xu C (2020) Talking-head generation with rhythmic head motion. In: European conference on computer vision, pp. 35\u201351","DOI":"10.1007\/978-3-030-58545-7_3"},{"key":"10608_CR6","doi-asserted-by":"crossref","unstructured":"Prajwal K, Mukhopadhyay R, Namboodiri VP, Jawahar C (2020) A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM international conference on multimedia, pp. 484\u2013492","DOI":"10.1145\/3394171.3413532"},{"key":"10608_CR7","doi-asserted-by":"crossref","unstructured":"Wang S, Li L, Ding Y, Fan C, Yu X (2021) Audio2head: Audio-driven one-shot talking-head generation with natural head motion. arXiv preprint arXiv:2107.09293","DOI":"10.24963\/ijcai.2021\/152"},{"key":"10608_CR8","doi-asserted-by":"crossref","unstructured":"Wang S, Li L, Ding Y, Yu X (2022) One-shot talking face generation from single-speaker audio-visual correlation learning. In: Proceedings of the AAAI conference on artificial intelligence, pp. 2531\u20132539","DOI":"10.1609\/aaai.v36i3.20154"},{"key":"10608_CR9","doi-asserted-by":"crossref","unstructured":"Ma Y, Wang S, Hu Z, Fan C, Lv T, Ding Y, Deng Z, Yu X (2023) Styletalk: One-shot talking head generation with controllable speaking styles. In: Proceedings of the AAAI conference on artificial intelligence, vol. 37, pp. 1896\u20131904","DOI":"10.1609\/aaai.v37i2.25280"},{"key":"10608_CR10","doi-asserted-by":"crossref","unstructured":"Guo Y, Chen K, Liang S, Liu Y-J, Bao H, Zhang J (2021) Ad-nerf: Audio driven neural radiance fields for talking head synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5784\u20135794","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"10608_CR11","doi-asserted-by":"crossref","unstructured":"Li D, Zhao K, Wang W, Peng B, Zhang Y, Dong J, Tan T (2023) Ae-nerf: Audio enhanced neural radiance field for few shot talking head synthesis. arXiv preprint arXiv:2312.10921","DOI":"10.1609\/aaai.v38i4.28086"},{"key":"10608_CR12","doi-asserted-by":"crossref","unstructured":"Ren Y, Li G, Chen Y, Li TH, Liu S (2021) Pirenderer: Controllable portrait image generation via semantic neural rendering. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 13759\u201313768","DOI":"10.1109\/ICCV48922.2021.01350"},{"key":"10608_CR13","doi-asserted-by":"crossref","unstructured":"Zhou M, Bai Y, Zhang W, Yao T, Zhao T, Mei T (2022) Responsive listening head generation: a benchmark dataset and baseline. In: European conference on computer vision, pp. 124\u2013142","DOI":"10.1007\/978-3-031-19839-7_8"},{"key":"10608_CR14","doi-asserted-by":"crossref","unstructured":"Zhou H, Liu Y, Liu Z, Luo P, Wang X (2019) Talking face generation by adversarially disentangled audio-visual representation. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 9299\u20139306","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"10608_CR15","doi-asserted-by":"crossref","unstructured":"Chen, Z, Wang C, Yuan B, Tao D (2020) Puppeteergan: Arbitrary portrait animation with semantic-aware appearance transformation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 13518\u201313527","DOI":"10.1109\/CVPR42600.2020.01353"},{"key":"10608_CR16","doi-asserted-by":"crossref","unstructured":"Wang K, Wu Q, Song L, Yang Z, Wu W, Qian C, He R, Qiao Y, Loy CC (2020) Mead: a large-scale audio-visual dataset for emotional talking-face generation. In: European conference on computer vision, pp. 700\u2013717","DOI":"10.1007\/978-3-030-58589-1_42"},{"key":"10608_CR17","doi-asserted-by":"crossref","unstructured":"Zhua Y, Zhanga C, Liub Q, Zhoub X (2023) Audio-driven talking head video generation with diffusion model. In: ICASSP 2023-2023 IEEE International conference on acoustics, speech and signal processing (ICASSP), pp. 1\u20135","DOI":"10.1109\/ICASSP49357.2023.10094937"},{"key":"10608_CR18","doi-asserted-by":"crossref","unstructured":"Dua M, Sethi PS, Agrawal V, Chawla R (2021) Speaker recognition using noise robust features and lstm-rnn. In: Progress in advanced computing and intelligent engineering: Proceedings of ICACIE 2020, pp. 19\u201328","DOI":"10.1007\/978-981-33-4299-6_2"},{"key":"10608_CR19","unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555"},{"key":"10608_CR20","doi-asserted-by":"crossref","unstructured":"Deng Y, Yang J, Xu S, Chen D, Jia Y, Tong X (2019) Accurate 3d face reconstruction with weakly-supervised learning: From single image to image set. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp. 0\u20130","DOI":"10.1109\/CVPRW.2019.00038"},{"key":"10608_CR21","doi-asserted-by":"crossref","unstructured":"Bagdanov AD, Del\u00a0Bimbo A, Masi I (2011) The florence 2d\/3d hybrid face dataset. In: Proceedings of the 2011 joint ACM workshop on human gesture and behavior understanding, pp. 79\u201380","DOI":"10.1145\/2072572.2072597"},{"issue":"3","key":"10608_CR22","first-page":"413","volume":"20","author":"C Cao","year":"2013","unstructured":"Cao C, Weng Y, Zhou S, Tong Y, Zhou K (2013) Facewarehouse: a 3d facial expression database for visual computing. IEEE Trans Visualization Comput Gr 20(3):413\u2013425","journal-title":"IEEE Trans Visualization Comput Gr"},{"key":"10608_CR23","unstructured":"Graves A (2013) Generating sequences with recurrent neural networks. arXiv preprint arXiv:1308.0850"},{"key":"10608_CR24","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"10608_CR25","unstructured":"Ba JL, Kiros JR, Hinton GE (2016) Layer normalization. arXiv preprint arXiv:1607.06450"},{"key":"10608_CR26","unstructured":"Ke Z, Li K, Zhou Y, Wu Q, Mao X, Yan Q, Lau RW (2020) Is a green screen really necessary for real-time portrait matting. arXiv preprint arXiv:2011.119613(5)"},{"issue":"1","key":"10608_CR27","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1109\/T-AFFC.2011.20","volume":"3","author":"G McKeown","year":"2011","unstructured":"McKeown G, Valstar M, Cowie R, Pantic M, Schroder M (2011) The semaine database: annotated multimodal records of emotionally colored conversations between a person and a limited agent. IEEE Trans Affective Computing 3(1):5\u201317","journal-title":"IEEE Trans Affective Computing"},{"issue":"2","key":"10608_CR28","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1016\/j.imavis.2012.08.014","volume":"31","author":"S Petridis","year":"2013","unstructured":"Petridis S, Martinez B, Pantic M (2013) The mahnob laughter database. Image Vision Computing 31(2):186\u2013202","journal-title":"Image Vision Computing"},{"key":"10608_CR29","unstructured":"Muda L, Begam M, Elamvazuthi I (2010) Voice recognition algorithms using mel frequency cepstral coefficient (mfcc) and dynamic time warping (dtw) techniques. arXiv preprint arXiv:1003.4083"},{"key":"10608_CR30","doi-asserted-by":"crossref","unstructured":"Hore A, Ziou D (2010) Image quality metrics: Psnr vs. ssim. In: 2010 20th International conference on pattern recognition, pp. 2366\u20132369","DOI":"10.1109\/ICPR.2010.579"},{"key":"10608_CR31","unstructured":"Heusel M, Ramsauer H, Unterthiner T, Nessler B, Hochreiter S (2017) Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv Neural Inf Process Syst 30"},{"issue":"9","key":"10608_CR32","doi-asserted-by":"publisher","first-page":"2678","DOI":"10.1109\/TIP.2011.2131660","volume":"20","author":"ND Narvekar","year":"2011","unstructured":"Narvekar ND, Karam LJ (2011) A no-reference image blur metric based on the cumulative probability of blur detection (cpbd). IEEE Trans Image Process 20(9):2678\u20132683","journal-title":"IEEE Trans Image Process"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10608-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-10608-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10608-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T08:27:47Z","timestamp":1751012867000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-10608-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,22]]},"references-count":32,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["10608"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-10608-1","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2024,12,22]]},"assertion":[{"value":"4 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that there is no conflict of interest with any individual or organization for this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}