{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T00:55:04Z","timestamp":1773104104097,"version":"3.50.1"},"reference-count":155,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:00:00Z","timestamp":1750809600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:00:00Z","timestamp":1750809600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Research Project on the Protection, Inheritance, and Promotion of Yangtze River Culture in Hubei Province","award":["HCYK2024Y22"],"award-info":[{"award-number":["HCYK2024Y22"]}]},{"name":"Lhasa Key Science and Technology Plan, Tibetan Plateau Culture and Art Digital Platform","award":["LSKJ202404"],"award-info":[{"award-number":["LSKJ202404"]}]},{"name":"Key Research and Development Program of Hubei Province of China","award":["2023BAB085"],"award-info":[{"award-number":["2023BAB085"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Discov Computing"],"DOI":"10.1007\/s10791-025-09628-9","type":"journal-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T14:24:42Z","timestamp":1750861482000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Bibliometric analysis and review of AI-based video generation: research dynamics and application trends (2020\u20132025)"],"prefix":"10.1007","volume":"28","author":[{"given":"Wei","family":"Xie","sequence":"first","affiliation":[]},{"given":"Anshu","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Qing","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Junjie","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Ruoyu","family":"Wan","sequence":"additional","affiliation":[]},{"given":"Yuhan","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,25]]},"reference":[{"key":"9628_CR1","unstructured":"Goodfellow I J, Pouget-Abadie J, Mirza M, et al. Generative adversarial nets[J]. Adv Neural Inf Process Syst 2014, 27."},{"key":"9628_CR2","unstructured":"Kingma DP, Welling M. Auto-encoding variational Bayes; 2013. CoRR abs\/1312.6114. http:\/\/arxiv.org\/abs\/1312.6114"},{"key":"9628_CR3","unstructured":"Video generation models as world simulators [Internet]. OpenAI; c2024 [cited 2024 Oct 25]. Available from: https:\/\/openai.com\/index\/video-generation-models-as-world-simulators\/"},{"key":"9628_CR4","doi-asserted-by":"crossref","unstructured":"Bhagwatkar R, Bachu S, Fitter K, et al. A review of video generation approaches. In: 2020 international conference on power, instrumentation, control and computing (PICC). IEEE, Thrissur, India; 2020, p. 1\u20135.","DOI":"10.1109\/PICC51425.2020.9362485"},{"key":"9628_CR5","unstructured":"Zhu Z, Wang X, Zhao W, et al. Is sora a world simulator? a comprehensive survey on general world models and beyond[J].arXiv:2405.03520 [Preprint]. 2024 [cited 2024 Oct 18]. Available from: https:\/\/arxiv.org\/abs\/2405.03520."},{"key":"9628_CR6","doi-asserted-by":"crossref","unstructured":"Sun W, Tu R C, Liao J, et al. 
Diffusion model-based video editing: A survey[J]. arXiv:2407.07111 [Preprint]. 2024 [cited 2024 Oct 18]. Available from: https:\/\/arxiv.org\/abs\/2407.07111.","DOI":"10.1145\/3696415"},{"key":"9628_CR7","unstructured":"Xing Z, Feng Q, Chen H, et al. A survey on video diffusion models; 2023"},{"key":"9628_CR8","unstructured":"Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[J]. Advances in neural information processing systems, 2017, 30."},{"key":"9628_CR9","unstructured":"Sun R, Zhang Y, Shah T, et al. From sora what we can see: A survey of text-to-video generation[J]. arXiv:2405.10674 [Preprint]. 2024 [cited 2024 Oct 18]. Available from: https:\/\/arxiv.org\/abs\/2405.10674."},{"key":"9628_CR10","unstructured":"Singer U, Polyak A, Hayes T, et al. Make-a-video: text-to-video generation without text-video data; 2022. ArXiv abs\/2209.14792. http:\/\/arxiv.org\/abs\/2209.14792"},{"key":"9628_CR11","first-page":"523","volume-title":"Communications in computer and information science","author":"D Leiker","year":"2023","unstructured":"Leiker D, Gyllen AR, Eldesouky I, Cukurova M. Generative AI for learning: investigating the potential of learning videos with synthetic virtual instructors. In: Communications in computer and information science. Cham: Springer Nature Switzerland; 2023. p. 523\u20139."},{"key":"9628_CR12","unstructured":"Liu Y, Zhang K, Li Y, et al. Sora: a review on background, technology, limitations, and opportunities of large vision models[J]. arXiv:2402.17177, 2024."},{"key":"9628_CR13","unstructured":"Li C, Huang D, Lu Z, et al. A survey on long video generation: Challenges, methods, and prospects[J]. arXiv:2403.16407 [Preprint]. 2024 [cited 2024 Oct 18]. Available from: https:\/\/arxiv.org\/abs\/2403.16407."},{"key":"9628_CR14","unstructured":"Cao Y, Li S, Liu Y, et al. A comprehensive survey of ai-generated content (aigc): A history of generative ai from gan to chatgpt[J]. arXiv:2303.04226 [Preprint]. 2023 [cited 2024 Oct 18]. Available from: https:\/\/arxiv.org\/abs\/2303.04226."},{"key":"9628_CR15","unstructured":"Achiam J, Adler S, Agarwal S, et al. Gpt-4 technical report[J]. arXiv:2303.08774 [Preprint]. 2023 [cited 2024 Oct 21]. Available from: https:\/\/arxiv.org\/abs\/2303.08774."},{"key":"9628_CR16","unstructured":"Niu K, Liu W, Sharif N, et al. Conditional video generation guided by multimodal inputs: A Comprehensive Survey[J]. 2024."},{"key":"9628_CR17","doi-asserted-by":"crossref","unstructured":"Zhou P, Wang L, Liu Z, et al. A survey on generative ai and llm for video generation, understanding, and streaming[J]. arXiv:2404.16038 [Preprint]. 2024 [cited 2024 Oct 21]. Available from: https:\/\/arxiv.org\/abs\/2404.16038.","DOI":"10.36227\/techrxiv.171172801.19993069\/v1"},{"key":"9628_CR18","doi-asserted-by":"publisher","first-page":"56400","DOI":"10.1109\/ACCESS.2023.3283260","volume":"11","author":"S Fengxue","year":"2023","unstructured":"Fengxue S, Yanguo S, Zhenping L, et al. Image and video style transfer based on transformer. IEEE ACCESS. 2023;11:56400\u20137. https:\/\/doi.org\/10.1109\/ACCESS.2023.3283260.","journal-title":"IEEE ACCESS"},{"key":"9628_CR19","unstructured":"Kim D, Woo S, Lee J-Y, Kweon IS. Deep video inpainting. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR); 2019."},{"key":"9628_CR20","unstructured":"Ilan S, Shamir A. Data-driven video completion. Eurographics 2014\u2014state of the art reports; 2014. p. 
15"},{"key":"9628_CR21","doi-asserted-by":"publisher","DOI":"10.1080\/02564602.2024.2327566","author":"A Verma","year":"2024","unstructured":"Verma A, Meenpal T, Acharya B. Action-guided CycleGAN for bi-directional video prediction. IETE Tech Rev. 2024. https:\/\/doi.org\/10.1080\/02564602.2024.2327566.","journal-title":"IETE Tech Rev"},{"key":"9628_CR22","unstructured":"Tan Z, Yang X, Liu S, et al. Video-infinity: Distributed long video generation[J]. arXiv:2406.16260 [Preprint]. 2024[cited 2024 Oct 21]. Available from: https:\/\/arxiv.org\/abs\/2406.16260."},{"key":"9628_CR23","unstructured":"video-generation-survey [Internet]. yzhang2016; c2022 [cited 2024 Oct 25]. Available from: https:\/\/github.com\/yzhang2016\/video-generation-survey\/blob\/main\/video-generation.md"},{"key":"9628_CR24","doi-asserted-by":"crossref","unstructured":"Groos O V, Pritchard A. Documentation notes[J]. J Doc. 1969;25(4): 344\u20139.","DOI":"10.1108\/eb026482"},{"key":"9628_CR25","doi-asserted-by":"publisher","first-page":"985","DOI":"10.3390\/brainsci10120985","volume":"10","author":"N Carmona-Serrano","year":"2020","unstructured":"Carmona-Serrano N, L\u00f3pez-Belmonte J, Cuesta-G\u00f3mez J-L, Moreno-Guerrero A-J. Documentary analysis of the scientific literature on autism and technology in web of science. Brain Sci. 2020;10:985. https:\/\/doi.org\/10.3390\/brainsci10120985.","journal-title":"Brain Sci"},{"key":"9628_CR26","doi-asserted-by":"publisher","first-page":"1014","DOI":"10.3390\/encyclopedia4020065","volume":"4","author":"I Passas","year":"2024","unstructured":"Passas I. Bibliometric analysis: the main steps. Encyclopedia. 2024;4:1014\u201325. https:\/\/doi.org\/10.3390\/encyclopedia4020065.","journal-title":"Encyclopedia"},{"key":"9628_CR27","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1016\/j.jbusres.2021.04.070","volume":"133","author":"N Donthu","year":"2021","unstructured":"Donthu N, Kumar S, Mukherjee D, et al. How to conduct a bibliometric analysis: an overview and guidelines. J Bus Res. 2021;133:285\u201396. https:\/\/doi.org\/10.1016\/j.jbusres.2021.04.070.","journal-title":"J Bus Res"},{"key":"9628_CR28","doi-asserted-by":"publisher","first-page":"959","DOI":"10.1016\/j.joi.2017.08.007","volume":"11","author":"M Aria","year":"2017","unstructured":"Aria M, Cuccurullo C. bibliometrix: an R-tool for comprehensive science mapping analysis. J Informet. 2017;11:959\u201375. https:\/\/doi.org\/10.1016\/j.joi.2017.08.007.","journal-title":"J Informet"},{"key":"9628_CR29","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1002\/asi.20317","volume":"57","author":"C Chen","year":"2006","unstructured":"Chen C. CiteSpace II: detecting and visualizing emerging trends and transient patterns in scientific literature. J Am Soc Inf Sci. 2006;57:359\u201377. https:\/\/doi.org\/10.1002\/asi.20317.","journal-title":"J Am Soc Inf Sci"},{"key":"9628_CR30","unstructured":"Van Eck N J, Waltman L. Text mining and visualization using VOSviewer[J]. arXiv:1109.2058[Preprint]. 2011 [cited 2024 Oct 21]. Available from: https:\/\/arxiv.org\/abs\/1109.2058."},{"key":"9628_CR31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-71921-0","volume-title":"Principles and practice of systematic reviews and meta-analysis","author":"S Patole","year":"2021","unstructured":"Patole S. Principles and practice of systematic reviews and meta-analysis. 
Springer; 2021."},{"key":"9628_CR32","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1007\/s11192-020-03387-8","volume":"123","author":"J Zhu","year":"2020","unstructured":"Zhu J, Liu W. A tale of two databases: the use of Web of Science and Scopus in academic papers. Scientometrics. 2020;123:321\u201335. https:\/\/doi.org\/10.1007\/s11192-020-03387-8.","journal-title":"Scientometrics"},{"key":"9628_CR33","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1111\/ijmr.12381","volume":"27","author":"G Marzi","year":"2025","unstructured":"Marzi G, Balzano M, Caputo A, Pellegrini MM. Guidelines for Bibliometric-Systematic Literature Reviews: 10 steps to combine analysis, synthesis and theory development. Int J Manag Rev. 2025;27:81\u2013103. https:\/\/doi.org\/10.1111\/ijmr.12381.","journal-title":"Int J Manag Rev"},{"key":"9628_CR34","unstructured":"Reed S, Akata Z, Yan X, et al. Generative adversarial text to image synthesis[C]\/\/International conference on machine learning. PMLR, 2016: 1060-1069."},{"key":"9628_CR35","first-page":"7065","volume-title":"Video generation from text","author":"Y Li","year":"2018","unstructured":"Li Y, Min M, Shen D, et al. Video generation from text. Duke University; 2018. p. 7065\u201372."},{"key":"9628_CR36","doi-asserted-by":"crossref","unstructured":"Bar-Tal O, Chefer H, Tov O, et al. Lumiere: A space-time diffusion model for video generation[C]\/\/SIGGRAPH Asia 2024 Conference Papers. 2024: 1\u201311.","DOI":"10.1145\/3680528.3687614"},{"key":"9628_CR37","unstructured":"Yin S, Wu C, Yang H, et al. Nuwa-xl: Diffusion over diffusion for extremely long video generation[J]. arXiv:2303.12346 [Preprint]. 2023. [cited 2024 Oct 21]. Available from: https:\/\/arxiv.org\/abs\/2303.12346."},{"key":"9628_CR38","doi-asserted-by":"publisher","first-page":"7454","DOI":"10.1109\/TIP.2020.3003227","volume":"29","author":"Q Chen","year":"2020","unstructured":"Chen Q, Wu Q, Chen J, et al. Scripted video generation with a bottom-up generative adversarial network. IEEE Trans Image Process. 2020;29:7454\u201367. https:\/\/doi.org\/10.1109\/TIP.2020.3003227.","journal-title":"IEEE Trans Image Process"},{"key":"9628_CR39","doi-asserted-by":"publisher","first-page":"153113","DOI":"10.1109\/ACCESS.2020.3017881","volume":"8","author":"D Kim","year":"2020","unstructured":"Kim D, Joo D, Kim J. TiVGAN: text to image to video generation with step-by-step evolutionary generator. IEEE ACCESS. 2020;8:153113\u201322. https:\/\/doi.org\/10.1109\/ACCESS.2020.3017881.","journal-title":"IEEE ACCESS"},{"key":"9628_CR40","unstructured":"Wu C, Huang L, Zhang Q, et al. GODIVA: generating open-DomaIn videos from nAtural descriptions; 2021. ArXiv abs\/2104.14806. http:\/\/arxiv.org\/abs\/2104.14806"},{"key":"9628_CR41","doi-asserted-by":"publisher","unstructured":"Ho J, Salimans T, Gritsenko A, et al. Video diffusion models; 2022. ArXiv abs\/2204.03458. https:\/\/doi.org\/10.48550\/arXiv.2204.03458","DOI":"10.48550\/arXiv.2204.03458"},{"key":"9628_CR42","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TMM.2023.3262972","volume":"26","author":"A K\u00f6ksal","year":"2024","unstructured":"K\u00f6ksal A, Ak K, Sun Y, et al. Controllable video generation with text-based instructions. IEEE Trans Multimedia. 2024;26:190\u2013201. https:\/\/doi.org\/10.1109\/TMM.2023.3262972.","journal-title":"IEEE Trans Multimedia"},{"key":"9628_CR43","unstructured":"He H, Xu Y, Guo Y, et al. 
CameraCtrl: enabling camera control for text-to-video generation; 2024."},{"key":"9628_CR44","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-025-02413-7","author":"WW Zhu","year":"2025","unstructured":"Zhu WW. ScenarioDiff: text-to-video generation with dynamic transformations of scene conditions. Int J Comput Vis. 2025. https:\/\/doi.org\/10.1007\/s11263-025-02413-7.","journal-title":"Int J Comput Vis"},{"key":"9628_CR45","doi-asserted-by":"crossref","unstructured":"Liang J, Fan Y, Zhang K, et al. Movideo: Motion-aware video generation with diffusion model[C]\/\/European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2024; pp. 56\u201374.","DOI":"10.1007\/978-3-031-72784-9_4"},{"key":"9628_CR46","doi-asserted-by":"crossref","unstructured":"Oh G, Jeong J, Kim S, et al. Mevg: Multi-event video generation with text-to-video models[C]\/\/European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2024; pp. 401\u201318.","DOI":"10.1007\/978-3-031-72775-7_23"},{"key":"9628_CR47","doi-asserted-by":"publisher","first-page":"1850","DOI":"10.1109\/LSP.2024.3422816","volume":"31","author":"J Gao","year":"2024","unstructured":"Gao J, Shu C, Zheng X, et al. MergeTalk: audio-driven talking head generation from single image with feature merge. IEEE Signal Process Lett. 2024;31:1850\u20134. https:\/\/doi.org\/10.1109\/LSP.2024.3422816.","journal-title":"IEEE Signal Process Lett"},{"key":"9628_CR48","doi-asserted-by":"crossref","unstructured":"Walker J, Marino K, Gupta A, Hebert M. The pose knows: video forecasting by generating pose futures. In: 2017 IEEE international conference on computer vision (ICCV). IEEE, Venice; 2017. p 3352\u201361.","DOI":"10.1109\/ICCV.2017.361"},{"key":"9628_CR49","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1111\/cgf.14202","volume":"40","author":"Y Zhao","year":"2021","unstructured":"Zhao Y, Yang L, Pei E, et al. Action unit driven facial expression synthesis from a single image with patch attentive GAN. Comput Graph Forum. 2021;40:47\u201361. https:\/\/doi.org\/10.1111\/cgf.14202.","journal-title":"Comput Graph Forum"},{"key":"9628_CR50","doi-asserted-by":"crossref","unstructured":"Zhang L, Pollett C. Facial expression video synthesis from the StyleGAN latent space[C]\/\/Thirteenth International Conference on Digital Image Processing (ICDIP 2021). SPIE, 2021, 11878: 469\u201378.","DOI":"10.1117\/12.2599392"},{"key":"9628_CR51","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112233","author":"J Hou","year":"2024","unstructured":"Hou J, Lu Y, Wang M, et al. A Markov Chain approach for video-based virtual try-on with denoising diffusion generative adversarial network. Knowl Based Syst. 2024. https:\/\/doi.org\/10.1016\/j.knosys.2024.112233.","journal-title":"Knowl Based Syst"},{"key":"9628_CR52","doi-asserted-by":"crossref","unstructured":"Hu Y, Chen Z, Luo C. LaMD: Latent Motion Diffusion for Image-Conditional Video Generation[J]. International Journal of Computer Vision, 2025: 1\u201317.","DOI":"10.1007\/s11263-025-02386-7"},{"key":"9628_CR53","doi-asserted-by":"crossref","unstructured":"Kandala H, Gao J, Yang J. Pix2gif: Motion-guided diffusion for gif generation[C]\/\/European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2024: 35\u201351.","DOI":"10.1007\/978-3-031-73013-9_3"},{"key":"9628_CR54","doi-asserted-by":"crossref","unstructured":"Yeganeh Y, Lazuardi R, Shamseddin A, et al. 
VISAGE: Video Synthesis Using Action Graphs for Surgery[C]\/\/International Conference on Medical Image Computing and Computer-Assisted Intervention. Cham: Springer Nature Switzerland, 2024; pp. 146\u201356.","DOI":"10.1007\/978-3-031-77610-6_14"},{"key":"9628_CR55","doi-asserted-by":"crossref","unstructured":"Li X, Zhang Y, Ye X. DrivingDiffusion: Layout-Guided Multi-view Driving Scenarios Video Generation with Latent Diffusion Model[C]\/\/European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2024; pp. 469\u201385.","DOI":"10.1007\/978-3-031-73229-4_27"},{"key":"9628_CR56","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn S, Seitz SM, Kemelmacher-Shlizerman I. Synthesizing Obama: learning lip sync from audio. ACM Trans Graph. 2017;36:1\u201313. https:\/\/doi.org\/10.1145\/3072959.3073640.","journal-title":"ACM Trans Graph"},{"key":"9628_CR57","first-page":"1716","volume-title":"Talking Face generation with expression-tailored generative adversarial network","author":"D Zeng","year":"2020","unstructured":"Zeng D, Liu H, Lin H, et al. Talking Face generation with expression-tailored generative adversarial network. Shanghai University; 2020. p. 1716\u201324."},{"key":"9628_CR58","first-page":"4395","volume":"41","author":"YX Chen","year":"2024","unstructured":"Chen YX. High-definition multi-scale voice-driven facial animation: enhancing lip-sync clarity and image detail. Visual Comput. 2024;41:4395\u2013403.","journal-title":"Visual Comput"},{"key":"9628_CR59","doi-asserted-by":"crossref","unstructured":"Aldausari N, Sowmya A, Marcus N, et al. Phonicsgan: Synthesizing graphical videos from phonics songs[C]\/\/Artificial Neural Networks and Machine Learning\u2013ICANN 2021: 30th International Conference on Artificial Neural Networks, Bratislava, Slovakia, September 14\u201317, 2021, Proceedings, Part II 30. Springer International Publishing, 2021: 599\u2013610.","DOI":"10.1007\/978-3-030-86340-1_48"},{"key":"9628_CR60","doi-asserted-by":"crossref","unstructured":"Zhuang W, Wang C, Chai J, et al. Music2dance: Dancenet for music-driven dance generation[J]. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), 2022, 18(2): 1\u201321.","DOI":"10.1145\/3485664"},{"key":"9628_CR61","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1007\/s41095-023-0343-7","volume":"10","author":"P Liu","year":"2024","unstructured":"Liu P, Deng W, Li H, et al. MusicFace: music-driven expressive singing face synthesis. Comput Vis Media. 2024;10:119\u201336. https:\/\/doi.org\/10.1007\/s41095-023-0343-7.","journal-title":"Comput Vis Media"},{"key":"9628_CR62","unstructured":"Vondrick C, Pirsiavash H, Torralba A. Generating videos with scene dynamics[J]. Advances in neural information processing systems, 2016, 29."},{"key":"9628_CR63","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1049\/cvi2.12047","volume":"15","author":"J Kong","year":"2021","unstructured":"Kong J, Shen H, Huang K. DualPathGAN: facial reenacted emotion synthesis. IET Comput Vision. 2021;15:501\u201313. https:\/\/doi.org\/10.1049\/cvi2.12047.","journal-title":"IET Comput Vision"},{"key":"9628_CR64","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480520","author":"Y Ishiwaka","year":"2021","unstructured":"Ishiwaka Y, Zeng X, Eastman M, et al. Foids: bio-inspired fish simulation for generating synthetic datasets. ACM Trans Graph. 2021. 
https:\/\/doi.org\/10.1145\/3478513.3480520.","journal-title":"ACM Trans Graph"},{"key":"9628_CR65","volume-title":"SURFGenerator: generative adversarial network modeling for synthetic flooding video generation","author":"S Lamczyk","year":"2022","unstructured":"Lamczyk S, Ampofo K, Salashour B, et al. SURFGenerator: generative adversarial network modeling for synthetic flooding video generation. Old Dominion University; 2022."},{"key":"9628_CR66","doi-asserted-by":"publisher","first-page":"1767","DOI":"10.1007\/s11263-019-01150-y","volume":"127","author":"A Jamaludin","year":"2019","unstructured":"Jamaludin A, Chung J, Zisserman A. You said that?: synthesising talking faces from audio. Int J Comput Vision. 2019;127:1767\u201379. https:\/\/doi.org\/10.1007\/s11263-019-01150-y.","journal-title":"Int J Comput Vision"},{"key":"9628_CR67","doi-asserted-by":"publisher","first-page":"20685","DOI":"10.1007\/s11042-022-12359-4","volume":"81","author":"H Yin","year":"2022","unstructured":"Yin H, Liu J, Chen X, Li G. WeAnimate: motion-coherent animation generation from video data. Multimedia Tools Appl. 2022;81:20685\u2013703. https:\/\/doi.org\/10.1007\/s11042-022-12359-4.","journal-title":"Multimedia Tools Appl"},{"key":"9628_CR68","doi-asserted-by":"crossref","unstructured":"Lv T, Wen Y H, Sun Z, et al. Generating Smooth and Facial-Details-Enhanced Talking Head Video: A Perspective of Pre and Post Processes[C]\/\/Proceedings of the 30th ACM International Conference on Multimedia. 2022; pp. 7079\u201383.","DOI":"10.1145\/3503161.3551583"},{"key":"9628_CR69","doi-asserted-by":"crossref","unstructured":"Lee S H, Oh G, Byeon W, et al. Sound-guided semantic video generation[C]\/\/European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2022; pp. 34\u201350.","DOI":"10.1007\/978-3-031-19790-1_3"},{"key":"9628_CR70","unstructured":"Kondratyuk D, Yu L, Gu X, et al. Videopoet: A large language model for zero-shot video generation[J]. arXiv:2312.14125 [Preprint]. 2023 [cited 2024 Oct 28]. Available from: https:\/\/arxiv.org\/abs\/2312.14125."},{"key":"9628_CR71","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-024-01481-5","author":"X Ji","year":"2024","unstructured":"Ji X, Liao Z, Dong L, et al. 3D facial animation driven by speech-video dual-modal signals. Complex Intell Syst. 2024. https:\/\/doi.org\/10.1007\/s40747-024-01481-5.","journal-title":"Complex Intell Syst"},{"key":"9628_CR72","doi-asserted-by":"crossref","unstructured":"Ruan L, Ma Y, Yang H, et al. MM-diffusion: learning multi-modal diffusion models for joint audio and video generation. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR). IEEE, Vancouver, BC, Canada; 2023, p. 10219\u201328.","DOI":"10.1109\/CVPR52729.2023.00985"},{"key":"9628_CR73","unstructured":"Zhang Y, Gu J, Wang L W, et al. Mimicmotion: High-quality human motion video generation with confidence-aware pose guidance[J]. arXiv:2406.19680 [Preprint]. 2024 [cited 2024 Oct 28]. Available from: https:\/\/arxiv.org\/abs\/2406.19680."},{"key":"9628_CR74","unstructured":"Blattmann A, Dockhorn T, Kulal S, et al. Stable video diffusion: Scaling latent video diffusion models to large datasets[J]. arXiv:2311.15127 [Preprint]. 2023 [cited 2024 Oct 28]. 
Available from: https:\/\/arxiv.org\/abs\/2311.15127."},{"key":"9628_CR75","first-page":"9731","volume-title":"A hierarchical variational neural uncertainty model for stochastic video prediction","author":"M Chatterjee","year":"2021","unstructured":"Chatterjee M, Ahuja N, Cherian A, IEEE. A hierarchical variational neural uncertainty model for stochastic video prediction. University of Illinois System; 2021. p. 9731\u201341."},{"key":"9628_CR76","first-page":"2693","volume-title":"Robust multi-frame future prediction by leveraging view synthesis","author":"K Ak","year":"2021","unstructured":"Ak K, Sun Y, Lim J, IEEE. Robust multi-frame future prediction by leveraging view synthesis. Agency for Science Technology & Research (A*STAR); 2021. p. 2693\u20137."},{"key":"9628_CR77","doi-asserted-by":"publisher","first-page":"3640","DOI":"10.1007\/s10489-021-02500-5","volume":"52","author":"B Jing","year":"2022","unstructured":"Jing B, Ding H, Yang Z, et al. Video prediction: a step-by-step improvement of a video synthesis network. Appl Intell. 2022;52:3640\u201352. https:\/\/doi.org\/10.1007\/s10489-021-02500-5.","journal-title":"Appl Intell"},{"key":"9628_CR78","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2022.103434","author":"Q Tran","year":"2022","unstructured":"Tran Q, Yang S. Video frame interpolation via down-up scale generative adversarial networks. Comput Vis Image Understanding. 2022. https:\/\/doi.org\/10.1016\/j.cviu.2022.103434.","journal-title":"Comput Vis Image Understanding"},{"key":"9628_CR79","first-page":"713","volume-title":"Splatting-based synthesis for video frame interpolation","author":"S Niklaus","year":"2023","unstructured":"Niklaus S, Hu P, Chen J, IEEE. Splatting-based synthesis for video frame interpolation. Adobe Systems Inc.; 2023. p. 713\u201323."},{"key":"9628_CR80","doi-asserted-by":"publisher","first-page":"2808","DOI":"10.1109\/TMM.2019.2963621","volume":"22","author":"W Wang","year":"2020","unstructured":"Wang W, Alameda-Pineda X, Xu D, et al. Learning how to smile: expression video generation with conditional adversarial recurrent nets. IEEE Trans Multimedia. 2020;22:2808\u201319. https:\/\/doi.org\/10.1109\/TMM.2019.2963621.","journal-title":"IEEE Trans Multimedia"},{"key":"9628_CR81","first-page":"4755","volume-title":"Facial prior based first order motion model for micro-expression generation","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Zhao Y, Wen Y, et al. Facial prior based first order motion model for micro-expression generation. Sun Yat Sen University; 2021. p. 4755\u20139."},{"key":"9628_CR82","doi-asserted-by":"crossref","unstructured":"Tzaban R, Mokady R, Gal R, et al. Stitch it in time: Gan-based facial editing of real videos[C]\/\/SIGGRAPH Asia 2022 Conference Papers. 2022: 1\u20139.","DOI":"10.1145\/3550469.3555382"},{"key":"9628_CR83","doi-asserted-by":"publisher","first-page":"848","DOI":"10.1109\/TPAMI.2020.3002500","volume":"44","author":"N Otberdout","year":"2022","unstructured":"Otberdout N, Daoudi M, Kacem A, et al. Dynamic facial expression generation on hilbert hypersphere with conditional wasserstein generative adversarial nets. IEEE Trans Pattern Anal Mach Intell. 2022;44:848\u201363. 
https:\/\/doi.org\/10.1109\/TPAMI.2020.3002500.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9628_CR84","doi-asserted-by":"publisher","first-page":"7529","DOI":"10.1007\/s11063-023-11272-7","volume":"55","author":"S Yang","year":"2023","unstructured":"Yang S, Qiao K, Shi S, et al. SATFace: subject agnostic talking face generation with natural head movement. Neural Process Lett. 2023;55:7529\u201342. https:\/\/doi.org\/10.1007\/s11063-023-11272-7.","journal-title":"Neural Process Lett"},{"key":"9628_CR85","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2025.107122","volume":"184","author":"WJ Mai","year":"2025","unstructured":"Mai WJ. VPT: video portraits transformer for realistic talking face generation. Neural Netw. 2025;184:107122.","journal-title":"Neural Netw"},{"key":"9628_CR86","doi-asserted-by":"publisher","DOI":"10.1145\/3383652.3423874","volume-title":"Generating coherent spontaneous speech and gesture from text","author":"S Alexanderson","year":"2020","unstructured":"Alexanderson S, Sz\u00e9kely \u00c9, Henter G, et al. Generating coherent spontaneous speech and gesture from text. Royal Institute of Technology; 2020."},{"key":"9628_CR87","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-021-03640-9","author":"B Natarajan","year":"2022","unstructured":"Natarajan B, Elakkiya R, Prasad M. Sentence2SignGesture: a hybrid neural machine translation network for sign language video generation. J Ambient Intell Humaniz Comput. 2022. https:\/\/doi.org\/10.1007\/s12652-021-03640-9.","journal-title":"J Ambient Intell Humaniz Comput"},{"key":"9628_CR88","doi-asserted-by":"publisher","first-page":"21101","DOI":"10.1007\/s11042-023-14556-1","volume":"82","author":"R Tous","year":"2023","unstructured":"Tous R. Pictonaut: movie cartoonization using 3D human pose estimation and GANs. Multimedia Tools Appl. 2023;82:21101\u201315. https:\/\/doi.org\/10.1007\/s11042-023-14556-1.","journal-title":"Multimedia Tools Appl"},{"key":"9628_CR89","doi-asserted-by":"crossref","unstructured":"Gao L, Liu P, Wan L, et al. Spatial-Temporal Consistency Constraints for Chinese Sign Language Synthesis[C]\/\/International Conference on Computer-Aided Design and Computer Graphics. Singapore: Springer Nature Singapore, 2023:154\u201369.","DOI":"10.1007\/978-981-99-9666-7_11"},{"key":"9628_CR90","first-page":"19","volume-title":"Video Frame interpolation via multi-scale expandable deformable convolution","author":"D Zhang","year":"2023","unstructured":"Zhang D, Huang P, Ding X, et al. Video Frame interpolation via multi-scale expandable deformable convolution. Changsha University of Science & Technology; 2023. p. 19\u201328."},{"key":"9628_CR91","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2023.110028","author":"C Li","year":"2023","unstructured":"Li C, Chen X. Future video frame prediction based on generative motion-assistant discriminative network. Appl Soft Comput. 2023. https:\/\/doi.org\/10.1016\/j.asoc.2023.110028.","journal-title":"Appl Soft Comput"},{"key":"9628_CR92","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1016\/j.neucom.2022.09.025","volume":"511","author":"D Zeng","year":"2022","unstructured":"Zeng D, Zhao S, Zhang J, et al. Expression-tailored talking face generation with adaptive cross-modal weighting. Neurocomputing. 2022;511:117\u201330. 
https:\/\/doi.org\/10.1016\/j.neucom.2022.09.025.","journal-title":"Neurocomputing"},{"key":"9628_CR93","first-page":"1326","volume-title":"Speech driven talking head generation via attentional landmarks based representation","author":"W Wang","year":"2020","unstructured":"Wang W, Wang Y, Sun J, et al. Speech driven talking head generation via attentional landmarks based representation. Anhui University; 2020. p. 1326\u201330."},{"key":"9628_CR94","doi-asserted-by":"crossref","unstructured":"Cao Z, Simon T, Wei S-E, Sheikh Y. Realtime multi-person 2D pose estimation using part affinity fields. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR). IEEE, Honolulu, HI; 2017. p. 1302\u201310.","DOI":"10.1109\/CVPR.2017.143"},{"key":"9628_CR95","doi-asserted-by":"publisher","first-page":"891","DOI":"10.1007\/s11263-019-01281-2","volume":"128","author":"S Stoll","year":"2020","unstructured":"Stoll S, Camgoz N, Hadfield S, Bowden R. Text2Sign: towards sign language production using neural machine translation and generative adversarial networks. Int J Comput Vision. 2020;128:891\u2013908. https:\/\/doi.org\/10.1007\/s11263-019-01281-2.","journal-title":"Int J Comput Vision"},{"key":"9628_CR96","first-page":"203","volume-title":"VRVideos: a flexible pipeline for virtual reality video creation","author":"A Dickson","year":"2022","unstructured":"Dickson A, Shanks J, Ventura J, et al. VRVideos: a flexible pipeline for virtual reality video creation. University of Otago; 2022. p. 203\u20136."},{"key":"9628_CR97","first-page":"1","volume-title":"Virtual human talking-head generation","author":"W Song","year":"2023","unstructured":"Song W, He Q, Chen G, ACM. Virtual human talking-head generation. Communication University of China; 2023. p. 1\u20135."},{"key":"9628_CR98","doi-asserted-by":"publisher","first-page":"29475","DOI":"10.1109\/ACCESS.2024.3368612","volume":"12","author":"T Islam","year":"2024","unstructured":"Islam T, Miron A, Liu X, Li Y. Deep learning in virtual try-on: a comprehensive survey. IEEE Access. 2024;12:29475\u2013502. https:\/\/doi.org\/10.1109\/ACCESS.2024.3368612.","journal-title":"IEEE Access"},{"key":"9628_CR99","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451554","volume-title":"Real-time gesture animation generation from speech for virtual human interaction","author":"M Rebol","year":"2021","unstructured":"Rebol M, G\u00fctl C, Pietroszek K, ACM. Real-time gesture animation generation from speech for virtual human interaction. American University; 2021."},{"key":"9628_CR100","volume-title":"GenAICHI: generative AI and HCI","author":"M Muller","year":"2022","unstructured":"Muller M, Chilton L, Kantosalo A, et al. GenAICHI: generative AI and HCI. International Business Machines IBM; 2022."},{"key":"9628_CR101","doi-asserted-by":"publisher","first-page":"4240","DOI":"10.1109\/TIV.2024.3384835","volume":"9","author":"H Yu","year":"2024","unstructured":"Yu H, Liang W, Fan L, et al. Sora for social vision with parallel intelligence: social interaction in intelligent vehicles. IEEE Trans Intell Vehicles. 2024;9:4240\u20133. https:\/\/doi.org\/10.1109\/TIV.2024.3384835.","journal-title":"IEEE Trans Intell Vehicles"},{"key":"9628_CR102","doi-asserted-by":"crossref","unstructured":"He Z, Chen P, Wang G, et al. Wildvidfit: Video virtual try-on in the wild via image-based controlled diffusion models[C]\/\/European Conference on Computer Vision. 
Cham: Springer Nature Switzerland, 2024;123\u201339.","DOI":"10.1007\/978-3-031-72643-9_8"},{"key":"9628_CR103","first-page":"2544","volume-title":"JAFPro: joint appearance fusion and propagation for human video motion transfer from multiple reference images","author":"X Yu","year":"2020","unstructured":"Yu X, Liu H, Han X, et al. JAFPro: joint appearance fusion and propagation for human video motion transfer from multiple reference images. Texas A&M University System; 2020. p. 2544\u201352."},{"key":"9628_CR104","first-page":"83","volume-title":"AVGUST: a tool for generating usage-based tests from videos of app executions","author":"S Talebipour","year":"2023","unstructured":"Talebipour S, Park H, Baral K, et al. AVGUST: a tool for generating usage-based tests from videos of app executions. University of Southern California; 2023. p. 83\u20137."},{"key":"9628_CR105","first-page":"25","volume-title":"Vid2Pix-a framework for generating high-quality synthetic videos","author":"O Nedrejord","year":"2020","unstructured":"Nedrejord O, Thambawita V, Hicks S, et al. Vid2Pix-a framework for generating high-quality synthetic videos. University of Oslo; 2020. p. 25\u20136."},{"key":"9628_CR106","doi-asserted-by":"publisher","first-page":"13153","DOI":"10.1007\/s00500-022-07014-x","volume":"26","author":"B Natarajan","year":"2022","unstructured":"Natarajan B, Elakkiya R. Dynamic GAN for high-quality sign language video generation from skeletal poses using generative adversarial networks. Soft Comput. 2022;26:13153\u201375. https:\/\/doi.org\/10.1007\/s00500-022-07014-x.","journal-title":"Soft Comput"},{"key":"9628_CR107","doi-asserted-by":"crossref","unstructured":"Yilmaz R, Eschweiler D, Stegmaier J. Annotated biomedical video generation using denoising diffusion probabilistic models and flow fields[C]\/\/International Workshop on Simulation and Synthesis in Medical Imaging. Cham: Springer Nature Switzerland, 2024; pp. 197\u2013207.","DOI":"10.1007\/978-3-031-73281-2_19"},{"key":"9628_CR108","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CP.1943-5487.0000937","author":"W Calderon","year":"2021","unstructured":"Calderon W, Roberts D, Golparvar-Fard M. Synthesizing pose sequences from 3D assets for vision-based activity analysis. J Comput Civil Eng. 2021. https:\/\/doi.org\/10.1061\/(ASCE)CP.1943-5487.0000937.","journal-title":"J Comput Civil Eng"},{"key":"9628_CR109","doi-asserted-by":"crossref","unstructured":"Sun W, Li X, Li M, et al. Sequential fusion of multi-view video frames for 3D scene generation[C]\/\/CAAI International Conference on Artificial Intelligence. Cham: Springer Nature Switzerland, 2022; pp. 597\u2013608.","DOI":"10.1007\/978-3-031-20497-5_49"},{"key":"9628_CR110","doi-asserted-by":"publisher","first-page":"1805","DOI":"10.1109\/TCSVT.2021.3083257","volume":"32","author":"X Tu","year":"2022","unstructured":"Tu X, Zou Y, Zhao J, et al. Image-to-video generation via 3D facial dynamics. IEEE Trans Circ Syst Video Technol. 2022;32:1805\u201319. https:\/\/doi.org\/10.1109\/TCSVT.2021.3083257.","journal-title":"IEEE Trans Circ Syst Video Technol"},{"key":"9628_CR111","doi-asserted-by":"publisher","first-page":"2816","DOI":"10.1007\/s11263-023-01839-1","volume":"131","author":"S Wenfeng","year":"2023","unstructured":"Wenfeng S, Xinyu Z, Yuting G, et al. Automatic generation of 3D scene animation based on dynamic knowledge graphs and contextual encoding. Int J Comput Vision. 2023;131:2816\u201344. 
https:\/\/doi.org\/10.1007\/s11263-023-01839-1.","journal-title":"Int J Comput Vision"},{"key":"9628_CR112","first-page":"282","volume-title":"FaceXHuBERT: text-less speech-driven E(X)pressive 3D facial animation synthesis using self-supervised speech representation learning","author":"K Haque","year":"2023","unstructured":"Haque K, Yumak Z, ACM,. FaceXHuBERT: text-less speech-driven E(X)pressive 3D facial animation synthesis using self-supervised speech representation learning. Utrecht University; 2023. p. 282\u201391."},{"key":"9628_CR113","doi-asserted-by":"crossref","unstructured":"Niu G, Cheng S, Li T. NLDF: Neural Light Dynamic Fields for 3D Talking Head Generation[C]\/\/Pacific Rim International Conference on Artificial Intelligence. Singapore: Springer Nature Singapore, 2024; pp. 396\u2013402.","DOI":"10.1007\/978-981-96-0116-5_33"},{"key":"9628_CR114","first-page":"3292","volume-title":"Using GANs to generate lyric videos","author":"D Gareev","year":"2022","unstructured":"Gareev D, Glassl O, Nouzri S. Using GANs to generate lyric videos. University of Luxembourg; 2022. p. 3292\u20137."},{"key":"9628_CR115","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2022.911469","author":"X Liu","year":"2022","unstructured":"Liu X, Ko Y. The use of deep learning technology in dance movement generation. Front Neurorobot. 2022. https:\/\/doi.org\/10.3389\/fnbot.2022.911469.","journal-title":"Front Neurorobot"},{"key":"9628_CR116","first-page":"634","volume-title":"POSTcard Landscapes from Lanzarote","author":"V Guljajeva","year":"2022","unstructured":"Guljajeva V, Sola M, ACM. POSTcard Landscapes from Lanzarote. Hong Kong University of Science & Technology Guangzhou; 2022. p. 634\u20136."},{"key":"9628_CR117","doi-asserted-by":"publisher","first-page":"24755","DOI":"10.1007\/s00521-023-08206-8","volume":"35","author":"X Cai","year":"2023","unstructured":"Cai X, Wang T, Lu R, et al. Automatic generation of Labanotation based on human pose estimation in folk dance videos. Neural Comput Appl. 2023;35:24755\u201371. https:\/\/doi.org\/10.1007\/s00521-023-08206-8.","journal-title":"Neural Comput Appl"},{"key":"9628_CR118","doi-asserted-by":"publisher","DOI":"10.17502\/mrcs.v11i2.710","author":"J Franganillo","year":"2023","unstructured":"Franganillo J. Generative artificial intelligence and its impact on media content creation. Methaodos Revista De Ciencias Sociales. 2023. https:\/\/doi.org\/10.17502\/mrcs.v11i2.710.","journal-title":"Methaodos Revista De Ciencias Sociales"},{"key":"9628_CR119","doi-asserted-by":"publisher","first-page":"1079","DOI":"10.1097\/PRS.0000000000006697","volume":"145","author":"D Crystal","year":"2020","unstructured":"Crystal D, Cuccolo N, Ibrahim A, et al. Photographic and video deepfakes have arrived: how machine learning may influence plastic surgery. Plast Reconstr Surg. 2020;145:1079\u201386. https:\/\/doi.org\/10.1097\/PRS.0000000000006697.","journal-title":"Plast Reconstr Surg"},{"key":"9628_CR120","doi-asserted-by":"crossref","unstructured":"Reynaud H, Vlontzos A, Dombrowski M, et al. D\u2019artagnan: Counterfactual video generation[C]\/\/International Conference on Medical Image Computing and Computer-Assisted Intervention. Cham: Springer Nature Switzerland, 2022; pp. 599\u2013609.","DOI":"10.1007\/978-3-031-16452-1_57"},{"key":"9628_CR121","doi-asserted-by":"publisher","DOI":"10.1007\/s10143-024-02514-w","author":"A Mohamed","year":"2024","unstructured":"Mohamed A, Lucke-Wold B. Text-to-video generative artificial intelligence: sora in neurosurgery. 
Neurosurg Rev. 2024. https:\/\/doi.org\/10.1007\/s10143-024-02514-w.","journal-title":"Neurosurg Rev"},{"key":"9628_CR122","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2024.106598","author":"Y Benezeth","year":"2024","unstructured":"Benezeth Y, Krishnamoorthy D, Monsalve D, et al. Video-based heart rate estimation from challenging scenarios using synthetic video generation. Biomed Signal Processing and Control. 2024. https:\/\/doi.org\/10.1016\/j.bspc.2024.106598.","journal-title":"Biomed Signal Processing and Control"},{"key":"9628_CR123","doi-asserted-by":"crossref","unstructured":"Hartholt A, Reilly A, Fast E, et al. Introducing canvas: Combining nonverbal behavior generation with user-generated content to rapidly create educational videos[C]\/\/Proceedings of the 20th ACM International Conference on Intelligent Virtual Agents. 2020: 1\u20133.","DOI":"10.1145\/3383652.3423880"},{"key":"9628_CR124","doi-asserted-by":"crossref","unstructured":"Mishra P, Diwan C, Srinivasa S, et al. A semi-automatic approach for generating video trailers for learning pathways[C]\/\/International Conference on Artificial Intelligence in Education. Cham: Springer International Publishing, 2022; pp. 302\u20135.","DOI":"10.1007\/978-3-031-11647-6_57"},{"key":"9628_CR125","doi-asserted-by":"publisher","first-page":"38849","DOI":"10.1007\/s11042-022-12590-z","volume":"81","author":"M Xu","year":"2022","unstructured":"Xu M, Yang C. Realistic video generation for American sign language. Multimedia Tools Appl. 2022;81:38849\u201386. https:\/\/doi.org\/10.1007\/s11042-022-12590-z.","journal-title":"Multimedia Tools Appl"},{"key":"9628_CR126","doi-asserted-by":"publisher","first-page":"104358","DOI":"10.1109\/ACCESS.2022.3210543","volume":"10","author":"B Natarajan","year":"2022","unstructured":"Natarajan B, Rajalakshmi E, Elakkiya R, et al. Development of an end-to-end deep learning framework for sign language recognition, translation, and video generation. IEEE Access. 2022;10:104358\u201374. https:\/\/doi.org\/10.1109\/ACCESS.2022.3210543.","journal-title":"IEEE Access"},{"key":"9628_CR127","first-page":"5131","volume-title":"Signing at scale: learning to co-articulate signs for large-scale photo-realistic sign language production","author":"B Saunders","year":"2022","unstructured":"Saunders B, Camgoz N, Bowden R, IEEE COMP SOC. Signing at scale: learning to co-articulate signs for large-scale photo-realistic sign language production. University of Surrey; 2022. p. 5131\u201341."},{"key":"9628_CR128","doi-asserted-by":"crossref","unstructured":"Anusree V A, Aarsha Das K M, Arya P S, et al. FactOrFake: Automatic Fact Checking Using Machine Learning Models[M]\/\/Machine Learning and Autonomous Systems: Proceedings of ICMLAS 2021. Singapore: Springer Nature Singapore, 2022: 179\u201391.","DOI":"10.1007\/978-981-16-7996-4_13"},{"key":"9628_CR129","first-page":"340","volume-title":"Rising above misinformation and deepfakes","author":"N Veerasamy","year":"2022","unstructured":"Veerasamy N, Pieterse H, ACAD CONF LTD. Rising above misinformation and deepfakes. Council for Scientific & Industrial Research CSIR; 2022. p. 340\u20138."},{"key":"9628_CR130","doi-asserted-by":"publisher","first-page":"18757","DOI":"10.1109\/ACCESS.2022.3151186","volume":"10","author":"A Malik","year":"2022","unstructured":"Malik A, Kuribayashi M, Abdullahi S, Khan A. DeepFake detection for human face images and videos: a survey. IEEE Access. 2022;10:18757\u201375. 
https:\/\/doi.org\/10.1109\/ACCESS.2022.3151186.","journal-title":"IEEE Access"},{"key":"9628_CR131","doi-asserted-by":"publisher","first-page":"1727","DOI":"10.1108\/INTR-07-2022-0563","volume":"33","author":"I Sharma","year":"2023","unstructured":"Sharma I, Jain K, Behl A, et al. Examining the motivations of sharing political deepfake videos: the role of political brand hate and moral consciousness. Internet Res. 2023;33:1727\u201349. https:\/\/doi.org\/10.1108\/INTR-07-2022-0563.","journal-title":"Internet Res"},{"key":"9628_CR132","first-page":"29","volume-title":"Why don\u2019t you speak?: a smartphone application to engage museum visitors through deepfakes creation","author":"M Zaramella","year":"2023","unstructured":"Zaramella M, Amerini I, Russo P, ACM. Why don\u2019t you speak?: a smartphone application to engage museum visitors through deepfakes creation. Sapienza University; 2023. p. 29\u201337."},{"key":"9628_CR133","doi-asserted-by":"publisher","DOI":"10.22967\/HCIS.2024.14.035","author":"J L\u00f3pez-Gil","year":"2024","unstructured":"L\u00f3pez-Gil J, Gil R, Garc\u00eda R. Analysis of the reliability of deepfake facial emotion expression synthesis. Human Centric Comput Inf Sci. 2024. https:\/\/doi.org\/10.22967\/HCIS.2024.14.035.","journal-title":"Human Centric Comput Inf Sci"},{"key":"9628_CR134","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-024-10810-6","author":"A Kaur","year":"2024","unstructured":"Kaur A, Hoshyar A, Saikrishna V, et al. Deepfake video detection: challenges and opportunities. Artif Intell Rev. 2024. https:\/\/doi.org\/10.1007\/s10462-024-10810-6.","journal-title":"Artif Intell Rev"},{"key":"9628_CR135","doi-asserted-by":"publisher","first-page":"1519","DOI":"10.1007\/s43681-023-00350-0","volume":"4","author":"R Songja","year":"2024","unstructured":"Songja R, Promboot I, Haetanurak B, Kerdvibulvech C. Deepfake AI images: should deepfakes be banned in Thailand? AI Ethics. 2024;4:1519\u201331. https:\/\/doi.org\/10.1007\/s43681-023-00350-0.","journal-title":"AI Ethics"},{"key":"9628_CR136","doi-asserted-by":"crossref","unstructured":"Sohrawardi S J, Wu Y K, Hickerson A, et al. Dungeons & deepfakes: Using scenario-based role-play to study journalists' behavior towards using AI-based verification tools for video content[C]\/\/Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems. 2024; pp. 1\u201317.","DOI":"10.1145\/3613904.3641973"},{"key":"9628_CR137","doi-asserted-by":"crossref","unstructured":"Blattmann A, Rombach R, Ling H, et al. Align your latents: High-resolution video synthesis with latent diffusion models[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2023; pp. 22563\u201375.","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"9628_CR138","doi-asserted-by":"publisher","first-page":"701","DOI":"10.1111\/cgf.14022","volume":"39","author":"A Tewari","year":"2020","unstructured":"Tewari A, Fried O, Thies J, et al. State of the Art on neural rendering. Comput Graph Forum. 2020;39:701\u201327. https:\/\/doi.org\/10.1111\/cgf.14022.","journal-title":"Comput Graph Forum"},{"key":"9628_CR139","doi-asserted-by":"crossref","unstructured":"Li L, Chen Y C, Cheng Y, et al. Hero: Hierarchical encoder for video+ language omni-representation pre-training[J]. arXiv:2005.00200 [Preprint]. 2020 [cited 2024 Oct 28]. 
Available from: https:\/\/arxiv.org\/abs\/2005.00200.","DOI":"10.18653\/v1\/2020.emnlp-main.161"},{"key":"9628_CR140","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1007\/s10462-021-10039-7","volume":"55","author":"D Bull","year":"2022","unstructured":"Bull D. Artificial intelligence in the creative industries: a review. Artif Intell Rev. 2022;55:589\u2013656.","journal-title":"Artif Intell Rev"},{"key":"9628_CR141","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1111\/cgf.14507","volume":"41","author":"V Golyanik","year":"2022","unstructured":"Golyanik V. Advances in neural rendering. Comput Graph Forum. 2022;41:703\u201335.","journal-title":"Comput Graph Forum"},{"key":"9628_CR142","doi-asserted-by":"publisher","DOI":"10.1016\/j.ebiom.2023.104512","author":"S Harrer","year":"2023","unstructured":"Harrer S. Attention is not all you need: the complicated case of ethically using large language models in healthcare and medicine. EBioMedicine. 2023. https:\/\/doi.org\/10.1016\/j.ebiom.2023.104512.","journal-title":"EBioMedicine"},{"key":"9628_CR143","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1016\/j.bushor.2019.11.006","volume":"63","author":"J Kietzmann","year":"2020","unstructured":"Kietzmann J, Lee L, McCarthy I, Kietzmann T. Deepfakes: trick or treat? Bus Horiz. 2020;63:135\u201346. https:\/\/doi.org\/10.1016\/j.bushor.2019.11.006.","journal-title":"Bus Horiz"},{"key":"9628_CR144","first-page":"1","volume":"55","author":"T Ward","year":"2023","unstructured":"Ward T. Generative adversarial networks in time series: a systematic literature review. ACM Comput Surv. 2023;55:1\u201331.","journal-title":"ACM Comput Surv"},{"key":"9628_CR145","doi-asserted-by":"publisher","first-page":"3974","DOI":"10.1007\/s10489-022-03766-z","volume":"53","author":"H Malik","year":"2023","unstructured":"Malik H. Deepfakes generation and detection: state-of-the-art, open challenges, countermeasures, and way forward. Appl Intell. 2023;53:3974\u20134026.","journal-title":"Appl Intell"},{"key":"9628_CR146","first-page":"75","volume":"39","author":"N Thuerey","year":"2020","unstructured":"Thuerey N. Learning temporal coherence via self-supervision for GAN-based video generation. ACM Trans Graph. 2020;39:75.","journal-title":"ACM Trans Graph"},{"key":"9628_CR147","doi-asserted-by":"crossref","unstructured":"Ni H, Shi C, Li K, et al. Conditional image-to-video generation with latent flow diffusion models[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2023: 18444\u201355.","DOI":"10.1109\/CVPR52729.2023.01769"},{"key":"9628_CR148","doi-asserted-by":"crossref","unstructured":"Wu C, Liang J, Ji L, et al. N\u00fcwa: Visual synthesis pre-training for neural visual world creation[C]\/\/European conference on computer vision. Cham: Springer Nature Switzerland, 2022: 720\u201336.","DOI":"10.1007\/978-3-031-19787-1_41"},{"key":"9628_CR149","doi-asserted-by":"crossref","unstructured":"Dorkenwald M, Milbich T, Blattmann A, et al. Stochastic image-to-video synthesis using cinns[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2021:3742\u201353.","DOI":"10.1109\/CVPR46437.2021.00374"},{"key":"9628_CR150","doi-asserted-by":"crossref","unstructured":"Hu Y, Luo C, Chen Z. Make it move: controllable image-to-video generation with text descriptions[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 
2022: 18219\u201328.","DOI":"10.1109\/CVPR52688.2022.01768"},{"key":"9628_CR151","doi-asserted-by":"publisher","DOI":"10.1145\/3487891","author":"N Aldausari","year":"2023","unstructured":"Aldausari N, Sowmya A, Marcus N, Mohammadi G. Video generative adversarial networks: a review. ACM Comput Surv. 2023. https:\/\/doi.org\/10.1145\/3487891.","journal-title":"ACM Comput Surv"},{"key":"9628_CR152","doi-asserted-by":"publisher","first-page":"839","DOI":"10.1109\/JPROC.2021.3049196","volume":"109","author":"M Liu","year":"2021","unstructured":"Liu M, Huang X, Yu J, et al. Generative adversarial networks for image and video synthesis: algorithms and applications. Proc IEEE. 2021;109:839\u201362. https:\/\/doi.org\/10.1109\/JPROC.2021.3049196.","journal-title":"Proc IEEE"},{"key":"9628_CR153","doi-asserted-by":"publisher","first-page":"809","DOI":"10.1109\/JAS.2024.124383","volume":"11","author":"F Wang","year":"2024","unstructured":"Wang F, Miao Q, Li L, et al. When does sora show: the beginning of TAO to imaginative intelligence and scenarios engineering. IEEE-CAA J Autom Sin. 2024;11:809\u201315. https:\/\/doi.org\/10.1109\/JAS.2024.124383.","journal-title":"IEEE-CAA J Autom Sin"},{"key":"9628_CR154","doi-asserted-by":"publisher","first-page":"3117","DOI":"10.1109\/TIV.2024.3379989","volume":"9","author":"X Li","year":"2024","unstructured":"Li X, Miao Q, Li L, et al. Sora for scenarios engineering of intelligent vehicles: V&V, C&C, and beyonds. IEEE Trans Intell Vehicles. 2024;9:3117\u201322. https:\/\/doi.org\/10.1109\/TIV.2024.3379989.","journal-title":"IEEE Trans Intell Vehicles"},{"key":"9628_CR155","doi-asserted-by":"publisher","first-page":"3123","DOI":"10.1109\/TIV.2024.3376575","volume":"9","author":"H Yu","year":"2024","unstructured":"Yu H, Liu X, Tian Y, et al. Sora-based parallel vision for smart sensing of intelligent vehicles: from foundation models to foundation intelligence. IEEE Trans Intell Vehicles. 2024;9:3123\u20136. 
https:\/\/doi.org\/10.1109\/TIV.2024.3376575.","journal-title":"IEEE Trans Intell Vehicles"}],"container-title":["Discover Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-025-09628-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10791-025-09628-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10791-025-09628-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T14:25:03Z","timestamp":1750861503000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10791-025-09628-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,25]]},"references-count":155,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["9628"],"URL":"https:\/\/doi.org\/10.1007\/s10791-025-09628-9","relation":{},"ISSN":["2948-2992"],"issn-type":[{"value":"2948-2992","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,25]]},"assertion":[{"value":"24 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Informed consent was obtained from all individual participants involved in the study.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"The authors confirm that the work described in the manuscript has not been published before, and is not under consideration for publication elsewhere. All authors have approved the manuscript and agree with its submission to the journal. The authors grant the publisher the right to publish the manuscript, including electronic and print forms, and any future forms of publication.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"130"}}