{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:07:38Z","timestamp":1763345258551,"version":"3.45.0"},"reference-count":37,"publisher":"Tech Science Press","issue":"1","license":[{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"vor","delay-in-days":242,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.065529","type":"journal-article","created":{"date-parts":[[2025,8,4]],"date-time":"2025-08-04T08:49:50Z","timestamp":1754297390000},"page":"1883-1897","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Semantic and Texture Consistency in Video Generation"],"prefix":"10.32604","volume":"85","author":[{"given":"Xian","family":"Yu","sequence":"first","affiliation":[]},{"given":"Jianxun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Siran","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Xiaobao","family":"He","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"10684","article-title":"High-resolution image synthesis with latent diffusion models","author":"Rombach","year":"2022"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1145\/3422622","article-title":"Generative adversarial networks","volume":"63","author":"Goodfellow","year":"2020","journal-title":"Commun ACM"},{"key":"ref3","unstructured":"Nichol A, Dhariwal P, Ramesh A, Shyam P, Mishkin P, McGrew B, et al. Glide: towards photorealistic image generation and editing with text-guided diffusion models. arXiv:2112.10741. 2021."},{"key":"ref4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3614425","article-title":"A survey on video diffusion models","volume":"57","author":"Xing","year":"2024","journal-title":"ACM Comput Surv"},{"key":"ref5","unstructured":"Ramesh A, Dhariwal P, Nichol A, Chu C, Chen M. Hierarchical text-conditional image generation with CLIP latents. arXiv:2204.06125. 2022."},{"key":"ref6","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"17897","article-title":"ZeroCap: zero-shot image-to-text generation for visual-semantic arithmetic","author":"Tewel","year":"2022"},{"key":"ref7","first-page":"8633","article-title":"Video diffusion models","volume":"35","author":"Ho","year":"2022","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref8","unstructured":"Zhang S, Wang J, Zhang Y, Zhao K, Yuan H, Qin Z, et al. I2vgen-xl: high-quality image-to-video synthesis via cascaded diffusion models. arXiv:2311.04145. 2023."},{"key":"ref9","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"6091","article-title":"Diffusion video autoencoders: toward temporally consistent face video editing via disentangled video encoding","author":"Kim","year":"2023"},{"key":"ref10","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"17857","article-title":"MonoNeRF: learning a generalizable dynamic radiance field from monocular videos","author":"Tian","year":"2023"},{"key":"ref11","series-title":"Proceedings of the International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"ref12","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1526","article-title":"MoCoGAN: decomposing motion and content for video generation","author":"Tulyakov","year":"2018"},{"key":"ref13","series-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV)","article-title":"Temporal generative adversarial nets with singular value clipping","author":"Saito","year":"2017"},{"key":"ref14","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"7623","article-title":"Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation","author":"Wu","year":"2023"},{"key":"ref15","first-page":"14866","article-title":"Generating diverse high-fidelity images with VQ-VAE-2","volume":"32","author":"Razavi","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref16","first-page":"852","article-title":"Alias-free generative adversarial networks","volume":"34","author":"Karras","year":"2021","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"100172","DOI":"10.1016\/j.adapen.2024.100172","article-title":"SkyGPT: probabilistic ultra-short-term solar forecasting using synthetic sky images from physics-constrained VideoGPT","volume":"14","author":"Nie","year":"2024","journal-title":"Adv Appl Energy"},{"key":"ref18","series-title":"Computer Vision-ECCV 2016: 14th European Conference; 2016 Oct 11\u201314","first-page":"694","article-title":"Perceptual losses for real-time style transfer and super-resolution","author":"Johnson","year":"2016"},{"key":"ref19","series-title":"Pattern Recognition: 38th German Conference, GCPR 2016; 2016 Sep 12\u201315","first-page":"26","article-title":"Artistic style transfer for videos","author":"Ruder","year":"2016"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1145\/3386569.3392457","article-title":"Learning temporal coherence via self-supervision for GAN-based video generation","volume":"39","author":"Chu","year":"2020","journal-title":"ACM Trans Graph"},{"key":"ref21","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","first-page":"507","article-title":"Strumming to the beat: audio-conditioned contrastive video textures","author":"Narasimhan","year":"2022"},{"key":"ref22","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"24142","article-title":"Generative image dynamics","author":"Li","year":"2024"},{"key":"ref23","unstructured":"Mao J, Huang X, Xie Y, Chang Y, Hui M, Xu B, et al. StoryAdapter: a training-free iterative framework for long story visualization. arXiv:2410.06244. 2024."},{"key":"ref24","series-title":"Proceedings of the 2023 IEEE Belgrade PowerTech","first-page":"1","article-title":"Denoising diffusion probabilistic models for probabilistic energy forecasting","author":"Capel","year":"2023"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"121482","DOI":"10.1016\/j.ins.2024.121482","article-title":"Elastic deep multi-view autoencoder with diversity embedding","volume":"689","author":"Daneshfar","year":"2025","journal-title":"Inf Sci"},{"key":"ref26","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"ref27","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"22500","article-title":"DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation","author":"Ruiz","year":"2023"},{"key":"ref28","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"3836","article-title":"Adding conditional control to text-to-image diffusion models","author":"Zhang","year":"2023"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"103234","DOI":"10.1109\/ACCESS.2025.3579539","article-title":"LoRA-Adv: boosting text classification in large language models through adversarial low-rank adaptations","volume":"13","author":"Ye","year":"2025","journal-title":"IEEE Access"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"103950","DOI":"10.1016\/j.cviu.2024.103950","article-title":"Self-supervised multi-scale semantic consistency regularization for unsupervised image-to-image translation","volume":"241","author":"Zhang","year":"2024","journal-title":"Comput Vis Image Underst"},{"key":"ref31","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"13919","article-title":"Focal frequency loss for image reconstruction and synthesis","author":"Jiang","year":"2021"},{"key":"ref32","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6049","article-title":"Adaptive frequency filters as efficient global token mixers","author":"Huang","year":"2023"},{"key":"ref33","unstructured":"Hong W, Ding M, Zheng W, Liu X, Tang J. CogVideo: large-scale pretraining for text-to-video generation via transformers. arXiv:2205.15868. 2022."},{"key":"ref34","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1921","article-title":"Plug-and-play diffusion features for text-driven image-to-image translation","author":"Tumanyan","year":"2023"},{"key":"ref35","series-title":"European Conference on Computer Vision","first-page":"707","article-title":"Text2Live: text-driven layered image and video editing","author":"Bar-Tal","year":"2022"},{"key":"ref36","unstructured":"Guo Y, Yang C, Rao A, Liang Z, Wang Y, Qiao Y, et al. AnimateDiff: animate your personalized text-to-image diffusion models without specific tuning. arXiv:2307.04725. 2023."},{"key":"ref37","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"15908","article-title":"Text2Video-Zero: text-to-image diffusion models are zero-shot video generators","author":"Khachatryan","year":"2023"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-1\/TSP_CMC_65529\/TSP_CMC_65529.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:04:18Z","timestamp":1763345058000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n1\/63519"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":37,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.065529","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2025-03-15","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-07-17","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-08-29","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}