{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:16:29Z","timestamp":1777655789273,"version":"3.51.4"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,2,8]],"date-time":"2025-02-08T00:00:00Z","timestamp":1738972800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,8]],"date-time":"2025-02-08T00:00:00Z","timestamp":1738972800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s11432-023-4184-4","type":"journal-article","created":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T08:31:53Z","timestamp":1739262713000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["ControlVideo: conditional control for one-shot text-driven video editing and beyond"],"prefix":"10.1007","volume":"68","author":[{"given":"Min","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Rongzhen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Fan","family":"Bao","sequence":"additional","affiliation":[]},{"given":"Chongxuan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,8]]},"reference":[{"key":"4184_CR1","first-page":"7623","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"J Z Wu","year":"2023","unstructured":"Wu J Z, Ge Y, Wang X, et al. 
Tune-A-Video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023. 7623\u20137633"},{"key":"4184_CR2","first-page":"15932","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"C Qi","year":"2023","unstructured":"Qi C, Cun X, Zhang Y, et al. FateZero: fusing attentions for zero-shot text-based video editing. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023. 15932\u201315942"},{"key":"4184_CR3","volume-title":"Video-P2P: video editing with cross-attention control","author":"S Liu","year":"2023","unstructured":"Liu S, Zhang Y, Li W, et al. Video-P2P: video editing with cross-attention control. 2023. ArXiv:2303.04761"},{"key":"4184_CR4","volume-title":"Zero-shot video editing using off-the-shelf image diffusion models","author":"W Wang","year":"2023","unstructured":"Wang W, Xie K, Liu Z, et al. Zero-shot video editing using off-the-shelf image diffusion models. 2023. ArXiv:2303.17599"},{"key":"4184_CR5","first-page":"10684","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"R Rombach","year":"2022","unstructured":"Rombach R, Blattmann A, Lorenz D, et al. High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022. 10684\u201310695"},{"key":"4184_CR6","volume-title":"Proceedings of International Conference on Learning Representations","author":"J Ho","year":"2023","unstructured":"Ho J, Chan W, Saharia C, et al. Imagen video: high definition video generation with diffusion models. 
In: Proceedings of International Conference on Learning Representations, 2023"},{"key":"4184_CR7","volume-title":"Proceedings of International Conference on Learning Representations","author":"A Hertz","year":"2023","unstructured":"Hertz A, Mokady R, Tenenbaum J, et al. Prompt-to-prompt image editing with cross attention control. In: Proceedings of International Conference on Learning Representations, 2023"},{"key":"4184_CR8","first-page":"1","volume-title":"Proceedings of ACM SIGGRAPH 2023 Conference Proceedings","author":"G Parmar","year":"2023","unstructured":"Parmar G, Singh K K, Zhang R, et al. Zero-shot image-to-image translation. In: Proceedings of ACM SIGGRAPH 2023 Conference Proceedings, 2023. 1\u201311"},{"key":"4184_CR9","first-page":"1921","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"N Tumanyan","year":"2023","unstructured":"Tumanyan N, Geyer M, Bagon S, et al. Plug-and-play diffusion features for text-driven image-to-image translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023. 1921\u20131930"},{"key":"4184_CR10","first-page":"3836","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"L Zhang","year":"2023","unstructured":"Zhang L, Rao A, Agrawala M. Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023. 3836\u20133847"},{"key":"4184_CR11","volume-title":"Proceedings of International Conference on Learning Representations","author":"E J Hu","year":"2021","unstructured":"Hu E J, Shen Y, Wallis P, et al. LoRA: low-rank adaptation of large language models. 
In: Proceedings of International Conference on Learning Representations, 2021"},{"key":"4184_CR12","first-page":"8748","volume-title":"Proceedings of International Conference on Machine Learning","author":"A Radford","year":"2021","unstructured":"Radford A, Kim J W, Hallacy C, et al. Learning transferable visual models from natural language supervision. In: Proceedings of International Conference on Machine Learning, 2021. 8748\u20138763"},{"key":"4184_CR13","first-page":"6840","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"J Ho","year":"2020","unstructured":"Ho J, Jain A, Abbeel P. Denoising diffusion probabilistic models. In: Proceedings of Advances in Neural Information Processing Systems, 2020. 6840\u20136851"},{"key":"4184_CR14","volume-title":"Proceedings of International Conference on Learning Representations","author":"F Bao","year":"2022","unstructured":"Bao F, Li C, Zhu J, et al. Analytic-DPM: an analytic estimate of the optimal reverse variance in diffusion probabilistic models. In: Proceedings of International Conference on Learning Representations, 2022"},{"key":"4184_CR15","volume-title":"Proceedings of International Conference on Learning Representations","author":"Y Song","year":"2021","unstructured":"Song Y, Sohl-Dickstein J, Kingma D P, et al. Score-based generative modeling through stochastic differential equations. In: Proceedings of International Conference on Learning Representations, 2021"},{"key":"4184_CR16","volume-title":"Proceedings of International Conference on Learning Representations","author":"J Song","year":"2020","unstructured":"Song J, Meng C, Ermon S. Denoising diffusion implicit models. In: Proceedings of International Conference on Learning Representations, 2020"},{"key":"4184_CR17","first-page":"3609","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"M Zhao","year":"2022","unstructured":"Zhao M, Bao F, Li C, et al. 
EGSDE: unpaired image-to-image translation via energy-guided stochastic differential equations. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 3609\u20133623"},{"key":"4184_CR18","volume-title":"Proceedings of International Conference on Learning Representations","author":"C Meng","year":"2022","unstructured":"Meng C, Song Y, Song J, et al. SDEdit: image synthesis and editing with stochastic differential equations. In: Proceedings of International Conference on Learning Representations, 2022"},{"key":"4184_CR19","first-page":"4015","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"A Kirillov","year":"2023","unstructured":"Kirillov A, Mintun E, Ravi N, et al. Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023. 4015\u20134026"},{"key":"4184_CR20","volume-title":"Proceedings of International Conference on Learning Representations","author":"U Singer","year":"2023","unstructured":"Singer U, Polyak A, Hayes T, et al. Make-a-video: text-to-video generation without text-video data. In: Proceedings of International Conference on Learning Representations, 2023"},{"key":"4184_CR21","volume-title":"Proceedings of International Conference on Learning Representations","author":"Y Guo","year":"2024","unstructured":"Guo Y, Yang C, Rao A, et al. AnimateDiff: animate your personalized text-to-image diffusion models without specific tuning. In: Proceedings of International Conference on Learning Representations, 2024"},{"key":"4184_CR22","first-page":"1737","volume-title":"Proceedings of International Conference on Machine Learning","author":"O Bar-Tal","year":"2023","unstructured":"Bar-Tal O, Yariv L, Lipman Y, et al. MultiDiffusion: fusing diffusion paths for controlled image generation. In: Proceedings of International Conference on Machine Learning, 2023. 
1737\u20131752"},{"key":"4184_CR23","doi-asserted-by":"publisher","first-page":"151101","DOI":"10.1007\/s11432-022-3679-0","volume":"66","author":"M Liu","year":"2023","unstructured":"Liu M, Wei Y, Wu X, et al. Survey on leveraging pre-trained generative adversarial networks for image editing and restoration. Sci China Inf Sci, 2023, 66: 151101","journal-title":"Sci China Inf Sci"},{"key":"4184_CR24","first-page":"36479","volume-title":"Proceedings of Advances in Neural Information Processing Systems","author":"C Saharia","year":"2022","unstructured":"Saharia C, Chan W, Saxena S, et al. Photorealistic text-to-image diffusion models with deep language understanding. In: Proceedings of Advances in Neural Information Processing Systems, 2022. 36479\u201336494"},{"key":"4184_CR25","first-page":"7346","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","author":"P Esser","year":"2023","unstructured":"Esser P, Chiu J, Atighehchian P, et al. Structure and content-guided video synthesis with diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023. 7346\u20137356"},{"key":"4184_CR26","volume-title":"Controlvideo: training-free controllable text-to-video generation","author":"Y Zhang","year":"2023","unstructured":"Zhang Y, Wei Y, Jiang D, et al. Controlvideo: training-free controllable text-to-video generation. 2023. ArXiv:2305.13077"},{"key":"4184_CR27","first-page":"724","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"F Perazzi","year":"2016","unstructured":"Perazzi F, Pont-Tuset J, McWilliams B, et al. A benchmark dataset and evaluation methodology for video object segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016. 
724\u2013732"},{"key":"4184_CR28","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik A C, Sheikh H R, et al. Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process, 2004, 13: 600\u2013612","journal-title":"IEEE Trans Image Process"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-023-4184-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-023-4184-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-023-4184-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,19]],"date-time":"2026-04-19T19:28:01Z","timestamp":1776626881000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-023-4184-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,8]]},"references-count":28,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["4184"],"URL":"https:\/\/doi.org\/10.1007\/s11432-023-4184-4","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,8]]},"assertion":[{"value":"27 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 June 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 
2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"132107"}}