{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T21:03:01Z","timestamp":1775682181867,"version":"3.50.1"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62036005"],"award-info":[{"award-number":["62036005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"GPU Cluster built by MCC Laboratory of Information Science and Technology Institution, University of Science and Technology of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1109\/tcsvt.2025.3639307","type":"journal-article","created":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T18:50:15Z","timestamp":1764701415000},"page":"5467-5482","source":"Crossref","is-referenced-by-count":3,"title":["StableV2V: Stabilizing Shape Consistency in Video-to-Video Editing"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1751-6206","authenticated-orcid":false,"given":"Chang","family":"Liu","sequence":"first","affiliation":[{"name":"MOE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3102-0932","authenticated-orcid":false,"given":"Rui","family":"Li","sequence":"additional","affiliation":[{"name":"MOE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6467-7112","authenticated-orcid":false,"given":"Kaidong","family":"Zhang","sequence":"additional","affiliation":[{"name":"MOE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8884-0853","authenticated-orcid":false,"given":"Yunwei","family":"Lan","sequence":"additional","affiliation":[{"name":"MOE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition, University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9100-2906","authenticated-orcid":false,"given":"Dong","family":"Liu","sequence":"additional","affiliation":[{"name":"MOE Key Laboratory of Brain-Inspired Intelligent Perception and Cognition, University of Science and Technology of China, Hefei, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"AnyV2V: A tuning-free framework for any video-to-video editing tasks","author":"Ku","year":"2024","journal-title":"arXiv:2403.14468"},{"key":"ref2","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. NeurIPS","volume":"33","author":"Ho"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref4","article-title":"Stable video diffusion: Scaling latent video diffusion models to large datasets","author":"Blattmann","year":"2023","journal-title":"arXiv:2311.15127"},{"key":"ref5","article-title":"SDXL: Improving latent diffusion models for high-resolution image synthesis","volume-title":"Proc. ICLR","author":"Podell"},{"key":"ref6","article-title":"TokenFlow: Consistent diffusion features for consistent video editing","volume-title":"Proc. ICLR","author":"Geyer"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00684"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00618"},{"key":"ref9","article-title":"VASE: Object-centric appearance and shape manipulation of real videos","author":"Peruzzo","year":"2024","journal-title":"arXiv:2401.02473"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3402620"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00809"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00821"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33203"},{"key":"ref15","article-title":"VACE: All-in-one video creation and editing","author":"Jiang","year":"2025","journal-title":"arXiv:2503.07598"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_11"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687656"},{"key":"ref18","article-title":"The 2017 DAVIS challenge on video object segmentation","author":"Pont-Tuset","year":"2017","journal-title":"arXiv:1704.00675"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2867934"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3083257"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.2973374"},{"key":"ref23","article-title":"Denoising diffusion implicit models","volume-title":"Proc. ICLR","author":"Song"},{"key":"ref24","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. ICML","author":"Radford"},{"key":"ref25","article-title":"LaCon: Late-constraint diffusion for steerable guided image synthesis","author":"Liu","year":"2023","journal-title":"arXiv:2305.11520"},{"key":"ref26","article-title":"Imagen video: High definition video generation with diffusion models","author":"Ho","year":"2022","journal-title":"arXiv:2210.02303"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2025.3531390"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2025.3547337"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2025.3532495"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref31","article-title":"Latte: Latent diffusion transformer for video generation","author":"Ma","year":"2024","journal-title":"arXiv:2401.03048"},{"key":"ref32","article-title":"HunyuanVideo: A systematic framework for large video generative models","author":"Kong","year":"2024","journal-title":"arXiv:2412.03603"},{"key":"ref33","article-title":"LTX-video: Realtime video latent diffusion","author":"HaCohen","year":"2024","journal-title":"arXiv:2501.00103"},{"key":"ref34","article-title":"Step-video-T2V technical report: The practice, challenges, and future of video foundation model","volume-title":"arXiv:2502.10248","author":"Ma","year":"2025"},{"key":"ref35","article-title":"Wan: Open and advanced large-scale video generative models","author":"Wan","year":"2025","journal-title":"arXiv:2503.20314"},{"key":"ref36","article-title":"I2 VGen-XL: High-quality image-to-video synthesis via cascaded diffusion models","author":"Zhang","year":"2023","journal-title":"arXiv:2311.04145"},{"key":"ref37","article-title":"Ctrl-adapter: An efficient and versatile framework for adapting diverse controls to any diffusion model","volume-title":"Proc. ICLR","author":"Lin"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72655-2_7"},{"key":"ref40","article-title":"FLATTEN: Optical flow-guided attention for consistent text-to-video editing","volume-title":"Proc. ICLR","author":"Cong"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_41"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01376"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"ref44","article-title":"VideoGrain: Modulating space-time attention for multi-grained video editing","author":"Yang","year":"2025","journal-title":"arXiv:2502.17258"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00630"},{"key":"ref47","article-title":"A neural algorithm of artistic style","author":"Gatys","year":"2015","journal-title":"arXiv:1508.06576"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01633-5"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3319330"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2867733"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3019967"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00961"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01843"},{"key":"ref57","article-title":"FVD: A new metric for video generation","volume-title":"Proc. ICLR Workshop","author":"Unterthiner"},{"key":"ref58","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. ICLR","author":"Hu"},{"key":"ref59","article-title":"ModelScope text-to-video technical report","volume-title":"arXiv:2308.06571","author":"Wang","year":"2023"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"ref61","article-title":"Qwen-image technical report","volume-title":"arXiv:2508.02324","author":"Wu","year":"2025"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/76\/11475579\/11272911.pdf?arnumber=11272911","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T20:08:35Z","timestamp":1775678915000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11272911\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":61,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2025.3639307","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4]]}}}