{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T03:31:39Z","timestamp":1777865499046,"version":"3.51.4"},"reference-count":74,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.01390","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"14983-14993","source":"Crossref","is-referenced-by-count":0,"title":["Dual-Expert Consistency Model for Efficient and High-Quality Video Generation"],"prefix":"10.1109","author":[{"given":"Zhengyao","family":"Lv","sequence":"first","affiliation":[{"name":"The University of Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenyang","family":"Si","sequence":"additional","affiliation":[{"name":"Nanjing University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianlin","family":"Pan","sequence":"additional","affiliation":[{"name":"Nanjing University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaoxi","family":"Chen","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,S-Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kwan-Yee K.","family":"Wong","sequence":"additional","affiliation":[{"name":"The University of Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Qiao","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziwei","family":"Liu","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,S-Lab"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Tract: Denoising diffusion models with transitive closure time-distillation","author":"Berthelot","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.00717"},{"key":"ref4","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref5","article-title":"Boot: Data-free distillation of denoising diffusion models with bootstrapping","volume-title":"ICML 2023 Workshop on Structured Probabilistic Inference Generative Modeling","author":"Gu","year":"2023"},{"key":"ref6","article-title":"Animatediff: Animate your personalized text-to-image diffusion models without specific tuning","author":"Guo","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref7","article-title":"Ltx-video: Realtime video latent diffusion","author":"HaCohen","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref8","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0628"},{"key":"ref10","article-title":"Cogvideo: Large-scale pretraining for text-to-video generation via transformers","author":"Hong","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref11","article-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref12","first-page":"8153","article-title":"Animate anyone: Consistent and controllable image-to-video synthesis for character animation","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Hu"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i4.32389"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02060"},{"key":"ref15","article-title":"Pyramidal flow matching for efficient video generative modeling","author":"Jin","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73390-1_25"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1926"},{"key":"ref18","article-title":"Consistency trajectory models: Learning probability flow ode trajectory of diffusion","author":"Kim","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref19","article-title":"Imagine flash: Accelerating emu diffusion models with backward distillation","author":"Kohler","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref20","article-title":"Hunyuanvideo: A systematic framework for large video generative models","author":"Kong","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref22","article-title":"PKU-Yuan Lab and Tuzhan AI etc","year":"2024","journal-title":"Open-sora-plan"},{"key":"ref23","article-title":"Truncated consistency models","author":"Lee","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref24","article-title":"Animatediff-lightning: Cross-model diffusion distillation","author":"Lin","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref25","article-title":"Sdxllightning: Progressive adversarial diffusion distillation","author":"Lin","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref26","article-title":"Diffusion adversarial post-training for one-step video generation","author":"Lin","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i5.32580"},{"key":"ref28","article-title":"Pseudo numerical methods for diffusion models on manifolds","author":"Liu","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref29","article-title":"Flow straight and fast: Learning to generate and transfer data with rectified flow","author":"Liu","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref30","article-title":"Instaflow: One step is enough for high-quality diffusionbased text-to-image generation","volume-title":"The Twelfth International Conference on Learning Representations","author":"Liu"},{"key":"ref31","article-title":"Simplifying, stabilizing and scaling continuous-time consistency models","author":"Lu","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0418"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-025-1562-4"},{"key":"ref34","article-title":"Knowledge distillation in iterative generative models for improved sampling speed","author":"Luhman","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref35","article-title":"Latent consistency models: Synthesizing highresolution images with few-step inference","author":"Luo","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref36","article-title":"Lcm-lora: A universal stable-diffusion acceleration module","author":"Luo","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref37","article-title":"A comprehensive survey on knowledge distillation of diffusion models","author":"Luo","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.52202\/075280-3344"},{"key":"ref39","article-title":"One-step diffusion distillation through score implicit matching","author":"Luo","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref40","article-title":"You only sample once: Taming one-step text-toimage synthesis by self-cooperative diffusion gans","author":"Luo","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref41","article-title":"Fastercache: Training-free video diffusion model acceleration with high quality","author":"Lv","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref42","article-title":"Latte: Latent diffusion transformer for video generation","author":"Ma","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01374"},{"key":"ref44","article-title":"OpenAI","year":"2024","journal-title":"Sora"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref46","article-title":"Dreamfusion: Text-to-3d using 2d diffusion","author":"Poole","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref47","article-title":"Hyper-sd: Trajectory segmented consistency model for efficient image synthesis","author":"Ren","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref49","article-title":"Progressive distillation for fast sampling of diffusion models","author":"Salimans","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687625"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73016-0_6"},{"key":"ref52","article-title":"Denoising diffusion implicit models","author":"Song","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref53","article-title":"Improved techniques for training consistency models","author":"Song","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref54","article-title":"Consistency models","author":"Song","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref56","article-title":"Wan: Open and advanced large-scale video generative models","author":"Wan","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref57","article-title":"Phased consistency model","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3681758.3698013"},{"key":"ref59","article-title":"Modelscope text-to-video technical report","author":"Wang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref60","article-title":"Sparse videogen: Accelerating video diffusion transformers with spatial-temporal sparsity","author":"Xi","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref61","article-title":"Tackling the generative learning trilemma with denoising diffusion gans","author":"Xiao","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00783"},{"key":"ref63","article-title":"Cogvideox: Text-to-video diffusion models with an expert transformer","author":"Yang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref64","article-title":"Improved distribution matching distillation for fast image synthesis","author":"Yin","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00632"},{"key":"ref66","article-title":"From slow bidirectional to fast causal video generators","author":"Yin","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref67","article-title":"Motion consistency model: Accelerating video diffusion with disentangled motion-appearance distillation","author":"Zhai","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref68","article-title":"Fast video generation with sliding tile attention","author":"Zhang","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref69","first-page":"arXiv-2505","article-title":"Faster video diffusion with trainable sparse attention","author":"Zhang","year":"2025","journal-title":"arXiv e-prints"},{"key":"ref70","article-title":"Controlvideo: Training-free controllable text-to-video generation","author":"Zhang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i10.33114"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.01684"},{"key":"ref73","article-title":"Real-time video generation with pyramid attention broadcast","author":"Zhao","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref74","first-page":"42390","article-title":"Fast sampling of diffusion models via operator learning","volume-title":"International conference on machine learning","author":"Zheng"},{"key":"ref75","article-title":"Trajectory consistency distillation","author":"Zheng","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref76","article-title":"Score identity distillation: Exponentially fast distillation of pretrained diffusion models for one-step generation","volume-title":"Forty-first International Conference on Machine Learning","author":"Zhou"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11445863.pdf?arnumber=11445863","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T06:42:47Z","timestamp":1777531367000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11445863\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":74,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.01390","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}