{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T07:31:00Z","timestamp":1776238260981,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","funder":[{"name":"Samsung Electronics"},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["NRF2018R1A5A1060031"],"award-info":[{"award-number":["NRF2018R1A5A1060031"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Institute of Information & communications Technology Planning & Evaluation (IITP)","award":["RS-2019-II191906, IITP-2021-0-02068"],"award-info":[{"award-number":["RS-2019-II191906, IITP-2021-0-02068"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730719","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["DC-VSR: Spatially and Temporally Consistent Video Super-Resolution with Video Diffusion Prior"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-2287-6263","authenticated-orcid":false,"given":"Janghyeok","family":"Han","sequence":"first","affiliation":[{"name":"POSTECH, Pohang, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2414-7295","authenticated-orcid":false,"given":"Gyujin","family":"Sim","sequence":"additional","affiliation":[{"name":"POSTECH, Pohang, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0806-6963","authenticated-orcid":false,"given":"Geonung","family":"Kim","sequence":"additional","affiliation":[{"name":"POSTECH, Pohang, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3549-0432","authenticated-orcid":false,"given":"Hyun-Seung","family":"Lee","sequence":"additional","affiliation":[{"name":"Visual Display Business, Samsung Electronics, Suwon, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4271-6862","authenticated-orcid":false,"given":"Kyuha","family":"Choi","sequence":"additional","affiliation":[{"name":"Visual Display Business, Samsung Electronics, Suwon, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8440-1836","authenticated-orcid":false,"given":"Youngseok","family":"Han","sequence":"additional","affiliation":[{"name":"Visual Display Business, Samsung Electronics, Suwon, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7627-3513","authenticated-orcid":false,"given":"Sunghyun","family":"Cho","sequence":"additional","affiliation":[{"name":"POSTECH, Pohang, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Donghoon Ahn Hyoungwon Cho Jaewon Min Wooseok Jang Jungwoo Kim SeonHwa Kim Hyun\u00a0Hee Park Kyong\u00a0Hwan Jin and Seungryong Kim. 2024. Self-Rectifying Diffusion Sampling with Perturbed-Attention Guidance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.17377 (2024)."},{"key":"e_1_3_3_2_3_1","unstructured":"Omer Bar-Tal Lior Yariv Yaron Lipman and Tali Dekel. 2023. MultiDiffusion: Fusing Diffusion Paths for Controlled Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.08113 (2023)."},{"key":"e_1_3_3_2_4_1","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts Varun Jampani and Robin Rombach. 2023a. Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets. arxiv:https:\/\/arXiv.org\/abs\/2311.15127\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2311.15127"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.38"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00491"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00588"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00587"},{"key":"e_1_3_3_2_10_1","unstructured":"Kelvin C.\u00a0K. Chan Xintao Wang Xiangyu Xu Jinwei Gu and Chen\u00a0Change Loy. 2020. GLEAN: Generative Latent Bank for Large-Factor Image Super-Resolution. arxiv:https:\/\/arXiv.org\/abs\/2012.00739\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2012.00739"},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00882"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"crossref","unstructured":"Mengyu Chu You Xie Jonas Mayer Laura Leal-Taix\u00e9 and Nils Thuerey. 2020. Learning temporal coherence via self-supervision for GAN-based video generation. ACM Trans. Graph. 39 4 (Aug. 2020). https:\/\/doi.org\/10.1145\/3386569.3392457","DOI":"10.1145\/3386569.3392457"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Keyan Ding Kede Ma Shiqi Wang and Eero\u00a0P. Simoncelli. 2021. Image Quality Assessment: Unifying Structure and Texture Similarity. IEEE Transactions on Pattern Analysis and Machine Intelligence (2021). https:\/\/doi.org\/10.1109\/TPAMI.2020.3045810","DOI":"10.1109\/TPAMI.2020.3045810"},{"key":"e_1_3_3_2_14_1","unstructured":"Chao Dong Chen\u00a0Change Loy Kaiming He and Xiaoou Tang. 2015. Image Super-Resolution Using Deep Convolutional Networks. arxiv:https:\/\/arXiv.org\/abs\/1501.00092\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1501.00092"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00589"},{"key":"e_1_3_3_2_16_1","unstructured":"Ian\u00a0J. Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative Adversarial Networks. arxiv:https:\/\/arXiv.org\/abs\/1406.2661\u00a0[stat.ML] https:\/\/arxiv.org\/abs\/1406.2661"},{"key":"e_1_3_3_2_17_1","unstructured":"Muhammad Haris Greg Shakhnarovich and Norimichi Ukita. 2018. Deep Back-Projection Networks For Super-Resolution. arxiv:https:\/\/arXiv.org\/abs\/1803.02735\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1803.02735"},{"key":"e_1_3_3_2_18_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Ho Jonathan","year":"2021","unstructured":"Jonathan Ho and Tim Salimans. 2021. Classifier-Free Diffusion Guidance. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00686"},{"key":"e_1_3_3_2_20_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Karras Tero","year":"2022","unstructured":"Tero Karras, Miika Aittala, Timo Aila, and Samuli Laine. 2022. Elucidating the Design Space of Diffusion-Based Generative Models. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_21_1","unstructured":"Jiwon Kim Jung\u00a0Kwon Lee and Kyoung\u00a0Mu Lee. 2016a. Accurate Image Super-Resolution Using Very Deep Convolutional Networks. arxiv:https:\/\/arXiv.org\/abs\/1511.04587\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1511.04587"},{"key":"e_1_3_3_2_22_1","unstructured":"Jiwon Kim Jung\u00a0Kwon Lee and Kyoung\u00a0Mu Lee. 2016b. Deeply-Recursive Convolutional Network for Image Super-Resolution. arxiv:https:\/\/arXiv.org\/abs\/1511.04491\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1511.04491"},{"key":"e_1_3_3_2_23_1","unstructured":"Wei-Sheng Lai Jia-Bin Huang Narendra Ahuja and Ming-Hsuan Yang. 2017. Deep Laplacian Pyramid Networks for Fast and Accurate Super-Resolution. arxiv:https:\/\/arXiv.org\/abs\/1704.03915\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1704.03915"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"crossref","unstructured":"Christian Ledig Lucas Theis Ferenc Huszar Jose Caballero Andrew Cunningham Alejandro Acosta Andrew Aitken Alykhan Tejani Johannes Totz Zehan Wang and Wenzhe Shi. 2017. Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network. arxiv:https:\/\/arXiv.org\/abs\/1609.04802\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1609.04802","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_3_2_25_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Lee Yuseung","year":"2023","unstructured":"Yuseung Lee, Kunho Kim, Hyunjin Kim, and Minhyuk Sung. 2023. SyncDiffusion: Coherent Montage via Synchronized Joint Diffusions. In Thirty-seventh Conference on Neural Information Processing Systems."},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"crossref","unstructured":"Xinqi Lin Jingwen He Ziyan Chen Zhaoyang Lyu Bo Dai Fanghua Yu Wanli Ouyang Yu Qiao and Chao Dong. 2024. DiffBIR: Towards Blind Image Restoration with Generative Diffusion Prior. arxiv:https:\/\/arXiv.org\/abs\/2308.15070\u00a0[cs.CV]","DOI":"10.1007\/978-3-031-73202-7_25"},{"key":"e_1_3_3_2_28_1","unstructured":"Sachit Menon Alexandru Damian Shijia Hu Nikhil Ravi and Cynthia Rudin. 2020. PULSE: Self-Supervised Photo Upsampling via Latent Space Exploration of Generative Models. arxiv:https:\/\/arXiv.org\/abs\/2003.03808\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2003.03808"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00251"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_31_1","volume-title":"The European Conference on Computer Vision (ECCV)","author":"Rota Claudio","year":"2024","unstructured":"Claudio Rota, Marco Buzzelli, and Joost van\u00a0de Weijer. 2024. Enhancing Perceptual Quality in Video Super-Resolution through Temporally-Consistent Detail Synthesis using Diffusion Models. In The European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_3_2_32_1","unstructured":"StabilityAI. 2023. Stable Video Diffusion Image-to-Video. https:\/\/huggingface.co\/stabilityai\/stable-video-diffusion-img2vid"},{"key":"e_1_3_3_2_33_1","unstructured":"Lingchen Sun Rongyuan Wu Zhengqiang Zhang Hongwei Yong and Lei Zhang. 2024. Improving the Stability of Diffusion Models for Content Consistent Super-Resolution. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.00877 (2024)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.298"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1998.710815"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"crossref","unstructured":"Jianyi Wang Zongsheng Yue Shangchen Zhou Kelvin\u00a0C.K. Chan and Chen\u00a0Change Loy. 2024. Exploiting Diffusion Prior for Real-World Image Super-Resolution. (2024).","DOI":"10.1007\/s11263-024-02168-7"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00217"},{"key":"e_1_3_3_2_39_1","unstructured":"Xintao Wang Ke Yu Shixiang Wu Jinjin Gu Yihao Liu Chao Dong Chen\u00a0Change Loy Yu Qiao and Xiaoou Tang. 2018b. ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks. arxiv:https:\/\/arXiv.org\/abs\/1809.00219\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1809.00219"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01843"},{"key":"e_1_3_3_2_41_1","unstructured":"Rongyuan Wu Lingchen Sun Zhiyuan Ma and Lei Zhang. 2024a. One-Step Effective Diffusion Network for Real-World Image Super-Resolution. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.08177 (2024)."},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02405"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25398"},{"key":"e_1_3_3_2_44_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Xie Yujia","year":"2022","unstructured":"Yujia Xie, Yujun Shen, Zhiwei Xie, Yichen Wei, and Alan Yuille. 2022. MUSIQ: Learning Multimodal Representations for Visual Question Answering. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_3_2_45_1","unstructured":"Yiran Xu Taesung Park Richard Zhang Yang Zhou Eli Shechtman Feng Liu Jia-Bin Huang and Difan Liu. 2024. VideoGigaGAN: Towards Detail-rich Video Super-Resolution. (2024). arxiv:https:\/\/arXiv.org\/abs\/2404.12388\u00a0[cs.CV]"},{"key":"e_1_3_3_2_46_1","unstructured":"Tao Yang Rongyuan Wu Peiran Ren Xuansong Xie and Lei Zhang. 2024b. Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization. arxiv:https:\/\/arXiv.org\/abs\/2308.14469\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2308.14469"},{"key":"e_1_3_3_2_47_1","volume-title":"European Conference on Computer Vision (ECCV)","author":"Yang Xi","year":"2024","unstructured":"Xi Yang, Chenhang He, Jianqi Ma, and Lei Zhang. 2024a. Motion-Guided Latent Diffusion for Temporally Consistent Real-world Video Super-resolution. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00320"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00013"},{"key":"e_1_3_3_2_50_1","unstructured":"Fanghua Yu Jinjin Gu Zheyuan Li Jinfan Hu Xiangtao Kong Xintao Wang Jingwen He Yu Qiao and Chao Dong. 2024. Scaling Up to Excellence: Practicing Model Scaling for Photo-Realistic Image Restoration In the Wild. arxiv:https:\/\/arXiv.org\/abs\/2401.13627\u00a0[cs.CV]"},{"key":"e_1_3_3_2_51_1","unstructured":"Yulun Zhang Kunpeng Li Kai Li Lichen Wang Bineng Zhong and Yun Fu. 2018a. Image Super-Resolution Using Very Deep Residual Channel Attention Networks. arxiv:https:\/\/arXiv.org\/abs\/1807.02758\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1807.02758"},{"key":"e_1_3_3_2_52_1","unstructured":"Yulun Zhang Yapeng Tian Yu Kong Bineng Zhong and Yun Fu. 2018b. Residual Dense Network for Image Super-Resolution. arxiv:https:\/\/arXiv.org\/abs\/1802.08797\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1802.08797"},{"key":"e_1_3_3_2_53_1","volume-title":"European Conference on Computer Vision (ECCV)","author":"Zhang Yuehan","year":"2024","unstructured":"Yuehan Zhang and Angela Yao. 2024. RealViformer: Investigating Attention for Real-World Video Super-Resolution. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00245"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730719","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:55:10Z","timestamp":1774018510000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730719"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":53,"alternative-id":["10.1145\/3721238.3730719","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730719","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}