{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:57:58Z","timestamp":1774022278070,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","funder":[{"name":"The National Natural Science Foundation of China","award":["No. 62202158"],"award-info":[{"award-number":["No. 62202158"]}]},{"name":"the National Natural Science Foundation of China","award":["No. 62206089"],"award-info":[{"award-number":["No. 62206089"]}]},{"name":"the National Natural Science Foundation of China","award":["No. 62472157"],"award-info":[{"award-number":["No. 62472157"]}]},{"name":"the science and technology innovation Program of Hunan Province","award":["Grants No. 2023RC3098"],"award-info":[{"award-number":["Grants No. 2023RC3098"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730736","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:42:43Z","timestamp":1753260163000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ColorSurge: Bringing Vibrancy and Efficiency to Automatic Video Colorization via Dual-Branch Fusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8126-6357","authenticated-orcid":false,"given":"Hongbo","family":"Zhao","sequence":"first","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8012-5360","authenticated-orcid":false,"given":"Jiaxing","family":"Li","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7901-8982","authenticated-orcid":false,"given":"Peiyi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2520-4248","authenticated-orcid":false,"given":"Peng","family":"Xiao","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0359-8821","authenticated-orcid":false,"given":"Jianxin","family":"Lin","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3372-8167","authenticated-orcid":false,"given":"Yijun","family":"Wang","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"J Antic. 2019. DeOldify\u2013A Deep Learning based project for colorizing and restoring old images (and video!)."},{"key":"e_1_3_3_2_3_1","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts et\u00a0al. 2023. Stable video diffusion: Scaling latent video diffusion models to large datasets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.15127 (2023)."},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Nicolas Bonneel James Tompkin Kalyan Sunkavalli Deqing Sun Sylvain Paris and Hanspeter Pfister. 2015. Blind video temporal consistency. ACM Transactions on Graphics (TOG) 34 6 (2015) 1\u20139.","DOI":"10.1145\/2816795.2818107"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657509"},{"key":"e_1_3_3_2_6_1","unstructured":"Zheng Chang Shuchen Weng Huan Ouyang Yu Li Si Li and Boxin Shi. 2024. L-C4: Language-Based Video Colorization for Creative and Consistent Color. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.04972 (2024)."},{"key":"e_1_3_3_2_7_1","unstructured":"Smithsonian Channel. 2025. America in Color. https:\/\/www.smithsonianchannel.com\/shows\/america-in-color. Accessed: 2025-01-17."},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"crossref","unstructured":"Siqi Chen Xueming Li Xianlin Zhang Mingdao Wang Yu Zhang Jiatong Han and Yue Zhang. 2024a. Exemplar-based video colorization with long-term spatiotemporal dependency. Knowledge-Based Systems 284 (2024) 111240.","DOI":"10.1016\/j.knosys.2023.111240"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01265"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/2853769.2853775"},{"key":"e_1_3_3_2_11_1","unstructured":"Junyao Gao Yanchen Liu Yanan Sun Yinhao Tang Yanhong Zeng Kai Chen and Cairong Zhao. 2024. Styleshot: A snapshot on any style. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.01414 (2024)."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00083"},{"key":"e_1_3_3_2_13_1","unstructured":"Yuwei Guo Ceyuan Yang Anyi Rao Zhengyang Liang Yaohui Wang Yu Qiao Maneesh Agrawala Dahua Lin and Bo Dai. 2023. Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.04725 (2023)."},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1117\/12.477378"},{"key":"e_1_3_3_2_15_1","unstructured":"Jing He Haodong Li Wei Yin Yixun Liang Leheng Li Kaiqiang Zhou Hongbo Zhang Bingbing Liu and Ying-Cong Chen. 2024. Lotus: Diffusion-based visual foundation model for high-quality dense prediction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.18124 (2024)."},{"key":"e_1_3_3_2_16_1","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_17_1","unstructured":"Wenyi Hong Ming Ding Wendi Zheng Xinghan Liu and Jie Tang. 2022. Cogvideo: Large-scale pretraining for text-to-video generation via transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.15868 (2022)."},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"crossref","unstructured":"Satoshi Iizuka and Edgar Simo-Serra. 2019. Deepremaster: temporal source-reference attention networks for comprehensive video enhancement. ACM Transactions on Graphics (TOG) 38 6 (2019) 1\u201313.","DOI":"10.1145\/3355089.3356570"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00159"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00037"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Bahjat Kawar Michael Elad Stefano Ermon and Jiaming Song. 2022. Denoising diffusion restoration models. Advances in Neural Information Processing Systems 35 (2022) 23593\u201323606.","DOI":"10.52202\/068431-1714"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00510"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_21"},{"key":"e_1_3_3_2_25_1","unstructured":"Max Ku Cong Wei Weiming Ren Huan Yang and Wenhu Chen. 2024. AnyV2V: A Tuning-Free Framework For Any Video-to-Video Editing Tasks. Transactions on Machine Learning Research (2024)."},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00387"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"crossref","unstructured":"Chenyang Lei Yazhou Xing Hao Ouyang and Qifeng Chen. 2022. Deep video prior for video consistency and propagation. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 1 (2022) 356\u2013371.","DOI":"10.1109\/TPAMI.2022.3142071"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1186562.1015780"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681356"},{"key":"e_1_3_3_2_30_1","unstructured":"Jun\u00a0Hao Liew Hanshu Yan Jianfeng Zhang Zhongcong Xu and Jiashi Feng. 2023. Magicedit: High-fidelity and temporally coherent video editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.14749 (2023)."},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"crossref","unstructured":"Chang Liu Rui Li Kaidong Zhang Yunwei Lan and Dong Liu. 2024a. StableV2V: Stablizing Shape Consistency in Video-to-Video Editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.11045 (2024).","DOI":"10.1109\/TCSVT.2025.3639307"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"crossref","unstructured":"Yihao Liu Hengyuan Zhao Kelvin\u00a0CK Chan Xintao Wang Chen\u00a0Change Loy Yu Qiao and Chao Dong. 2024b. Temporally consistent video colorization with deep feature propagation and self-regularization learning. Computational Visual Media 10 2 (2024) 375\u2013395.","DOI":"10.1007\/s41095-023-0342-8"},{"key":"e_1_3_3_2_33_1","unstructured":"I Loshchilov. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.05101 (2017)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"crossref","unstructured":"Somdyuti Paul Saumik Bhattacharya and Sumana Gupta. 2016. Spatiotemporal colorization of video using 3d steerable pyramids. IEEE Transactions on Circuits and Systems for Video Technology 27 8 (2016) 1605\u20131619.","DOI":"10.1109\/TCSVT.2016.2539539"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.85"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530757"},{"key":"e_1_3_3_2_37_1","unstructured":"Karen Simonyan. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1409.1556 (2014)."},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00652"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CRV.2019.00033"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"crossref","unstructured":"Hanzhang Wang Deming Zhai Xianming Liu Junjun Jiang and Wen Gao. 2023. Unsupervised deep exemplar colorization via pyramid dual non-local attention. IEEE Transactions on Image Processing (2023).","DOI":"10.1109\/TIP.2023.3293777"},{"key":"e_1_3_3_2_41_1","first-page":"2677","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"36","author":"Weng Shuchen","year":"2022","unstructured":"Shuchen Weng, Hao Wu, Zheng Chang, Jiajun Tang, Si Li, and Boxin Shi. 2022. L-code: Language-based colorization using color-object decoupled conditions. In Proceedings of the AAAI Conference on Artificial Intelligence , Vol.\u00a036. 2677\u20132684."},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01548"},{"key":"e_1_3_3_2_43_1","unstructured":"Wilson Yan Yunzhi Zhang Pieter Abbeel and Aravind Srinivas. 2021. VideoGPT: Video Generation using VQ-VAE and Transformers. arxiv:https:\/\/arXiv.org\/abs\/2104.10157\u00a0[cs.CV]"},{"key":"e_1_3_3_2_44_1","first-page":"174","volume-title":"European Conference on Computer Vision","author":"Yang Ren","year":"2022","unstructured":"Ren Yang, Radu Timofte, Xin Li, Qi Zhang, Lin Zhang, Fanglong Liu, Dongliang He, Fu Li, He Zheng, Weihang Yuan, et\u00a0al. 2022. Aim 2022 challenge on super-resolution of compressed image and video: Dataset, methods and results. In European Conference on Computer Vision. Springer, 174\u2013202."},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73235-5_19"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"crossref","unstructured":"Yixin Yang Jinshan Pan Zhongzheng Peng Xiaoyu Du Zhulin Tao and Jinhui Tang. 2024a. Bistnet: Semantic image prior guided bidirectional temporal feature fusion for deep exemplar-based video colorization. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024).","DOI":"10.1109\/TPAMI.2024.3370920"},{"key":"e_1_3_3_2_47_1","unstructured":"Zhuoyi Yang Jiayan Teng Wendi Zheng Ming Ding Shiyu Huang Jiazheng Xu Yuanming Yang Wenyi Hong Xiaohan Zhang Guanyu Feng et\u00a0al. 2024b. Cogvideox: Text-to-video diffusion models with an expert transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.06072 (2024)."},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00824"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_3_2_51_1","unstructured":"Richard Zhang Jun-Yan Zhu Phillip Isola Xinyang Geng Angela\u00a0S Lin Tianhe Yu and Alexei\u00a0A Efros. 2017. Real-time user-guided image colorization with learned deep priors. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1705.02999 (2017)."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00182"},{"key":"e_1_3_3_2_53_1","unstructured":"Min Zhao Rongzhen Wang Fan Bao Chongxuan Li and Jun Zhu. 2023b. Controlvideo: Adding conditional control for one shot text-to-video editing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.17098 2 3 (2023)."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"crossref","unstructured":"Yuzhi Zhao Lai-Man Po Kangcheng Liu Xuehui Wang Wing-Yin Yu Pengfei Xian Yujia Zhang and Mengyang Liu. 2023a. SVCNet: Scribble-based video colorization network with temporal aggregation. IEEE Transactions on Image Processing (2023).","DOI":"10.1109\/TIP.2023.3298537"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"crossref","unstructured":"Yuzhi Zhao Lai-Man Po Wing-Yin Yu Yasar Abbas\u00a0Ur Rehman Mengyang Liu Yujia Zhang and Weifeng Ou. 2022. Vcgan: Video colorization with hybrid generative adversarial network. IEEE Transactions on Multimedia 25 (2022) 3017\u20133032.","DOI":"10.1109\/TMM.2022.3154600"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730736","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:59:14Z","timestamp":1774018754000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730736"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":54,"alternative-id":["10.1145\/3721238.3730736","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730736","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}