{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:25:46Z","timestamp":1765308346772,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62307032"],"award-info":[{"award-number":["No.62307032"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"?Pioneer? and ?Leading Goose? R&D Program of Zhejiang under Grant","award":["No. 2025C02022"],"award-info":[{"award-number":["No. 2025C02022"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755535","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"10315-10324","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Show and Polish: Reference-Guided Identity Preservation in Face Video Restoration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-2593-5525","authenticated-orcid":false,"given":"Wenkang","family":"Han","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8353-2392","authenticated-orcid":false,"given":"Wang","family":"Lin","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5801-8540","authenticated-orcid":false,"given":"Yiyun","family":"Zhou","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9996-098X","authenticated-orcid":false,"given":"Qi","family":"Liu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2285-3606","authenticated-orcid":false,"given":"Shulei","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1187-6257","authenticated-orcid":false,"given":"Chang","family":"Yao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0415-6937","authenticated-orcid":false,"given":"Jingyuan","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Real-time convolutional neural networks for emotion and gender classification. arXiv preprint arXiv:1710.07557","author":"Arriaga Octavio","year":"2017","unstructured":"Octavio Arriaga, Matias Valdenegro-Toro, and Paul Pl\u00f6ger. 2017. Real-time convolutional neural networks for emotion and gender classification. arXiv preprint arXiv:1710.07557 (2017)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01402"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00587"},{"key":"e_1_3_2_1_4_1","volume-title":"Towards Real-world Video Face Restoration: A New Benchmark. arXiv preprint arXiv:2404.19500","author":"Chen Ziyan","year":"2024","unstructured":"Ziyan Chen, Jingwen He, Xinqi Lin, Yu Qiao, and Chao Dong. 2024. Towards Real-world Video Face Restoration: A New Benchmark. arXiv preprint arXiv:2404.19500 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_1_6_1","volume-title":"Kalman-inspired feature propagation for video face super-resolution. arXiv preprint arXiv:2408.05205","author":"Feng Ruicheng","year":"2024","unstructured":"Ruicheng Feng, Chongyi Li, and Chen Change Loy. 2024b. Kalman-inspired feature propagation for video face super-resolution. arXiv preprint arXiv:2408.05205 (2024)."},{"key":"e_1_3_2_1_7_1","first-page":"118182","article-title":"Etextsuperscript3: Exploring Embodied Emotion Through A Large-Scale Egocentric Video Dataset","volume":"37","author":"Feng Yueying","year":"2024","unstructured":"Yueying Feng, WenKang Han, Tao Jin, Zhou Zhao, Fei Wu, Chang Yao, Jingyuan Chen, et al., 2024a. Etextsuperscript3: Exploring Embodied Emotion Through A Large-Scale Egocentric Video Dataset. Advances in Neural Information Processing Systems, Vol. 37 (2024), 118182-118197.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_8_1","unstructured":"Daiheng Gao Shilin Lu Shaw Walters Wenbo Zhou Jiaming Chu Jie Zhang Bang Zhang Mengxi Jia Jian Zhao Zhaoxin Fan et al. 2024. EraseAnything: Enabling Concept Erasure in Rectified Flow Transformers. arXiv preprint arXiv:2412.20413 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00308"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19797-0_8"},{"key":"e_1_3_2_1_11_1","volume-title":"Contrastive Cross-Course Knowledge Tracing via Concept Graph Guided Knowledge Transfer. arXiv preprint arXiv:2505.13489","author":"Han Wenkang","year":"2025","unstructured":"Wenkang Han, Wang Lin, Liya Hu, Zhenlong Dai, Yiyun Zhou, Mengze Li, Zemin Liu, Chang Yao, and Jingyuan Chen. 2025a. Contrastive Cross-Course Knowledge Tracing via Concept Graph Guided Knowledge Transfer. arXiv preprint arXiv:2505.13489 (2025)."},{"key":"e_1_3_2_1_12_1","volume-title":"GUIRoboTron-Speech: Towards Automated GUI Agents Based on Speech Instructions. arXiv preprint arXiv:2506.11127","author":"Han Wenkang","year":"2025","unstructured":"Wenkang Han, Zhixiong Zeng, Jing Huang, Shu Jiang, Liming Zheng, Longrong Yang, Haibo Qiu, Chang Yao, Jingyuan Chen, and Lin Ma. 2025b. GUIRoboTron-Speech: Towards Automated GUI Agents Based on Speech Instructions. arXiv preprint arXiv:2506.11127 (2025)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00193"},{"key":"e_1_3_2_1_14_1","volume-title":"Venhancer: Generative space-time enhancement for video generation. arXiv preprint arXiv:2407.07667","author":"He Jingwen","year":"2024","unstructured":"Jingwen He, Tianfan Xue, Dongyang Liu, Xinqi Lin, Peng Gao, Dahua Lin, Yu Qiao, Wanli Ouyang, and Ziwei Liu. 2024. Venhancer: Generative space-time enhancement for video generation. arXiv preprint arXiv:2407.07667 (2024)."},{"key":"e_1_3_2_1_15_1","volume-title":"Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_17_1","volume-title":"Consistentid: Portrait generation with multimodal fine-grained identity preserving. arXiv preprint arXiv:2404.16771","author":"Huang Jiehui","year":"2024","unstructured":"Jiehui Huang, Xiao Dong, Wenhui Song, Hanhui Li, Jun Zhou, Yuhao Cheng, Shutao Liao, Long Chen, Yiqiang Yan, Shengcai Liao, et al., 2024. Consistentid: Portrait generation with multimodal fine-grained identity preserving. arXiv preprint arXiv:2404.16771 (2024)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00510"},{"key":"e_1_3_2_1_20_1","volume-title":"Towards Unsupervised Blind Face Restoration using Diffusion Prior. arXiv preprint arXiv:2410.04618","author":"Kuai Tianshu","year":"2024","unstructured":"Tianshu Kuai, Sina Honari, Igor Gilitschenski, and Alex Levinshtein. 2024. Towards Unsupervised Blind Face Restoration using Diffusion Prior. arXiv preprint arXiv:2410.04618 (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_11"},{"key":"e_1_3_2_1_22_1","volume-title":"Set you straight: Auto-steering denoising trajectories to sidestep unwanted concepts. arXiv preprint arXiv:2504.12782","author":"Li Leyang","year":"2025","unstructured":"Leyang Li, Shilin Lu, Yan Ren, and Adams Wai-Kin Kong. 2025. Set you straight: Auto-steering denoising trajectories to sidestep unwanted concepts. arXiv preprint arXiv:2504.12782 (2025)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00278"},{"key":"e_1_3_2_1_24_1","first-page":"5904","article-title":"Learning dual memory dictionaries for blind face restoration","volume":"45","author":"Li Xiaoming","year":"2022","unstructured":"Xiaoming Li, Shiguang Zhang, Shangchen Zhou, Lei Zhang, and Wangmeng Zuo. 2022. Learning dual memory dictionaries for blind face restoration. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 45, 5 (2022), 5904-5917.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00825"},{"key":"e_1_3_2_1_26_1","volume-title":"AuthFace: Towards Authentic Blind Face Restoration with Face-oriented Generative Diffusion Prior. arXiv preprint arXiv:2410.09864","author":"Liang Guoqiang","year":"2024","unstructured":"Guoqiang Liang, Qingnan Fan, Bingtao Fu, Jinwei Chen, Hong Gu, and Lin Wang. 2024. AuthFace: Towards Authentic Blind Face Restoration with Face-oriented Generative Diffusion Prior. arXiv preprint arXiv:2410.09864 (2024)."},{"key":"e_1_3_2_1_27_1","unstructured":"Wang Lin Jingyuan Chen Jiaxin Shi Yichen Zhu Chen Liang Junzhong Miao Tao Jin Zhou Zhao Fei Wu Shuicheng Yan et al. 2024. Non-confusing Generation of Customized Concepts in Diffusion Models. arXiv preprint arXiv:2405.06914 (2024)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00218"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00615"},{"key":"e_1_3_2_1_31_1","volume-title":"Robust watermarking using generative priors against image editing: From benchmarking to advances. arXiv preprint arXiv:2410.18775","author":"Lu Shilin","year":"2024","unstructured":"Shilin Lu, Zihan Zhou, Jiayou Lu, Yuanzhi Zhu, and Adams Wai-Kin Kong. 2024b. Robust watermarking using generative priors against image editing: From benchmarking to advances. arXiv preprint arXiv:2410.18775 (2024)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00251"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02434"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3115428"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611731"},{"key":"e_1_3_2_1_36_1","volume-title":"Moon: A mixed objective optimization network for the recognition of facial attributes. In Computer Vision-ECCV 2016: 14th European Conference","author":"Rudd Ethan M","year":"2016","unstructured":"Ethan M Rudd, Manuel G\u00fcnther, and Terrance E Boult. 2016. Moon: A mixed objective optimization network for the recognition of facial attributes. In Computer Vision-ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part V 14. Springer, 19-35."},{"key":"e_1_3_2_1_37_1","volume-title":"Progressive distillation for fast sampling of diffusion models. arXiv preprint arXiv:2202.00512","author":"Salimans Tim","year":"2022","unstructured":"Tim Salimans and Jonathan Ho. 2022. Progressive distillation for fast sampling of diffusion models. arXiv preprint arXiv:2202.00512 (2022)."},{"key":"e_1_3_2_1_38_1","volume-title":"Overcoming False Illusions in Real-World Face Restoration with Multi-Modal Guided Diffusion Model. arXiv preprint arXiv:2410.04161","author":"Tao Keda","year":"2024","unstructured":"Keda Tao, Jinjin Gu, Yulun Zhang, Xiucheng Wang, and Nan Cheng. 2024. Overcoming False Illusions in Real-World Face Restoration with Multi-Modal Guided Diffusion Model. arXiv preprint arXiv:2410.04161 (2024)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00230"},{"key":"e_1_3_2_1_40_1","volume-title":"Jiachun Pan, Weijia Wu, et al.","author":"Wang Bohan","year":"2025","unstructured":"Bohan Wang, Zhongqi Yue, Fengda Zhang, Shuo Chen, Li'an Bi, Junzhe Zhang, Xue Song, Kennard Yanting Chan, Jiachun Pan, Weijia Wu, et al., 2025c. Discrete visual tokens of autoregression, by diffusion, and for reasoning. arXiv e-prints (2025), arXiv-2505."},{"key":"e_1_3_2_1_41_1","volume-title":"IRBridge: Solving Image Restoration Bridge with Pre-trained Generative Diffusion Models. arXiv preprint arXiv:2505.24406","author":"Wang Hanting","year":"2025","unstructured":"Hanting Wang, Tao Jin, Wang Lin, Shulei Wang, Hai Huang, Shengpeng Ji, and Zhou Zhao. 2025a. IRBridge: Solving Image Restoration Bridge with Pre-trained Generative Diffusion Models. arXiv preprint arXiv:2505.24406 (2025)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"e_1_3_2_1_43_1","volume-title":"Kelvin CK Chan, and Chen Change Loy","author":"Wang Jianyi","year":"2024","unstructured":"Jianyi Wang, Zongsheng Yue, Shangchen Zhou, Kelvin CK Chan, and Chen Change Loy. 2024b. Exploiting diffusion prior for real-world image super-resolution. International Journal of Computer Vision (2024), 1-21."},{"key":"e_1_3_2_1_44_1","volume-title":"Instantid: Zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519","author":"Wang Qixun","year":"2024","unstructured":"Qixun Wang, Xu Bai, Haofan Wang, Zekui Qin, Anthony Chen, Huaxia Li, Xu Tang, and Yao Hu. 2024a. Instantid: Zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Shulei Wang Wang Lin Hai Huang Hanting Wang Sihang Cai WenKang Han Tao Jin Jingyuan Chen Jiacheng Sun Jieming Zhu et al. 2025b. Towards transformer-based aligned generation with self-coherence guidance. arXiv preprint arXiv:2503.17675 (2025).","DOI":"10.1109\/CVPR52734.2025.01720"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00905"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01699"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681014"},{"key":"e_1_3_2_1_49_1","volume-title":"Fastcomposer: Tuning-free multi-subject image generation with localized attention. International Journal of Computer Vision","author":"Xiao Guangxuan","year":"2024","unstructured":"Guangxuan Xiao, Tianwei Yin, William T Freeman, Fr\u00e9do Durand, and Song Han. 2024. Fastcomposer: Tuning-free multi-subject image generation with localized attention. International Journal of Computer Vision (2024), 1-20."},{"key":"e_1_3_2_1_50_1","volume-title":"Diff-prompt: Diffusion-driven prompt generator with mask supervision. arXiv preprint arXiv:2504.21423","author":"Yan Weicai","year":"2025","unstructured":"Weicai Yan, Wang Lin, Zirun Guo, Ye Wang, Fangming Feng, Xiaoda Yang, Zehan Wang, and Tao Jin. 2025. Diff-prompt: Diffusion-driven prompt generator with mask supervision. arXiv preprint arXiv:2504.21423 (2025)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413965"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00073"},{"key":"e_1_3_2_1_53_1","volume-title":"Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721","author":"Ye Hu","year":"2023","unstructured":"Hu Ye, Jun Zhang, Sibo Liu, Xiao Han, and Wei Yang. 2023. Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_56_1","volume-title":"I2vgen-xl: High-quality image-to-video synthesis via cascaded diffusion models. arXiv preprint arXiv:2311.04145","author":"Zhang Shiwei","year":"2023","unstructured":"Shiwei Zhang, Jiayu Wang, Yingya Zhang, Kang Zhao, Hangjie Yuan, Zhiwu Qin, Xiang Wang, Deli Zhao, and Jingren Zhou. 2023b. I2vgen-xl: High-quality image-to-video synthesis via cascaded diffusion models. arXiv preprint arXiv:2311.04145 (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01352"},{"key":"e_1_3_2_1_58_1","first-page":"30599","article-title":"Towards robust blind face restoration with codebook lookup transformer","volume":"35","author":"Zhou Shangchen","year":"2022","unstructured":"Shangchen Zhou, Kelvin Chan, Chongyi Li, and Chen Change Loy. 2022. Towards robust blind face restoration with codebook lookup transformer. Advances in Neural Information Processing Systems, Vol. 35 (2022), 30599-30611.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00245"},{"key":"e_1_3_2_1_60_1","volume-title":"Flair: A conditional diffusion framework with applications to face video restoration. arXiv preprint arXiv:2311.15445","author":"Zou Zihao","year":"2023","unstructured":"Zihao Zou, Jiaming Liu, Shirin Shoushtari, Yubo Wang, Weijie Gan, and Ulugbek S Kamilov. 2023. Flair: A conditional diffusion framework with applications to face video restoration. arXiv preprint arXiv:2311.15445 (2023)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755535","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:21:02Z","timestamp":1765308062000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755535"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":60,"alternative-id":["10.1145\/3746027.3755535","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755535","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}