{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:41:32Z","timestamp":1772905292770,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB36066"],"award-info":[{"award-number":["2022YFB36066"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755335","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"10034-10043","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["TASR: Timestep-Aware Diffusion Model for Image Super-Resolution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2844-3262","authenticated-orcid":false,"given":"Qinwei","family":"Lin","sequence":"first","affiliation":[{"name":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7583-4428","authenticated-orcid":false,"given":"Xiaopeng","family":"Sun","sequence":"additional","affiliation":[{"name":"Meituan Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1680-9782","authenticated-orcid":false,"given":"Yu","family":"Gao","sequence":"additional","affiliation":[{"name":"Meituan Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7113-1022","authenticated-orcid":false,"given":"Yujie","family":"Zhong","sequence":"additional","affiliation":[{"name":"Meituan Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2354-2240","authenticated-orcid":false,"given":"Zheng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Meituan Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1444-8313","authenticated-orcid":false,"given":"Dengjie","family":"Li","sequence":"additional","affiliation":[{"name":"Meituan Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2792-8469","authenticated-orcid":false,"given":"Haoqian","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"An image is worth multiple words: Multi-attribute inversion for constrained text-to-image synthesis. arXiv preprint arXiv:2311.11919","author":"Agarwal Aishwarya","year":"2023","unstructured":"Aishwarya Agarwal, Srikrishna Karanam, Tripti Shukla, and Balaji Vasan Srinivasan. 2023. An image is worth multiple words: Multi-attribute inversion for constrained text-to-image synthesis. arXiv preprint arXiv:2311.11919 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.150"},{"key":"e_1_3_2_1_3_1","unstructured":"Yogesh Balaji Seungjun Nah Xun Huang Arash Vahdat Jiaming Song Qinsheng Zhang Karsten Kreis Miika Aittala Timo Aila Samuli Laine et al. 2022. ediff-i: Text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324 (2022)."},{"key":"e_1_3_2_1_4_1","volume-title":"CasSR: Activating Image Power for Real-World Image Super-Resolution. arXiv preprint arXiv:2403.11451","author":"Chen Haolan","year":"2024","unstructured":"Haolan Chen, Jinhua Hao, Kai Zhao, Kun Yuan, Ming Sun, Chao Zhou, and Wei Hu. 2024. CasSR: Activating Image Power for Real-World Image Super-Resolution. arXiv preprint arXiv:2403.11451 (2024)."},{"key":"e_1_3_2_1_5_1","volume-title":"Pixart-alpha: Fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:2310.00426","author":"Chen Junsong","year":"2023","unstructured":"Junsong Chen, Jincheng Yu, Chongjian Ge, Lewei Yao, Enze Xie, Yue Wu, Zhongdao Wang, James Kwok, Ping Luo, Huchuan Lu, et al. 2023. Pixart-alpha: Fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:2310.00426 (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01118"},{"key":"e_1_3_2_1_7_1","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Adv. Neural Inform. Process. Syst. 34 (2021), 8780-8794.","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"e_1_3_2_1_8_1","unstructured":"Patrick Esser Sumith Kulal Andreas Blattmann Rahim Entezari Jonas M\u00fcller Harry Saini Yam Levi Dominik Lorenz Axel Sauer Frederic Boesel et al. 2024. Scaling rectified flow transformers for high-resolution image synthesis."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00435"},{"key":"e_1_3_2_1_11_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Adv. Neural Inform. Process. Syst. 33 (2020), 6840-6851.","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"e_1_3_2_1_12_1","volume-title":"Ella: Equip diffusion models with llm for enhanced semantic alignment. arXiv preprint arXiv:2403.05135","author":"Hu Xiwei","year":"2024","unstructured":"Xiwei Hu, Rui Wang, Yixiao Fang, Bin Fu, Pei Cheng, and Gang Yu. 2024. Ella: Equip diffusion models with llm for enhanced semantic alignment. arXiv preprint arXiv:2403.05135 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_14_1","volume-title":"Int. Conf. Learn. Represent.","volume":"5","author":"Kinga D","year":"2015","unstructured":"D Kinga, Jimmy Ba Adam, et al. 2015. A method for stochastic optimization. In Int. Conf. Learn. Represent., Vol. 5. San Diego, California;, 6."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Xinqi Lin Jingwen He Ziyan Chen Zhaoyang Lyu Bo Dai Fanghua Yu Wanli Ouyang Yu Qiao and Chao Dong. 2024. DiffBIR: Towards Blind Image Restoration with Generative Diffusion Prior. arXiv:2308.15070 [cs.CV]","DOI":"10.1007\/978-3-031-73202-7_25"},{"key":"e_1_3_2_1_17_1","first-page":"5461","article-title":"Blind image super-resolution: A survey and beyond","volume":"45","author":"Liu Anran","year":"2022","unstructured":"Anran Liu, Yihao Liu, Jinjin Gu, Yu Qiao, and Chao Dong. 2022. Blind image super-resolution: A survey and beyond. IEEE Trans. Pattern Anal. Mach. Intell. 45, 5 (2022), 5461-5480.","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"e_1_3_2_1_18_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems 36","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems 36 (2024)."},{"key":"e_1_3_2_1_19_1","volume-title":"Juan-Manuel Perez-Rua, and J\u00fcrgen Schmidhuber.","author":"Liu Haozhe","year":"2024","unstructured":"Haozhe Liu, Wentian Zhang, Jinheng Xie, Francesco Faccio, Mengmeng Xu, Tao Xiang, Mike Zheng Shou, Juan-Manuel Perez-Rua, and J\u00fcrgen Schmidhuber. 2024. Faster Diffusion via Temporal Attention Decomposition. arXiv e-prints (2024), arXiv-2404."},{"key":"e_1_3_2_1_20_1","volume-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741","author":"Nichol Alex","year":"2021","unstructured":"Alex Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob McGrew, Ilya Sutskever, and Mark Chen. 2021. Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)."},{"key":"e_1_3_2_1_21_1","first-page":"8162","article-title":"Improved denoising diffusion probabilistic models","author":"Nichol Alexander Quinn","year":"2021","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved denoising diffusion probabilistic models. PMLR, 8162-8171.","journal-title":"PMLR"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_1_23_1","volume-title":"Vincent Dumoulin, and Aaron Courville.","author":"Perez Ethan","year":"2018","unstructured":"Ethan Perez, Florian Strub, Harm De Vries, Vincent Dumoulin, and Aaron Courville. 2018. Film: Visual reasoning with a general conditioning layer. In AAAI, Vol. 32."},{"key":"e_1_3_2_1_24_1","volume-title":"Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"TIP: Text-Driven Image Processing with Semantic and Restoration Instructions. arXiv:2312.11595","author":"Qi Chenyang","year":"2023","unstructured":"Chenyang Qi, Zhengzhong Tu, Keren Ye, Mauricio Delbracio, Peyman Milanfar, Qifeng Chen, and Hossein Talebi. 2023. TIP: Text-Driven Image Processing with Semantic and Restoration Instructions. arXiv:2312.11595 (2023)."},{"key":"e_1_3_2_1_26_1","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. PMLR, 8748-8763.","journal-title":"PMLR"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_29_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural Inform. Process. Syst. 35 (2022), 36479-36494.","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"e_1_3_2_1_30_1","first-page":"9460","article-title":"Defining and characterizing reward gaming","volume":"35","author":"Skalse Joar","year":"2022","unstructured":"Joar Skalse, Nikolaus Howe, Dmitrii Krasheninnikov, and David Krueger. 2022. Defining and characterizing reward gaming. Adv. Neural Inform. Process. Syst. 35 (2022), 9460-9471.","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"e_1_3_2_1_31_1","volume-title":"Denoising Diffusion Implicit Models. arXiv:2010.02502 (October","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising Diffusion Implicit Models. arXiv:2010.02502 (October 2020). https:\/\/arxiv.org\/abs\/2010.02502"},{"key":"e_1_3_2_1_32_1","volume-title":"Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456","author":"Song Yang","year":"2020","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2020. Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02444"},{"key":"e_1_3_2_1_34_1","volume-title":"Improving the stability of diffusion models for content consistent superresolution. arXiv e-prints","author":"Sun Lingchen","year":"2023","unstructured":"Lingchen Sun, RongyuanWu, Zhengqiang Zhang, Hongwei Yong, and Lei Zhang. 2023. Improving the stability of diffusion models for content consistent superresolution. arXiv e-prints (2023), arXiv-2401."},{"key":"e_1_3_2_1_35_1","volume-title":"Rfsr: Improving isr diffusion models via reward feedback learning. arXiv preprint arXiv:2412.03268","author":"Sun Xiaopeng","year":"2024","unstructured":"Xiaopeng Sun, Qinwei Lin, Yu Gao, Yujie Zhong, Chengjian Feng, Dengjie Li, Zheng Zhao, Jie Hu, and Lin Ma. 2024. Rfsr: Improving isr diffusion models via reward feedback learning. arXiv preprint arXiv:2412.03268 (2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"Pixel- Aware Stable Diffusion for Realistic Image Super-Resolution and Personalized Stylization. In Eur. Conf. Comput. Vis.","author":"Xuansong Xie Tao Yang Peiran Ren","year":"2023","unstructured":"Peiran Ren Xuansong Xie Tao Yang, Rongyuan Wu and Lei Zhang. 2023. Pixel- Aware Stable Diffusion for Realistic Image Super-Resolution and Personalized Stylization. In Eur. Conf. Comput. Vis."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.149"},{"key":"e_1_3_2_1_38_1","volume-title":"Kelvin CK Chan, and Chen Change Loy","author":"Wang Jianyi","year":"2023","unstructured":"Jianyi Wang, Kelvin CK Chan, and Chen Change Loy. 2023. Exploring CLIP for Assessing the Look and Feel of Images. In AAAI."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Jianyi Wang Zongsheng Yue Shangchen Zhou Kelvin C.K. Chan and Chen Change Loy. 2024. Exploiting Diffusion Prior for Real-World Image Super- Resolution. (2024).","DOI":"10.1007\/s11263-024-02168-7"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00217"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00070"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the European conference on computer vision (ECCV) workshops. 0-0.","author":"Wang Xintao","year":"2018","unstructured":"Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Yu Qiao, and Chen Change Loy. 2018. Esrgan: Enhanced super-resolution generative adversarial networks. In Proceedings of the European conference on computer vision (ECCV) workshops. 0-0."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58598-3_7"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02405"},{"key":"e_1_3_2_1_46_1","volume-title":"Addsr: Accelerating diffusion-based blind superresolution with adversarial diffusion distillation. arXiv preprint arXiv:2404.01717","author":"Xie Rui","year":"2024","unstructured":"Rui Xie, Ying Tai, Chen Zhao, Kai Zhang, Zhenyu Zhang, Jun Zhou, Xiaoqian Ye, QianWang, and Jian Yang. 2024. Addsr: Accelerating diffusion-based blind superresolution with adversarial diffusion distillation. arXiv preprint arXiv:2404.01717 (2024)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00126"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02425"},{"key":"e_1_3_2_1_49_1","volume-title":"Resshift: Efficient diffusion model for image super-resolution by residual shifting. Adv. Neural Inform. Process. Syst. 36","author":"Yue Zongsheng","year":"2024","unstructured":"Zongsheng Yue, Jianyi Wang, and Chen Change Loy. 2024. Resshift: Efficient diffusion model for image super-resolution by residual shifting. Adv. Neural Inform. Process. Syst. 36 (2024)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00475"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00319"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00179"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755335","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:05:11Z","timestamp":1765339511000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755335"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":54,"alternative-id":["10.1145\/3746027.3755335","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755335","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}