{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T20:34:46Z","timestamp":1767990886234,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSFC","award":["U2001209, 61902076"],"award-info":[{"award-number":["U2001209, 61902076"]}]},{"DOI":"10.13039\/100007219","name":"Natural Science Foundation of Shanghai","doi-asserted-by":"publisher","award":["21ZR1406600, 21ZR1403300"],"award-info":[{"award-number":["21ZR1406600, 21ZR1403300"]}],"id":[{"id":"10.13039\/100007219","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3503161.3547951","type":"proceedings-article","created":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T15:42:46Z","timestamp":1665416566000},"page":"3461-3469","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Rethinking Super-Resolution as Text-Guided Details Generation"],"prefix":"10.1145","author":[{"given":"Chenxi","family":"Ma","sequence":"first","affiliation":[{"name":"School of Computer Science, Shanghai Key Laboratory of Intelligent Information Processing, Shanghai Collaborative Innovation Center of Intelligent Visual Computing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Yan","sequence":"additional","affiliation":[{"name":"School of Computer Science, Shanghai Key Laboratory of Intelligent Information Processing, Shanghai Collaborative Innovation Center of Intelligent Visual Computing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qing","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Computer Science, Shanghai Key Laboratory of Intelligent Information Processing, Shanghai Collaborative Innovation Center of Intelligent Visual Computing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weimin","family":"Tan","sequence":"additional","affiliation":[{"name":"School of Computer Science, Shanghai Key Laboratory of Intelligent Information Processing, Shanghai Collaborative Innovation Center of Intelligent Visual Computing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Siming","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Data Science, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,10]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Explorable Super Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Bahat Yuval","year":"2020","unstructured":"Yuval Bahat and Tomer Michaeli . 2020 . Explorable Super Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Yuval Bahat and Tomer Michaeli. 2020. Explorable Super Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00019"},{"key":"e_1_3_2_2_3_1","unstructured":"Marcel Christoph B\u00fchler Andr\u00e9s Romero and Radu Timofte. 2020. DeepSEE: Deep Disentangled Semantic Explorative Extreme Super-Resolution.  Marcel Christoph B\u00fchler Andr\u00e9s Romero and Radu Timofte. 2020. DeepSEE: Deep Disentangled Semantic Explorative Extreme Super-Resolution."},{"key":"e_1_3_2_2_4_1","volume-title":"Language-Based Image Editing with Recurrent Attentive Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Chen Jianbo","year":"2018","unstructured":"Jianbo Chen , Yelong Shen , Jianfeng Gao , Jingjing Liu , and Xiaodong Liu . 2018 . Language-Based Image Editing with Recurrent Attentive Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Jianbo Chen, Yelong Shen, Jianfeng Gao, Jingjing Liu, and Xiaodong Liu. 2018. Language-Based Image Editing with Recurrent Attentive Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_5_1","volume-title":"FSRNet: End-to-End Learning Face Super-Resolution with Facial Priors. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2492--2501","author":"Chen Y.","unstructured":"Y. Chen , Y. Tai , X. Liu , C. Shen , and J. Yang . 2018 . FSRNet: End-to-End Learning Face Super-Resolution with Facial Priors. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2492--2501 . Y. Chen, Y. Tai, X. Liu, C. Shen, and J. Yang. 2018. FSRNet: End-to-End Learning Face Super-Resolution with Facial Priors. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2492--2501."},{"key":"e_1_3_2_2_6_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Cho Wonwoong","year":"2018","unstructured":"Wonwoong Cho , Hyojin Bahng , David Keetae Park , Seungjoo Yoo , Ziming Wu , Xiaojuan Ma , and Jaegul Choo . 2018 . Text2Colors: Guiding Image Colorization through Text-Driven Palette Generation . In European Conference on Computer Vision (ECCV). Wonwoong Cho, Hyojin Bahng, David Keetae Park, Seungjoo Yoo, Ziming Wu, Xiaojuan Ma, and Jaegul Choo. 2018. Text2Colors: Guiding Image Colorization through Text-Driven Palette Generation. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2439281"},{"key":"e_1_3_2_2_8_1","first-page":"2672","article-title":"Generative Adversarial Networks","volume":"3","author":"Goodfellow Ian J.","year":"2014","unstructured":"Ian J. Goodfellow , Jean Pouget-Abadie , Mehdi Mirza , Bing Xu , David Warde-Farley , Sherjil Ozair , Aaron Courville , and Yoshua Bengio . 2014 . Generative Adversarial Networks . Advances in Neural Information Processing Systems , Vol. 3 (2014), 2672 -- 2680 . Ian J. Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative Adversarial Networks. Advances in Neural Information Processing Systems, Vol. 3 (2014), 2672--2680.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_9_1","volume-title":"European Conference on Computer Vision (ECCV).","author":"Gu Jinjin","year":"2020","unstructured":"Jinjin Gu , Haoming Cai , Haoyu Chen , Xiaoxing Ye , Jimmy Ren , and Chao Dong . 2020 . PIPAL: a Large-Scale Image Quality Assessment Dataset for Perceptual Image Restoration . In European Conference on Computer Vision (ECCV). Jinjin Gu, Haoming Cai, Haoyu Chen, Xiaoxing Ye, Jimmy Ren, and Chao Dong. 2020. PIPAL: a Large-Scale Image Quality Assessment Dataset for Perceptual Image Restoration. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_2_10_1","volume-title":"Learning Meta Face Recognition in Unseen Domains. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Guo Jianzhu","year":"2020","unstructured":"Jianzhu Guo , Xiangyu Zhu , Chenxu Zhao , Dong Cao , Zhen Lei , and Stan Z Li . 2020 b. Learning Meta Face Recognition in Unseen Domains. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Jianzhu Guo, Xiangyu Zhu, Chenxu Zhao, Dong Cao, Zhen Lei, and Stan Z Li. 2020b. Learning Meta Face Recognition in Unseen Domains. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_11_1","volume-title":"Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Guo Yong","year":"2020","unstructured":"Yong Guo , Jian Chen , Jingdong Wang , Qi Chen , Jiezhang Cao , Zeshuai Deng , Yanwu Xu , and Mingkui Tan . 2020 a. Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Yong Guo, Jian Chen, Jingdong Wang, Qi Chen, Jiezhang Cao, Zeshuai Deng, Yanwu Xu, and Mingkui Tan. 2020a. Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00915"},{"key":"e_1_3_2_2_13_1","volume-title":"Accurate Image Super-Resolution Using Very Deep Convolutional Networks. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1646--1654","author":"Kim J.","unstructured":"J. Kim , J. K. Lee , and K. M. Lee . 2016 . Accurate Image Super-Resolution Using Very Deep Convolutional Networks. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1646--1654 . J. Kim, J. K. Lee, and K. M. Lee. 2016. Accurate Image Super-Resolution Using Very Deep Convolutional Networks. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1646--1654."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01245"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.151"},{"key":"e_1_3_2_2_18_1","volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV).","author":"Lin Tsung Yi","unstructured":"Tsung Yi Lin , Michael Maire , Serge Belongie , James Hays , and C. Lawrence Zitnick . 2014 . Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV). Tsung Yi Lin, Michael Maire, Serge Belongie, James Hays, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_2_19_1","volume-title":"Residual Feature Aggregation Network for Image Super-Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Liu Jie","year":"2020","unstructured":"Jie Liu , Wenjie Zhang , Yuting Tang , Jie Tang , and Gangshan Wu . 2020 . Residual Feature Aggregation Network for Image Super-Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Jie Liu, Wenjie Zhang, Yuting Tang, Jie Tang, and Gangshan Wu. 2020. Residual Feature Aggregation Network for Image Super-Resolution. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_20_1","volume-title":"Deep Learning Face Attributes in the Wild. In IEEE International Conference on Computer Vision (ICCV). 3730--3738","author":"Liu Z.","year":"2015","unstructured":"Z. Liu , P. Luo , X. Wang , and X. Tang . 2015 . Deep Learning Face Attributes in the Wild. In IEEE International Conference on Computer Vision (ICCV). 3730--3738 . https:\/\/doi.org\/10.1109\/ICCV. 2015 .425 Z. Liu, P. Luo, X. Wang, and X. Tang. 2015. Deep Learning Face Attributes in the Wild. In IEEE International Conference on Computer Vision (ICCV). 3730--3738. https:\/\/doi.org\/10.1109\/ICCV.2015.425"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00561"},{"key":"e_1_3_2_2_22_1","volume-title":"Structure-Preserving Super Resolution with Gradient Guidance. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Ma Cheng","year":"2020","unstructured":"Cheng Ma , Yongming Rao , Yean Cheng , Ce Chen , Jiwen Lu , and Jie Zhou . 2020 b. Structure-Preserving Super Resolution with Gradient Guidance. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Cheng Ma, Yongming Rao, Yean Cheng, Ce Chen, Jiwen Lu, and Jie Zhou. 2020b. Structure-Preserving Super Resolution with Gradient Guidance. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00251"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2012.2227726"},{"key":"e_1_3_2_2_25_1","volume-title":"Proceedings of the Indian Conference on Computer Vision, Graphics and Image Processing.","author":"Nilsback M-E.","unstructured":"M-E. Nilsback and A. Zisserman . 2008. Automated Flower Classification over a Large Number of Classes . In Proceedings of the Indian Conference on Computer Vision, Graphics and Image Processing. M-E. Nilsback and A. Zisserman. 2008. Automated Flower Classification over a Large Number of Classes. In Proceedings of the Indian Conference on Computer Vision, Graphics and Image Processing."},{"key":"e_1_3_2_2_26_1","volume-title":"Generative Adversarial Text-to-Image Synthesis. In International Conference on Machine Learning (ICML).","author":"Reed Scott","year":"2016","unstructured":"Scott Reed , Zeynep Akata , Xinchen Yan , Lajanugen Logeswaran , Bernt Schiele , and Honglak Lee . 2016 . Generative Adversarial Text-to-Image Synthesis. In International Conference on Machine Learning (ICML). Scott Reed, Zeynep Akata, Xinchen Yan, Lajanugen Logeswaran, Bernt Schiele, and Honglak Lee. 2016. Generative Adversarial Text-to-Image Synthesis. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"e_1_3_2_2_28_1","volume-title":"Deep Semantic Face Deblurring. CoRR","author":"Shen Ziyi","year":"2018","unstructured":"Ziyi Shen , Wei-Sheng Lai , Tingfa Xu , Jan Kautz , and Ming-Hsuan Yang . 2018. Deep Semantic Face Deblurring. CoRR , Vol. abs\/ 1803 .03345 ( 2018 ). Ziyi Shen, Wei-Sheng Lai, Tingfa Xu, Jan Kautz, and Ming-Hsuan Yang. 2018. Deep Semantic Face Deblurring. CoRR, Vol. abs\/1803.03345 (2018)."},{"key":"e_1_3_2_2_29_1","volume-title":"Rethinking the Inception Architecture for Computer Vision. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Szegedy Christian","year":"2016","unstructured":"Christian Szegedy , Vincent Vanhoucke , Sergey Ioffe , Jonathon Shlens , and Zbigniew Wojna . 2016 . Rethinking the Inception Architecture for Computer Vision. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, and Zbigniew Wojna. 2016. Rethinking the Inception Architecture for Computer Vision. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_30_1","volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Han Zhang Zhe Gan Qiuyuan Huang","year":"2018","unstructured":"Qiuyuan Huang Han Zhang Zhe Gan Xiaolei Huang Xiaodong He Tao Xu , Pengchuan Zhang . 2018 . AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks . In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Qiuyuan Huang Han Zhang Zhe Gan Xiaolei Huang Xiaodong He Tao Xu, Pengchuan Zhang. 2018. AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_31_1","volume-title":"Technical Report CNS-TR-2011-001. California Institute of Technology.","author":"Wah C.","year":"2011","unstructured":"C. Wah , S. Branson , P. Welinder , P. Perona , and S. Belongie . 2011 . The Caltech-UCSD Birds-200--2011 Dataset . Technical Report CNS-TR-2011-001. California Institute of Technology. C. Wah, S. Branson, P. Welinder, P. Perona, and S. Belongie. 2011. The Caltech-UCSD Birds-200--2011 Dataset. Technical Report CNS-TR-2011-001. California Institute of Technology."},{"key":"e_1_3_2_2_32_1","volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 606--615","author":"Wang X.","year":"2018","unstructured":"X. Wang , K. Yu , C. Dong , and C. Change Loy . 2018. Recovering Realistic Texture in Image Super-Resolution by Deep Spatial Feature Transform . In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 606--615 . https:\/\/doi.org\/10.1109\/CVPR. 2018 .00070 X. Wang, K. Yu, C. Dong, and C. Change Loy. 2018. Recovering Realistic Texture in Image Super-Resolution by Deep Spatial Feature Transform. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 606--615. https:\/\/doi.org\/10.1109\/CVPR.2018.00070"},{"key":"e_1_3_2_2_33_1","volume-title":"ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks. In European Conference on Computer Vision (ECCV).","author":"Wang Xintao","year":"2018","unstructured":"Xintao Wang , Ke Yu , Shixiang Wu , Jinjin Gu , Yihao Liu , Chao Dong , Chen Change Loy , Yu Qiao , and Xiaoou Tang . 2018 . ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks. In European Conference on Computer Vision (ECCV). Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Chen Change Loy, Yu Qiao, and Xiaoou Tang. 2018. ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3055062"},{"key":"e_1_3_2_2_35_1","volume-title":"Cross-Modal Contrastive Learning for Text-to-Image Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Zhang Han","year":"2021","unstructured":"Han Zhang , Jing Yu Koh , Jason Baldridge , Honglak Lee , and Yinfei Yang . 2021 . Cross-Modal Contrastive Learning for Text-to-Image Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Han Zhang, Jing Yu Koh, Jason Baldridge, Honglak Lee, and Yinfei Yang. 2021. Cross-Modal Contrastive Learning for Text-to-Image Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"}],"event":{"name":"MM '22: The 30th ACM International Conference on Multimedia","location":"Lisboa Portugal","acronym":"MM '22","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 30th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3547951","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3503161.3547951","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:31Z","timestamp":1750186831000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3503161.3547951"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":37,"alternative-id":["10.1145\/3503161.3547951","10.1145\/3503161"],"URL":"https:\/\/doi.org\/10.1145\/3503161.3547951","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2022-10-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}