{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:40:48Z","timestamp":1755826848153,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shanghai Science and Technology Committee","award":["No. 23010501500"],"award-info":[{"award-number":["No. 23010501500"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3658070","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"229-238","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Navigating Style Variations in Scene Text Image Super-Resolution through Multi-Scale Perception"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8044-5912","authenticated-orcid":false,"given":"Feifei","family":"Xu","sequence":"first","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2434-9340","authenticated-orcid":false,"given":"Ziheng","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.180"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-33488-2"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01185"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.543"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01132"},{"key":"e_1_3_2_1_6_1","volume-title":"Kaiming He, and Xiaoou Tang.","author":"Dong Chao","year":"2015","unstructured":"Chao Dong, Chen Change Loy, Kaiming He, and Xiaoou Tang. 2015. Image super-resolution using deep convolutional networks. IEEE transactions on pattern analysis and machine intelligence, Vol. 38, 2 (2015), 295--307."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819221"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/87"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-022-00916-1"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_2_1_12_1","volume-title":"Bidirectional LSTM-CRF models for sequence tagging. arXiv preprint arXiv:1508.01991","author":"Huang Zhiheng","year":"2015","unstructured":"Zhiheng Huang, Wei Xu, and Kai Yu. 2015. Bidirectional LSTM-CRF models for sequence tagging. arXiv preprint arXiv:1508.01991 (2015)."},{"key":"e_1_3_2_1_13_1","unstructured":"Max Jaderberg Karen Simonyan Andrew Zisserman et al. 2015. Spatial transformer networks. Advances in neural information processing systems Vol. 28 (2015)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIVC58118.2023.10270461"},{"key":"e_1_3_2_1_15_1","volume-title":"ICDAR 2015 competition on robust reading. In 2015 13th international conference on document analysis and recognition (ICDAR). IEEE, 1156--1160","author":"Karatzas Dimosthenis","year":"2015","unstructured":"Dimosthenis Karatzas, Lluis Gomez-Bigorda, Anguelos Nicolaou, Suman Ghosh, Andrew Bagdanov, Masakazu Iwamura, Jiri Matas, Lukas Neumann, Vijay Ramaseshan Chandrasekhar, Shijian Lu, et al. 2015. ICDAR 2015 competition on robust reading. In 2015 13th international conference on document analysis and recognition (ICDAR). IEEE, 1156--1160."},{"key":"e_1_3_2_1_16_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.151"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_2_1_20_1","volume-title":"TextDiff: Mask-Guided Residual Diffusion Models for Scene Text Image Super-Resolution. arXiv preprint arXiv:2308.06743","author":"Liu Baolin","year":"2023","unstructured":"Baolin Liu, Zongyuan Yang, Pengfei Wang, Junjie Zhou, Ziqi Liu, Ziyi Song, Yan Liu, and Yongping Xiong. 2023. TextDiff: Mask-Guided Residual Diffusion Models for Scene Text Image Super-Resolution. arXiv preprint arXiv:2308.06743 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_6"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.01.020"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3237002"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00582"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings, Part XV 16","author":"Mou Yongqiang","year":"2020","unstructured":"Yongqiang Mou, Lei Tan, Hui Yang, Jingying Chen, Leyuan Liu, Rui Yan, and Yaohong Huang. 2020. Plugnet: Degradation aware scene text recognition supervised by a pluggable super-resolution unit. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XV 16. Springer, 158--174."},{"key":"e_1_3_2_1_26_1","volume-title":"Improving Scene Text Recognition With A Combinative Image Augmentation Approach. In 2022 13th International Conference on Information and Communication Technology Convergence (ICTC). IEEE, 1051--1055","author":"Nguyen Ngan-Linh","year":"2022","unstructured":"Ngan-Linh Nguyen, Gia-Huy Lam, Hoang-Thong Vo, Trong-Hop Do, Anh-Tien Tran, and Sungrae Cho. 2022. Improving Scene Text Recognition With A Combinative Image Augmentation Approach. In 2022 13th International Conference on Information and Communication Technology Convergence (ICTC). IEEE, 1051--1055."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/11590316"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.76"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2014.07.008"},{"key":"e_1_3_2_1_30_1","volume-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","author":"Shi Baoguang","year":"2016","unstructured":"Baoguang Shi, Xiang Bai, and Cong Yao. 2016a. An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE transactions on pattern analysis and machine intelligence, Vol. 39, 11 (2016), 2298--2304."},{"key":"e_1_3_2_1_31_1","volume-title":"Aster: An attentional scene text recognizer with flexible rectification","author":"Shi Baoguang","year":"2018","unstructured":"Baoguang Shi, Mingkun Yang, Xinggang Wang, Pengyuan Lyu, Cong Yao, and Xiang Bai. 2018. Aster: An attentional scene text recognizer with flexible rectification. IEEE transactions on pattern analysis and machine intelligence, Vol. 41, 9 (2018), 2035--2048."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.207"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3267133"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00770"},{"key":"e_1_3_2_1_35_1","volume-title":"An Image Patch is a Wave: Quantum Inspired Vision MLP. arXiv preprint arXiv:2111.12294","author":"Tang Yehui","year":"2021","unstructured":"Yehui Tang, Kai Han, Jianyuan Guo, Chang Xu, Yanxi Li, Chao Xu, and Yunhe Wang. 2021. An Image Patch is a Wave: Quantum Inspired Vision MLP. arXiv preprint arXiv:2111.12294 (2021)."},{"key":"e_1_3_2_1_36_1","unstructured":"Petar Velickovic Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Lio Yoshua Bengio et al. 2017. Graph attention networks. stat Vol. 1050 20 (2017) 10--48550."},{"key":"e_1_3_2_1_37_1","volume-title":"2011 International conference on computer vision. IEEE, 1457--1464","author":"Wang Kai","year":"2011","unstructured":"Kai Wang, Boris Babenko, and Serge Belongie. 2011. End-to-end scene text recognition. In 2011 International conference on computer vision. IEEE, 1457--1464."},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings, Part X 16","author":"Wang Wenjia","year":"2020","unstructured":"Wenjia Wang, Enze Xie, Xuebo Liu, Wenhai Wang, Ding Liang, Chunhua Shen, and Xiang Bai. 2020. Scene text image super-resolution in the wild. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part X 16. Springer, 650--666."},{"key":"e_1_3_2_1_39_1","volume-title":"Textsr: Content-aware text super-resolution guided by recognition. arXiv preprint arXiv:1909.07113","author":"Wang Wenjia","year":"2019","unstructured":"Wenjia Wang, Enze Xie, Peize Sun, Wenhai Wang, Lixun Tian, Chunhua Shen, and Ping Luo. 2019. Textsr: Content-aware text super-resolution guided by recognition. arXiv preprint arXiv:1909.07113 (2019)."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the European conference on computer vision (ECCV) workshops. 0--0.","author":"Wang Xintao","year":"2018","unstructured":"Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Yu Qiao, and Chen Change Loy. 2018. Esrgan: Enhanced super-resolution generative adversarial networks. In Proceedings of the European conference on computer vision (ECCV) workshops. 0--0."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00583"},{"key":"e_1_3_2_1_43_1","volume-title":"Electronic and Automation Control Conference (ITNEC)","volume":"1","author":"Ye Hanmin","year":"2020","unstructured":"Hanmin Ye, Wenjie Liu, and Shiming Huang. 2020. Method of Image Style Transfer Based on Edge Detection. In 2020 IEEE 4th Information Technology, Networking, Electronic and Automation Control Conference (ITNEC), Vol. 1. IEEE, 1635--1639."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CTISC52352.2021.00070"},{"key":"e_1_3_2_1_45_1","volume-title":"Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122","author":"Yu Fisher","year":"2015","unstructured":"Fisher Yu and Vladlen Koltun. 2015. Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611913"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475640"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/238"},{"key":"e_1_3_2_1_52_1","volume-title":"HiREN: Towards Higher Supervision Quality for Better Scene Text Image Super-Resolution. arXiv preprint arXiv:2307.16410","author":"Zhao Minyi","year":"2023","unstructured":"Minyi Zhao, Yi Xu, Bingjia Li, Jie Wang, Jihong Guan, and Shuigeng Zhou. 2023. HiREN: Towards Higher Supervision Quality for Better Scene Text Image Super-Resolution. arXiv preprint arXiv:2307.16410 (2023)."},{"key":"e_1_3_2_1_53_1","volume-title":"Domain generalization with mixstyle. arXiv preprint arXiv:2104.02008","author":"Zhou Kaiyang","year":"2021","unstructured":"Kaiyang Zhou, Yongxin Yang, Yu Qiao, and Tao Xiang. 2021. Domain generalization with mixstyle. arXiv preprint arXiv:2104.02008 (2021)."},{"key":"e_1_3_2_1_54_1","volume-title":"Improving Scene Text Image Super-Resolution via Dual Prior Modulation Network. arXiv preprint arXiv:2302.10414","author":"Zhu Shipeng","year":"2023","unstructured":"Shipeng Zhu, Zuoyan Zhao, Pengfei Fang, and Hui Xue. 2023. Improving Scene Text Image Super-Resolution via Dual Prior Modulation Network. arXiv preprint arXiv:2302.10414 (2023)."}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"],"location":"Phuket Thailand","acronym":"ICMR '24"},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658070","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3658070","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:52:29Z","timestamp":1755766349000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658070"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":54,"alternative-id":["10.1145\/3652583.3658070","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3658070","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}