{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T17:08:36Z","timestamp":1780765716980,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFB2103803."],"award-info":[{"award-number":["2020YFB2103803."]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"Supercomputing Center of University of Science and Technology of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611913","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"2168-2179","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Pixel Adapter: A Graph-Based Post-Processing Approach for Scene Text Image Super-Resolution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-1457-1707","authenticated-orcid":false,"given":"Wenyu","family":"Zhang","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6099-7516","authenticated-orcid":false,"given":"Xin","family":"Deng","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8841-5419","authenticated-orcid":false,"given":"Baojun","family":"Jia","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2884-8578","authenticated-orcid":false,"given":"Xingtong","family":"Yu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5637-2758","authenticated-orcid":false,"given":"Yifan","family":"Chen","sequence":"additional","affiliation":[{"name":"China Merchants Bank, Chengdu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4601-6372","authenticated-orcid":false,"given":"Jin","family":"Ma","sequence":"additional","affiliation":[{"name":"China Merchants Bank, Chengdu, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3752-1163","authenticated-orcid":false,"given":"Qing","family":"Ding","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8136-6834","authenticated-orcid":false,"given":"Xinming","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_38"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.04.030"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2004.02.007"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3017632"},{"key":"e_1_3_2_1_5_1","first-page":"338","volume-title":"Nguyen Bao Trung, Hoang Tien Duc, Pham Hoang Hiep, Doan Viet Dung, and Dang Hoang Vu. Fvi: An end-to-end vietnamese identification card detection and recognition in images.","author":"Liem Hoang Danh","year":"2018","unstructured":"Hoang Danh Liem, Nguyen Duc Minh, Nguyen Bao Trung, Hoang Tien Duc, Pham Hoang Hiep, Doan Viet Dung, and Dang Hoang Vu. Fvi: An end-to-end vietnamese identification card detection and recognition in images. pages 338--340, 2018."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.05.025"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00582"},{"key":"e_1_3_2_1_8_1","volume-title":"C3-STISR: scene text image super-resolution with triple clues. CoRR, abs\/2204.14044","author":"Zhao Minyi","year":"2022","unstructured":"Minyi Zhao, Miao Wang, Fan Bai, Bingjia Li, Jie Wang, and Shuigeng Zhou. C3-STISR: scene text image super-resolution with triple clues. CoRR, abs\/2204.14044, 2022."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01185"},{"key":"e_1_3_2_1_10_1","volume-title":"Text prior guided scene text image super-resolution. CoRR, abs\/2106.15368","author":"Ma Jianqi","year":"2021","unstructured":"Jianqi Ma, Shi Guo, and Lei Zhang. Text prior guided scene text image super-resolution. CoRR, abs\/2106.15368, 2021."},{"key":"e_1_3_2_1_11_1","series-title":"Lecture Notes in Computer Science","first-page":"184","volume-title":"Kaiming He, and Xiaoou Tang. Learning a deep convolutional network for image super-resolution","author":"Dong Chao","year":"2014","unstructured":"Chao Dong, Chen Change Loy, Kaiming He, and Xiaoou Tang. Learning a deep convolutional network for image super-resolution. In David J. Fleet, Tom\u00e1s Pajdla, Bernt Schiele, and Tinne Tuytelaars, editors, Computer Vision - ECCV 2014 - 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part IV, volume 8692 of Lecture Notes in Computer Science, pages 184--199. Springer, 2014."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.207"},{"key":"e_1_3_2_1_13_1","volume-title":"Deyu Meng, and Chongyi Li. Deep fourier up-sampling. CoRR, abs\/2210.05171","author":"Zhou Man","year":"2022","unstructured":"Man Zhou, Hu Yu, Jie Huang, Feng Zhao, Jinwei Gu, Chen Change Loy, Deyu Meng, and Chongyi Li. Deep fourier up-sampling. CoRR, abs\/2210.05171, 2022."},{"key":"e_1_3_2_1_14_1","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings","author":"Velickovic Petar","year":"2018","unstructured":"Petar Velickovic, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. Graph attention networks. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings, 2018."},{"key":"e_1_3_2_1_15_1","volume-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. Attention is all you need. In Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, USA, pages 5998--6008, 2017."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01270"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475640"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CoDIT55151.2022.9804094"},{"key":"e_1_3_2_1_20_1","volume-title":"Polarized self-attention: Towards high-quality pixel-wise regression. CoRR, abs\/2107.00782","author":"Liu Huajun","year":"2021","unstructured":"Huajun Liu, Fuqiang Liu, Xinyi Fan, and Dong Huang. Polarized self-attention: Towards high-quality pixel-wise regression. CoRR, abs\/2107.00782, 2021."},{"key":"e_1_3_2_1_21_1","first-page":"68","volume-title":"Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Parmar Niki","year":"2019","unstructured":"Niki Parmar, Prajit Ramachandran, Ashish Vaswani, Irwan Bello, Anselm Levskaya, and Jonathon Shlens. Stand-alone self-attention in vision models. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada, pages 68--80, 2019."},{"key":"e_1_3_2_1_22_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. An image is worth 16x16 words: Transformers for image recognition at scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net, 2021."},{"key":"e_1_3_2_1_23_1","volume-title":"An image patch is a wave: Quantum inspired vision mlp","author":"Tang Yehui","year":"2021","unstructured":"Yehui Tang, Kai Han, Jianyuan Guo, Chang Xu, Yanxi Li, Chao Xu, and Yunhe Wang. An image patch is a wave: Quantum inspired vision mlp. 2021."},{"key":"e_1_3_2_1_24_1","volume-title":"Mlp-mixer: An all-mlp architecture for vision. CoRR, abs\/2105.01601","author":"Tolstikhin Ilya O.","year":"2021","unstructured":"Ilya O. Tolstikhin, Neil Houlsby, Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Thomas Unterthiner, Jessica Yung, Andreas Steiner, Daniel Keysers, Jakob Uszkoreit, Mario Lucic, and Alexey Dosovitskiy. Mlp-mixer: An all-mlp architecture for vision. CoRR, abs\/2105.01601, 2021."},{"key":"e_1_3_2_1_25_1","first-page":"1","volume-title":"2019 IEEE-RIVF International Conference on Computing and Communication Technologies, RIVF 2019","author":"Hanh T.","year":"2019","unstructured":"Hanh T. M. Tran and Tien Ho-Phuoc. Deep laplacian pyramid network for text images super-resolution. In 2019 IEEE-RIVF International Conference on Computing and Communication Technologies, RIVF 2019, Danang, Vietnam, March 20-22, 2019, pages 1--6. IEEE, 2019."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2646371"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_2_1_28_1","volume-title":"Text prior guided scene text image super-resolution. arXiv: Computer Vision and Pattern Recognition","author":"Ma Jianqi","year":"2021","unstructured":"Jianqi Ma, Shi Guo, and Lei Zhang. Text prior guided scene text image super-resolution. arXiv: Computer Vision and Pattern Recognition, 2021."},{"key":"e_1_3_2_1_29_1","volume-title":"Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation. CoRR, abs\/1801.04381","author":"Sandler Mark","year":"2018","unstructured":"Mark Sandler, Andrew G. Howard, Menglong Zhu, Andrey Zhmoginov, and Liang-Chieh Chen. Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation. CoRR, abs\/1801.04381, 2018."},{"key":"e_1_3_2_1_30_1","volume-title":"Text gestalt: Stroke-aware scene text image super-resolution. CoRR, abs\/2112.08171","author":"Chen Jingye","year":"2021","unstructured":"Jingye Chen, Haiyang Yu, Jianqi Ma, Bin Li, and Xiangyang Xue. Text gestalt: Stroke-aware scene text image super-resolution. CoRR, abs\/2112.08171, 2021."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.254"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0823-z"},{"key":"e_1_3_2_1_33_1","volume-title":"Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015","author":"Jaderberg Max","year":"2015","unstructured":"Max Jaderberg, Karen Simonyan, Andrew Zisserman, and Koray Kavukcuoglu. Spatial transformer networks. In Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, December 7-12, 2015, Montreal, Quebec, Canada, pages 2017--2025, 2015."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_1_35_1","volume-title":"Graph attention networks. CoRR, abs\/1710.10903","author":"Velickovic Petar","year":"2017","unstructured":"Petar Velickovic, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Li\u00f2, and Yoshua Bengio. Graph attention networks. CoRR, abs\/1710.10903, 2017."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.01.020"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2848939"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_1_39_1","volume-title":"Zoom to learn, learn to zoom. CoRR, abs\/1905.05169","author":"Zhang Xuaner Cecilia","year":"2019","unstructured":"Xuaner Cecilia Zhang, Qifeng Chen, Ren Ng, and Vladlen Koltun. Zoom to learn, learn to zoom. CoRR, abs\/1905.05169, 2019."},{"key":"e_1_3_2_1_40_1","volume-title":"Toward real-world single image super-resolution: A new benchmark and A new model. CoRR, abs\/1904.00523","author":"Cai Jianrui","year":"2019","unstructured":"Jianrui Cai, Hui Zeng, Hongwei Yong, Zisheng Cao, and Lei Zhang. Toward real-world single image super-resolution: A new benchmark and A new model. CoRR, abs\/1904.00523, 2019."},{"key":"e_1_3_2_1_41_1","first-page":"1156","volume-title":"Ernest Valveny. ICDAR 2015 competition on robust reading. In 13th International Conference on Document Analysis and Recognition, ICDAR 2015","author":"Karatzas Dimosthenis","year":"2015","unstructured":"Dimosthenis Karatzas, Lluis Gomez-Bigorda, Anguelos Nicolaou, Suman K. Ghosh, Andrew D. Bagdanov, Masakazu Iwamura, Jiri Matas, Lukas Neumann, Vijay Ramaseshan Chandrasekhar, Shijian Lu, Faisal Shafait, Seiichi Uchida, and Ernest Valveny. ICDAR 2015 competition on robust reading. In 13th International Conference on Document Analysis and Recognition, ICDAR 2015, Nancy, France, August 23-26, 2015, pages 1156--1160. IEEE Computer Society, 2015."},{"key":"e_1_3_2_1_42_1","volume-title":"Coco-text: Dataset and benchmark for text detection and recognition in natural images. In arXiv preprint arXiv:1601.07140","author":"Veit Andreas","year":"2016","unstructured":"Andreas Veit, Tomas Matera, Lukas Neumann, Jiri Matas, and Serge Belongie. Coco-text: Dataset and benchmark for text detection and recognition in natural images. In arXiv preprint arXiv:1601.07140, 2016."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.76"},{"key":"e_1_3_2_1_44_1","volume-title":"Distilling the knowledge in a neural network. CoRR, abs\/1503.02531","author":"Hinton Geoffrey E.","year":"2015","unstructured":"Geoffrey E. Hinton, Oriol Vinyals, and Jeffrey Dean. Distilling the knowledge in a neural network. CoRR, abs\/1503.02531, 2015."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2010.2095871"},{"key":"e_1_3_2_1_46_1","volume-title":"Training latent variable models with auto-encoding variational bayes: A tutorial. CoRR, abs\/2208.07818","author":"Zhi-Han Yang","year":"2022","unstructured":"Yang Zhi-Han. Training latent variable models with auto-encoding variational bayes: A tutorial. CoRR, abs\/2208.07818, 2022."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611913","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611913","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:08:25Z","timestamp":1755821305000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611913"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":47,"alternative-id":["10.1145\/3581783.3611913","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611913","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}