{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T22:08:03Z","timestamp":1773007683587,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shenzhen Science and Technology Program","award":["RCYX20200714114523079,JCYJ20220818101014030, JCYJ20220818101012025"],"award-info":[{"award-number":["RCYX20200714114523079,JCYJ20220818101014030, JCYJ20220818101012025"]}]},{"name":"PCNL KEY project","award":["PCL2021A07"],"award-info":[{"award-number":["PCL2021A07"]}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFF1202104"],"award-info":[{"award-number":["2022YFF1202104"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["6230070671,?62171248"],"award-info":[{"award-number":["6230070671,?62171248"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611777","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"2189-2198","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["One-stage Low-resolution Text Recognition with High-resolution Knowledge Transfer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1746-2693","authenticated-orcid":false,"given":"Hang","family":"Guo","sequence":"first","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0594-6404","authenticated-orcid":false,"given":"Tao","family":"Dai","sequence":"additional","affiliation":[{"name":"Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8060-4237","authenticated-orcid":false,"given":"Mingyan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2708-2456","authenticated-orcid":false,"given":"Guanghao","family":"Meng","sequence":"additional","affiliation":[{"name":"Tsinghua University, Peng Cheng Laboratory, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4798-230X","authenticated-orcid":false,"given":"Bin","family":"Chen","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5462-6178","authenticated-orcid":false,"given":"Zhi","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8639-982X","authenticated-orcid":false,"given":"Shu-Tao","family":"Xia","sequence":"additional","affiliation":[{"name":"Tsinghua University &amp; Peng Cheng Laboratory, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01505"},{"key":"e_1_3_2_1_2_1","volume-title":"Sajid Javed, Abdulhaid Shoufan, Yahya Zweiri, and Naoufel Werghi.","author":"Alansari Mohamad","year":"2023","unstructured":"Mohamad Alansari, Oussama Abdul Hay, Sajid Javed, Abdulhaid Shoufan, Yahya Zweiri, and Naoufel Werghi. 2023. GhostFaceNets: Lightweight Face Recognition Model from Cheap Operations. IEEE Access (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Tel Aviv","author":"Bautista Darwin","year":"2022","unstructured":"Darwin Bautista and Rowel Atienza. 2022. Scene text recognition with permuted autoregressive sequence models. In Computer Vision--ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXVIII. Springer, 178--196."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00102"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1117\/12.2586912"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2022.02.006"},{"key":"e_1_3_2_1_8_1","volume-title":"Scene Text Telescope: Text-Focused Scene Image Super-Resolution. computer vision and pattern recognition","author":"Chen Jingye","year":"2021","unstructured":"Jingye Chen, Bin Li, and Xiangyang Xue. 2021. Scene Text Telescope: Text-Focused Scene Image Super-Resolution. computer vision and pattern recognition (2021)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19904"},{"key":"e_1_3_2_1_10_1","volume-title":"Tel Aviv","author":"Chen Ying","year":"2022","unstructured":"Ying Chen, Liang Qiao, Zhanzhan Cheng, Shiliang Pu, Yi Niu, and Xi Li. 2022b. Dynamic Low-Resolution Distillation for Cost-Efficient End-to-End Text Spotting. In Computer Vision--ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXVIII. Springer, 356--373."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01132"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_1_13_1","volume-title":"Boosting optical character recognition: A super-resolution approach. arXiv preprint arXiv:1506.02211","author":"Dong C","year":"2015","unstructured":"C Dong, X Zhu, Y Deng, CC Loy, and Y Qia. 2015. Boosting optical character recognition: A super-resolution approach. arXiv preprint arXiv:1506.02211 (2015)."},{"key":"e_1_3_2_1_14_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-23192-1_8"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6715"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_20_1","volume-title":"Long short-term memory. Neural computation","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation, Vol. 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Mingkun Huang Yongbin You Zhehuai Chen Yanmin Qian and Kai Yu. 2018. Knowledge Distillation for Sequence Model.. In Interspeech. 3703--3707.","DOI":"10.21437\/Interspeech.2018-1589"},{"key":"e_1_3_2_1_22_1","volume-title":"Speeding up convolutional neural networks with low rank expansions. arXiv preprint arXiv:1405.3866","author":"Jaderberg Max","year":"2014","unstructured":"Max Jaderberg, Andrea Vedaldi, and Andrew Zisserman. 2014. Speeding up convolutional neural networks with low rank expansions. arXiv preprint arXiv:1405.3866 (2014)."},{"key":"e_1_3_2_1_23_1","volume-title":"IFR: Iterative Fusion Based Recognizer for Low Quality Scene Text Recognition. In Pattern Recognition and Computer Vision: 4th Chinese Conference, PRCV","author":"Jia Zhiwei","year":"2021","unstructured":"Zhiwei Jia, Shugong Xu, Shiyi Mu, Yue Tao, Shan Cao, and Zhiyong Chen. 2021. IFR: Iterative Fusion Based Recognizer for Low Quality Scene Text Recognition. In Pattern Recognition and Computer Vision: 4th Chinese Conference, PRCV 2021, Beijing, China, October 29-November 1, 2021, Proceedings, Part II 4. Springer, 180--191."},{"key":"e_1_3_2_1_24_1","volume-title":"ICDAR 2015 competition on Robust Reading. International Conference on Document Analysis and Recognition","author":"Karatzas Dimosthenis","year":"2015","unstructured":"Dimosthenis Karatzas, Lluis Gomez-Bigorda, Anguelos Nicolaou, Suman K. Ghosh, Andrew D. Bagdanov, Masakazu Iwamura, Jiri Matas, Lukas Neumann, Vijay Chandrasekhar, Shijian Lu, Faisal Shafait, Seiichi Uchida, and Ernest Valveny. 2015. ICDAR 2015 competition on Robust Reading. International Conference on Document Analysis and Recognition (2015)."},{"key":"e_1_3_2_1_25_1","volume-title":"ICDAR 2013 robust reading competition. In 2013 12th international conference on document analysis and recognition. IEEE, 1484--1493","author":"Karatzas Dimosthenis","year":"2013","unstructured":"Dimosthenis Karatzas, Faisal Shafait, Seiichi Uchida, Masakazu Iwamura, Lluis Gomez i Bigorda, Sergi Robles Mestre, Joan Mas, David Fernandez Mota, Jon Almazan Almazan, and Lluis Pere De Las Heras. 2013. ICDAR 2013 robust reading competition. In 2013 12th international conference on document analysis and recognition. IEEE, 1484--1493."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.04.030"},{"key":"e_1_3_2_1_27_1","volume-title":"Sequence-level knowledge distillation. arXiv preprint arXiv:1606.07947","author":"Kim Yoon","year":"2016","unstructured":"Yoon Kim and Alexander M Rush. 2016. Sequence-level knowledge distillation. arXiv preprint arXiv:1606.07947 (2016)."},{"key":"e_1_3_2_1_28_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2014","unstructured":"Diederik P. Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. arXiv: Learning (2014)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/NICS.2018.8606831"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20062"},{"key":"e_1_3_2_1_32_1","volume-title":"International conference on machine learning. PMLR, 97--105","author":"Long Mingsheng","year":"2015","unstructured":"Mingsheng Long, Yue Cao, Jianmin Wang, and Michael Jordan. 2015. Learning transferable features with deep adaptation networks. In International conference on machine learning. PMLR, 97--105."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3237002"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00582"},{"key":"e_1_3_2_1_35_1","volume-title":"PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit. european conference on computer vision","author":"Mou Yongqiang","year":"2020","unstructured":"Yongqiang Mou, Lei Tan, Hui Yang, Jingying Chen, Leyuan Liu, Rui Yan, and Yaohong Huang. 2020. PlugNet: Degradation Aware Scene Text Recognition Supervised by a Pluggable Super-Resolution Unit. european conference on computer vision (2020)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_26"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.35.404"},{"key":"e_1_3_2_1_38_1","volume-title":"Recognizing Text with Perspective Distortion in Natural Scenes. international conference on computer vision","author":"Phan Trung Quy","year":"2013","unstructured":"Trung Quy Phan, Palaiahnakote Shivakumara, Shangxuan Tian, and Chew Lim Tan. 2013. Recognizing Text with Perspective Distortion in Natural Scenes. international conference on computer vision (2013)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01421"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475238"},{"key":"e_1_3_2_1_41_1","volume-title":"Scene Text Image Super-Resolution via Content Perceptual Loss and Criss-Cross Transformer Blocks. arXiv preprint arXiv:2210.06924","author":"Qin Rui","year":"2022","unstructured":"Rui Qin, Bin Wang, and Yu-Wing Tai. 2022. Scene Text Image Super-Resolution via Content Perceptual Loss and Criss-Cross Transformer Blocks. arXiv preprint arXiv:2210.06924 (2022)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_43_1","volume-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","author":"Shi Baoguang","year":"2016","unstructured":"Baoguang Shi, Xiang Bai, and Cong Yao. 2016. An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE transactions on pattern analysis and machine intelligence, Vol. 39, 11 (2016), 2298--2304."},{"key":"e_1_3_2_1_44_1","volume-title":"Tel Aviv","author":"Shin Sungho","year":"2022","unstructured":"Sungho Shin, Joosoon Lee, Junseok Lee, Yeonguk Yu, and Kyoobin Lee. 2022. Teaching where to look: Attention similarity knowledge distillation for low resolution face recognition. In Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XII. Springer, 631--647."},{"key":"e_1_3_2_1_45_1","volume-title":"Chee Seng Chan, and Chew Lim Tan","author":"Shivakumara Palaiahnakote","year":"2014","unstructured":"Palaiahnakote Shivakumara, Anhar Risnumawan, Chee Seng Chan, and Chew Lim Tan. 2014. A robust arbitrary text detection system for natural scene images. Expert Systems With Applications (2014)."},{"key":"e_1_3_2_1_46_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/RIVF.2019.8713657"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.141"},{"key":"e_1_3_2_1_49_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_50_1","volume-title":"End-to-end scene text recognition. international conference on computer vision","author":"Wang Kai","year":"2011","unstructured":"Kai Wang, Boris Babenko, and Serge Belongie. 2011. End-to-end scene text recognition. international conference on computer vision (2011)."},{"key":"e_1_3_2_1_51_1","volume-title":"Scene Text Image Super-Resolution in the Wild. european conference on computer vision","author":"Wang Wenjia","year":"2020","unstructured":"Wenjia Wang, Enze Xie, Xuebo Liu, Wenhai Wang, Ding Liang, Chunhua Shen, and Xiang Bai. 2020. Scene Text Image Super-Resolution in the Wild. european conference on computer vision (2020)."},{"key":"e_1_3_2_1_52_1","volume-title":"Textsr: Content-aware text super-resolution guided by recognition. arXiv preprint arXiv:1909.07113","author":"Wang Wenjia","year":"2019","unstructured":"Wenjia Wang, Enze Xie, Peize Sun, Wenhai Wang, Lixun Tian, Chunhua Shen, and Ping Luo. 2019. Textsr: Content-aware text super-resolution guided by recognition. arXiv preprint arXiv:1909.07113 (2019)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01393"},{"key":"e_1_3_2_1_54_1","volume-title":"Tel Aviv","author":"Xie Xudong","year":"2022","unstructured":"Xudong Xie, Ling Fu, Zhifei Zhang, Zhaowen Wang, and Xiang Bai. 2022. Toward Understanding WordArt: Corner-Guided Transformer for Scene Text Recognition. In Computer Vision--ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXVIII. Springer, 303--321."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.36"},{"key":"e_1_3_2_1_56_1","volume-title":"Tel Aviv","author":"Yang Zhendong","year":"2022","unstructured":"Zhendong Yang, Zhe Li, Mingqi Shao, Dachuan Shi, Zehuan Yuan, and Chun Yuan. 2022. Masked generative distillation. In Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XI. Springer, 53--69."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01213"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"e_1_3_2_1_59_1","volume-title":"Zhijun Ding, Jun Wu, Fumin Shen, and Heng Tao Shen.","author":"Zhao Cairong","year":"2021","unstructured":"Cairong Zhao, Shuyang Feng, Brian Nlong Zhao, Zhijun Ding, Jun Wu, Fumin Shen, and Heng Tao Shen. 2021. Scene Text Image Super-Resolution via Parallelly Contextual Attention Network. acm multimedia (2021)."},{"key":"e_1_3_2_1_60_1","volume-title":"C3-STISR: Scene Text Image Super-resolution with Triple Clues. international joint conference on artificial intelligence","author":"Zhao Minyi","year":"2022","unstructured":"Minyi Zhao, Miao Wang, Fan Bai, Bingjia Li, Jie Wang, and Shuigeng Zhou. 2022. C3-STISR: Scene Text Image Super-resolution with Triple Clues. international joint conference on artificial intelligence (2022)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682926"},{"key":"e_1_3_2_1_62_1","volume-title":"Improving Scene Text Image Super-Resolution via Dual Prior Modulation Network. arXiv preprint arXiv:2302.10414","author":"Zhu Shipeng","year":"2023","unstructured":"Shipeng Zhu, Zuoyan Zhao, Pengfei Fang, and Hui Xue. 2023. Improving Scene Text Image Super-Resolution via Dual Prior Modulation Network. arXiv preprint arXiv:2302.10414 (2023)."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611777","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611777","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:14:43Z","timestamp":1755821683000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611777"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":62,"alternative-id":["10.1145\/3581783.3611777","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611777","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}