{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T00:40:13Z","timestamp":1760488813078,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,27]]},"DOI":"10.1145\/3704268.3742690","type":"proceedings-article","created":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T16:19:50Z","timestamp":1756484390000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Text Image Super-Resolution for Improved OCR in Real-Life Scenarios using Swin Transformers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0965-513X","authenticated-orcid":false,"given":"Philipp","family":"Hildebrandt","sequence":"first","affiliation":[{"name":"Hasso-Plattner-Institut, University of Potsdam, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4691-1219","authenticated-orcid":false,"given":"Maximilian","family":"Schulze","sequence":"additional","affiliation":[{"name":"Hasso-Plattner-Institut, University of Potsdam, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4578-1245","authenticated-orcid":false,"given":"Sarel","family":"Cohen","sequence":"additional","affiliation":[{"name":"The Academice College of Tel, Aviv-Yaffo, Tel Aviv-Yafo, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0190-7512","authenticated-orcid":false,"given":"Vanja","family":"Dosko\u010d","sequence":"additional","affiliation":[{"name":"Hasso-Plattner-Institut, University of Potsdam, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8844-8675","authenticated-orcid":false,"given":"Raid","family":"Saabni","sequence":"additional","affiliation":[{"name":"The Academic College of Tel, Aviv-Yaffo, Triangle R&amp;D Center, Tel-Aviv, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0076-6308","authenticated-orcid":false,"given":"Tobias","family":"Friedrich","sequence":"additional","affiliation":[{"name":"Hasso-Plattner-Institut, University of Potsdam, Germany"}]}],"member":"320","published-online":{"date-parts":[[2025,8,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Scene Text Recognition with Permuted Autoregressive Sequence Models. In European Conference on Computer Vision. 178--196","author":"Bautista Darwin","year":"2022","unstructured":"Darwin Bautista and Rowel Atienza. 2022. Scene Text Recognition with Permuted Autoregressive Sequence Models. In European Conference on Computer Vision. 178--196."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Marco Bevilacqua Aline Roumy Christine Guillemot and Marie Line Alberi-Morel. 2012. Low-complexity single-image super-resolution based on nonnegative neighbor embedding. (2012).","DOI":"10.5244\/C.26.135"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01185"},{"key":"e_1_3_2_2_4_1","volume-title":"Activating More Pixels in Image Super-Resolution Transformer. arXiv preprint arXiv:2205.04437","author":"Chen Xiangyu","year":"2022","unstructured":"Xiangyu Chen, Xintao Wang, Jiantao Zhou, and Chao Dong. 2022. Activating More Pixels in Image Super-Resolution Transformer. arXiv preprint arXiv:2205.04437 (2022)."},{"key":"e_1_3_2_2_5_1","volume-title":"Kaiming He, and Xiaoou Tang.","author":"Dong Chao","year":"2015","unstructured":"Chao Dong, Chen Change Loy, Kaiming He, and Xiaoou Tang. 2015. Image super-resolution using deep convolutional networks. IEEE transactions on pattern analysis and machine intelligence 38, 2 (2015), 295--307."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299156"},{"key":"e_1_3_2_2_7_1","volume-title":"Luc Van Gool, and Radu Timofte","author":"Liang Jingyun","year":"2021","unstructured":"Jingyun Liang, Jiezhang Cao, Guolei Sun, Kai Zhang, Luc Van Gool, and Radu Timofte. 2021. SwinIR: Image Restoration Using Swin Transformer. arXiv preprint arXiv:2108.10257 (2021)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.01.020"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00582"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_10"},{"key":"e_1_3_2_2_13_1","volume-title":"Scene Text Image Super-Resolution via Content Perceptual Loss and Criss-Cross Transformer Blocks. arXiv preprint arXiv:2210.06924","author":"Qin Rui","year":"2022","unstructured":"Rui Qin, Bin Wang, and Yu-Wing Tai. 2022. Scene Text Image Super-Resolution via Content Perceptual Loss and Criss-Cross Transformer Blocks. arXiv preprint arXiv:2210.06924 (2022)."},{"key":"e_1_3_2_2_14_1","volume-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","author":"Shi Baoguang","year":"2016","unstructured":"Baoguang Shi, Xiang Bai, and Cong Yao. 2016. An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE transactions on pattern analysis and machine intelligence 39, 11 (2016), 2298--2304."},{"key":"e_1_3_2_2_15_1","volume-title":"ASTER: An Attentional Scene Text Recognizer with Flexible Rectification","author":"Shi Baoguang","year":"2018","unstructured":"Baoguang Shi, Mingkun Yang, Xinggang Wang, Pengyuan Lyu, Cong Yao, and Xiang Bai. 2018. ASTER: An Attentional Scene Text Recognizer with Flexible Rectification. IEEE Transactions on Pattern Analysis and Machine Intelligence (2018), 1--1."},{"key":"e_1_3_2_2_16_1","volume-title":"NTIRE 2018 Challenge on Single Image Super-Resolution: Methods and Results. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops.","author":"Timofte Radu","year":"2018","unstructured":"Radu Timofte, Shuhang Gu, Jiqing Wu, Luc Van Gool, Lei Zhang, Ming-Hsuan Yang, Muhammad Haris, et al. 2018. NTIRE 2018 Challenge on Single Image Super-Resolution: Methods and Results. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_38"},{"key":"e_1_3_2_2_18_1","volume-title":"TextSR: Content-Aware Text Super-Resolution Guided by Recognition. CoRR abs\/1909.07113","author":"Wang Wenjia","year":"2019","unstructured":"Wenjia Wang, Enze Xie, Peize Sun, Wenhai Wang, Lixun Tian, Chunhua Shen, and Ping Luo. 2019. TextSR: Content-Aware Text Super-Resolution Guided by Recognition. CoRR abs\/1909.07113 (2019). arXiv:1909.07113 http:\/\/arxiv.org\/abs\/1909.07113"},{"key":"e_1_3_2_2_19_1","volume-title":"International conference on curves and surfaces. Springer, 711--730","author":"Zeyde Roman","year":"2010","unstructured":"Roman Zeyde, Michael Elad, and Matan Protter. 2010. On single image scale-up using sparse-representations. In International conference on curves and surfaces. Springer, 711--730."},{"key":"e_1_3_2_2_20_1","volume-title":"SwinFIR: Revisiting the SWINIR with fast Fourier convolution and improved training for image super-resolution. arXiv preprint arXiv:2208.11247","author":"Zhang Dafeng","year":"2022","unstructured":"Dafeng Zhang, Feiyu Huang, Shizhuo Liu, Xiaobing Wang, and Zhezhu Jin. 2022. SwinFIR: Revisiting the SWINIR with fast Fourier convolution and improved training for image super-resolution. arXiv preprint arXiv:2208.11247 (2022)."},{"key":"e_1_3_2_2_21_1","volume-title":"C3-STISR: Scene Text Image Super-resolution with Triple Clues. arXiv preprint arXiv:2204.14044","author":"Zhao Minyi","year":"2022","unstructured":"Minyi Zhao, Miao Wang, Fan Bai, Bingjia Li, Jie Wang, and Shuigeng Zhou. 2022. C3-STISR: Scene Text Image Super-resolution with Triple Clues. arXiv preprint arXiv:2204.14044 (2022)."},{"key":"e_1_3_2_2_22_1","unstructured":"Tianlun Zheng Zhineng Chen Shancheng Fang Hongtao Xie and Yu-Gang Jiang. 2021. CDistNet: Perceiving Multi-Domain Character Distance for Robust Text Recognition. (2021)."}],"event":{"name":"DocEng '25: ACM Symposium on Document Engineering 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Nottingham United Kingdom","acronym":"DocEng '25"},"container-title":["Proceedings of the 2025 ACM Symposium on Document Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3704268.3742690","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T18:26:52Z","timestamp":1760466412000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3704268.3742690"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,27]]},"references-count":22,"alternative-id":["10.1145\/3704268.3742690","10.1145\/3704268"],"URL":"https:\/\/doi.org\/10.1145\/3704268.3742690","relation":{},"subject":[],"published":{"date-parts":[[2025,8,27]]},"assertion":[{"value":"2025-08-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}