{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T06:03:40Z","timestamp":1770357820628,"version":"3.49.0"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902225"],"award-info":[{"award-number":["61902225"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Joint Funds of Natural Science Foundation of Shandong Province","award":["ZR2021LZL011"],"award-info":[{"award-number":["ZR2021LZL011"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1007\/s10489-023-05251-7","type":"journal-article","created":{"date-parts":[[2024,1,26]],"date-time":"2024-01-26T13:02:02Z","timestamp":1706274122000},"page":"1997-2008","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Scene text image super-resolution via textual reasoning and multiscale cross-convolution"],"prefix":"10.1007","volume":"54","author":[{"given":"Lan","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaojie","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangju","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dehu","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3609-2560","authenticated-orcid":false,"given":"Meng","family":"Qi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,1,26]]},"reference":[{"key":"5251_CR1","doi-asserted-by":"publisher","unstructured":"Qiao Z, Zhou Y, Yang D et\u00a0al. (2020) Seed: semantics enhanced encoder-decoder framework for scene text recognition. 2020 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 13525\u201313534. https:\/\/doi.org\/10.1109\/cvpr42600.2020.01354","DOI":"10.1109\/cvpr42600.2020.01354"},{"key":"5251_CR2","doi-asserted-by":"publisher","unstructured":"Aberdam A, Litman R, Tsiper S et\u00a0al. (2020) Sequence-to-sequence contrastive learning for text recognition. 2021 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 15297\u201315307. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01505","DOI":"10.1109\/CVPR46437.2021.01505"},{"key":"5251_CR3","doi-asserted-by":"publisher","unstructured":"Yue X, Kuang Z, Lin C et\u00a0al (2020) Robustscanner: dynamically enhancing positional clues for robust text recognition. In: European conference on computer vision, https:\/\/doi.org\/10.1007\/978-3-030-58529-7_9","DOI":"10.1007\/978-3-030-58529-7_9"},{"key":"5251_CR4","doi-asserted-by":"publisher","first-page":"5585","DOI":"10.1109\/TIP.2022.3197981","volume":"31","author":"Y Wang","year":"2022","unstructured":"Wang Y, Xie H, Fang S et al (2022) Petr: rethinking the capability of transformer-based language model in scene text recognition. IEEE Trans Image Process 31:5585\u20135598. https:\/\/doi.org\/10.1109\/TIP.2022.3197981","journal-title":"IEEE Trans Image Process"},{"key":"5251_CR5","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1109\/TPAMI.2015.2439281","volume":"38","author":"C Dong","year":"2014","unstructured":"Dong C, Loy CC, He K et al (2014) Image super-resolution using deep convolutional networks. IEEE Trans Patt Anal Mach Intell 38:295\u2013307. https:\/\/doi.org\/10.1109\/TPAMI.2015.2439281","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"key":"5251_CR6","doi-asserted-by":"publisher","unstructured":"Chan KCK, Wang X, Xu X et\u00a0al (2020) Glean: generative latent bank for large-factor image super-resolution. 2021 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 14240\u201314249. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01402","DOI":"10.1109\/CVPR46437.2021.01402"},{"key":"5251_CR7","doi-asserted-by":"publisher","unstructured":"Chen X, Wang X, Zhou J et\u00a0al (2022) Activating more pixels in image super-resolution transformer. 2023 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 22367\u201322377. https:\/\/doi.org\/10.1109\/CVPR52729.2023.02142","DOI":"10.1109\/CVPR52729.2023.02142"},{"issue":"5\u20136","key":"5251_CR8","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1016\/j.neunet.2005.06.042","volume":"18","author":"A Graves","year":"2005","unstructured":"Graves A, Schmidhuber J (2005) Framewise phoneme classification with bidirectional lstm and other neural network architectures. Neural Netw\u202f: Official J Int Neural Netw Soc 18(5\u20136):602\u201310. https:\/\/doi.org\/10.1016\/j.neunet.2005.06.042","journal-title":"Neural Netw : Official J Int Neural Netw Soc"},{"key":"5251_CR9","doi-asserted-by":"publisher","unstructured":"Wang W, Xie E, Liu X et\u00a0al (2020) Scene text image super-resolution in the wild. In: European conference on computer vision, https:\/\/doi.org\/10.1007\/978-3-030-58607-2_38","DOI":"10.1007\/978-3-030-58607-2_38"},{"key":"5251_CR10","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TIP.2023.3237002","volume":"32","author":"J Ma","year":"2021","unstructured":"Ma J, Guo S, Zhang L (2021) Text prior guided scene text image super-resolution. IEEE Transactions on Image Processing 32:1341\u20131353. https:\/\/doi.org\/10.1109\/TIP.2023.3237002","journal-title":"IEEE Transactions on Image Processing"},{"key":"5251_CR11","doi-asserted-by":"publisher","unstructured":"Zhang Y, Tian Y, Kong Y et\u00a0al (2018) Residual dense network for image super-resolution. 2018 IEEE\/CVF Conference on computer vision and pattern recognition pp 2472\u20132481. https:\/\/doi.org\/10.1109\/CVPR.2018.00262","DOI":"10.1109\/CVPR.2018.00262"},{"key":"5251_CR12","doi-asserted-by":"publisher","first-page":"C1","DOI":"10.1007\/978-3-030-58610-2_12","volume":"12357","author":"B Niu","year":"2020","unstructured":"Niu B, Wen W, Ren W et al (2020) (2020) Correction to: single image super-resolution via a holistic attention network. Computer Vision - ECCV 12357:C1\u2013C1. https:\/\/doi.org\/10.1007\/978-3-030-58610-2_12","journal-title":"Computer Vision - ECCV"},{"key":"5251_CR13","doi-asserted-by":"publisher","unstructured":"Ledig C, Theis L, Husz\u00e1r F et\u00a0al (2016) Photo-realistic single image super-resolution using a generative adversarial network. 2017 IEEE Conference on computer vision and pattern recognition (CVPR) pp 105\u2013114. https:\/\/doi.org\/10.1109\/CVPR.2017.19","DOI":"10.1109\/CVPR.2017.19"},{"key":"5251_CR14","doi-asserted-by":"publisher","unstructured":"Lim B, Son S, Kim H et\u00a0al (2017) Enhanced deep residual networks for single image super-resolution. 2017 IEEE Conference on computer vision and pattern recognition workshops (CVPRW) pp 1132\u20131140. https:\/\/doi.org\/10.1109\/CVPRW.2017.151","DOI":"10.1109\/CVPRW.2017.151"},{"key":"5251_CR15","doi-asserted-by":"publisher","unstructured":"Zhang Y, Li K, Li K et\u00a0al (2018) Image super-resolution using very deep residual channel attention networks. In: European conference on computer vision, https:\/\/doi.org\/10.1007\/978-3-030-01234-2_18","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"5251_CR16","doi-asserted-by":"publisher","unstructured":"Li X, Zuo W, Loy CC (2023) Learning generative structure prior for blind text image super-resolution. 2023 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 10103\u201310113. https:\/\/doi.org\/10.1109\/CVPR52729.2023.00974","DOI":"10.1109\/CVPR52729.2023.00974"},{"key":"5251_CR17","unstructured":"Wang W, Xie E, Sun P et\u00a0al (2020) Textsr: content-aware text super-resolution guided by recognition. In: European conference on computer vision, https:\/\/api.semanticscholar.org\/CorpusID:202577634"},{"key":"5251_CR18","doi-asserted-by":"publisher","unstructured":"Chen J, Yu H, Ma J et\u00a0al (2021) Text gestalt: stroke-aware scene text image super-resolution. In: AAAI Conference on artificial intelligence, https:\/\/doi.org\/10.1609\/aaai.v36i1.19904","DOI":"10.1609\/aaai.v36i1.19904"},{"key":"5251_CR19","doi-asserted-by":"publisher","unstructured":"Mou Y, Tan L, Yang H et\u00a0al (2020) Plugnet: degradation aware scene text recognition supervised by a pluggable super-resolution unit. In: European conference on computer vision, https:\/\/doi.org\/10.1007\/978-3-030-58555-6_10","DOI":"10.1007\/978-3-030-58555-6_10"},{"key":"5251_CR20","doi-asserted-by":"publisher","unstructured":"Zhao C, Feng S, Zhao BN et\u00a0al (2021) Scene text image super-resolution via parallelly contextual attention network. Proceedings of the 29th ACM international conference on multimedia https:\/\/doi.org\/10.1145\/3474085.3475469","DOI":"10.1145\/3474085.3475469"},{"key":"5251_CR21","unstructured":"Vaswani A, Shazeer NM, Parmar N et al (2017) Attention is all you need. In: NIPS, https:\/\/api.semanticscholar.org\/CorpusID:13756489"},{"key":"5251_CR22","doi-asserted-by":"publisher","unstructured":"Chen J, Li B, Xue X (2021) Scene text telescope: text-focused scene image super-resolution. 2021 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 12021\u201312030. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01185","DOI":"10.1109\/CVPR46437.2021.01185"},{"key":"5251_CR23","doi-asserted-by":"publisher","unstructured":"Shi W, Caballero J, Husz\u00e1r F et\u00a0al (2016) Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. 2016 IEEE Conference on computer vision and pattern recognition (CVPR) pp 1874\u20131883. https:\/\/doi.org\/10.1109\/CVPR.2016.207","DOI":"10.1109\/CVPR.2016.207"},{"key":"5251_CR24","doi-asserted-by":"publisher","unstructured":"Fang S, Xie H, Wang Y et\u00a0al (2021) Read like humans: autonomous, bidirectional and iterative language modeling for scene text recognition. 2021 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 7094\u20137103. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00702","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"5251_CR25","unstructured":"Liutkus A, C\u00edfka O, Wu SL et\u00a0al (2021) Relative positional encoding for transformers with linear complexity. In: International conference on machine learning, https:\/\/api.semanticscholar.org\/CorpusID:234762885"},{"key":"5251_CR26","doi-asserted-by":"publisher","first-page":"4927","DOI":"10.1109\/TCSVT.2021.3138431","volume":"32","author":"Y Liu","year":"2022","unstructured":"Liu Y, Jia Q, Fan X et al (2022) Cross-srn: structure-preserving super-resolution network with cross convolution. IEEE Trans Circuits Syst Video Technol 32:4927\u20134939. https:\/\/doi.org\/10.1109\/TCSVT.2021.3138431","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"5251_CR27","doi-asserted-by":"publisher","unstructured":"Zhang XC, Chen Q, Ng R et\u00a0al (2019) Zoom to learn, learn to zoom. 2019 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 3757\u20133765. https:\/\/doi.org\/10.1109\/CVPR.2019.00388","DOI":"10.1109\/CVPR.2019.00388"},{"key":"5251_CR28","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2015","unstructured":"Shi B, Bai X, Yao C (2015) An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans Pattern Anal Mach Intell 39:2298\u20132304. https:\/\/doi.org\/10.1109\/TPAMI.2016.2646371","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5251_CR29","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.patcog.2019.01.020","volume":"90","author":"C Luo","year":"2019","unstructured":"Luo C, Jin L, Sun Z (2019) A multi-object rectified attention network for scene text recognition. Pattern Recognit 90:109\u2013118. https:\/\/doi.org\/10.1016\/j.patcog.2019.01.020","journal-title":"Pattern Recognit"},{"key":"5251_CR30","doi-asserted-by":"publisher","first-page":"2035","DOI":"10.1109\/TPAMI.2018.2848939","volume":"41","author":"B Shi","year":"2019","unstructured":"Shi B, Yang M, Wang X et al (2019) Aster: an attentional scene text recognizer with flexible rectification. IEEE Trans Pattern Anal Mach Intell 41:2035\u20132048. https:\/\/doi.org\/10.1109\/TPAMI.2018.2848939","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5251_CR31","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR et al (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13:600\u2013612. https:\/\/doi.org\/10.1109\/TIP.2003.819861","journal-title":"IEEE Trans Image Process"},{"key":"5251_CR32","doi-asserted-by":"publisher","unstructured":"Karatzas D, i\u00a0Bigorda LG, Nicolaou A et\u00a0al (2015) Icdar 2015 competition on robust reading. 2015 13th International conference on document analysis and recognition (ICDAR) pp 1156\u20131160. https:\/\/doi.org\/10.1109\/ICDAR.2015.7333942","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"5251_CR33","doi-asserted-by":"publisher","unstructured":"Karatzas D, Shafait F, Uchida S et\u00a0al (2013) Icdar 2013 robust reading competition. 2013 12th International conference on document analysis and recognition pp 1484\u20131493. https:\/\/doi.org\/10.1109\/ICDAR.2013.221","DOI":"10.1109\/ICDAR.2013.221"},{"key":"5251_CR34","doi-asserted-by":"publisher","unstructured":"Wang K, Babenko B, Belongie SJ (2011) End-to-end scene text recognition. 2011 International conference on computer vision pp 1457\u20131464. https:\/\/doi.org\/10.1109\/ICCV.2011.6126402","DOI":"10.1109\/ICCV.2011.6126402"},{"key":"5251_CR35","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. https:\/\/api.semanticscholar.org\/CorpusID:6628106. arXiv:1412.6980"},{"key":"5251_CR36","doi-asserted-by":"publisher","unstructured":"Ma J, Liang Z, Zhang L (2022) A text attention network for spatial deformation robust scene text image super-resolution. 2022 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR) pp 5901\u20135910. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00582","DOI":"10.1109\/CVPR52688.2022.00582"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-05251-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-05251-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-05251-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,16]],"date-time":"2024-02-16T12:31:49Z","timestamp":1708086709000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-05251-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1]]},"references-count":36,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,1]]}},"alternative-id":["5251"],"URL":"https:\/\/doi.org\/10.1007\/s10489-023-05251-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1]]},"assertion":[{"value":"23 December 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"The data that support the findings of this study are openly available in TextZoom, ICDAR- 2015, ICDAR2013 and SVT datasets at , , respectively.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and informed consent for data used"}}]}}