{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:21:21Z","timestamp":1740108081907,"version":"3.37.3"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"14","license":[{"start":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T00:00:00Z","timestamp":1708473600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T00:00:00Z","timestamp":1708473600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2662022LXYJ004"],"award-info":[{"award-number":["2662022LXYJ004"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s00521-024-09493-5","type":"journal-article","created":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T19:02:31Z","timestamp":1708542151000},"page":"7817-7827","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Cascade 2D attentional decoders with context-enhanced encoder for scene text recognition"],"prefix":"10.1007","volume":"36","author":[{"given":"Hongmei","family":"Chi","sequence":"first","affiliation":[]},{"given":"Jiaxin","family":"Cai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5678-6829","authenticated-orcid":false,"given":"Xinran","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,21]]},"reference":[{"key":"9493_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107205","volume":"102","author":"L Xie","year":"2020","unstructured":"Xie L, Lee F, Liu L, Kotani K, Chen Q (2020) Scene recognition: a comprehensive survey. Pattern Recogn 102:107205","journal-title":"Pattern Recogn"},{"issue":"2","key":"9493_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3440756","volume":"54","author":"X Chen","year":"2021","unstructured":"Chen X, Jin L, Zhu Y, Luo C, Wang T (2021) Text recognition in the wild: a survey. ACM Comput Surv (CSUR) 54(2):1\u201335","journal-title":"ACM Comput Surv (CSUR)"},{"key":"9493_CR3","doi-asserted-by":"crossref","unstructured":"Huang Z, Chen K, He J, Bai X, Karatzas D, Lu S, Jawahar C (2019) Icdar 2019 robust reading challenge on scanned receipts OCR and information extraction. In: International conference on document analysis recognition","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"9493_CR4","unstructured":"Bahdanau D, Cho K, Bengio Y (2015) Neural machine translation by jointly learning to align and translate. In: 3rd international conference on learning representations, ICLR 2015; conference date: 07-05-2015 Through 09-05-2015"},{"key":"9493_CR5","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhudinov R, Zemel R, Bengio Y (2015) Show, attend and tell: neural image caption generation with visual attention. In: International conference on machine learning, pp 2048\u20132057"},{"issue":"9","key":"9493_CR6","doi-asserted-by":"publisher","first-page":"2035","DOI":"10.1109\/TPAMI.2018.2848939","volume":"41","author":"B Shi","year":"2018","unstructured":"Shi B, Yang M, Wang X, Lyu P, Yao C, Bai X (2018) Aster: an attentional scene text recognizer with flexible rectification. IEEE Trans Pattern Anal Mach Intell 41(9):2035\u20132048","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9493_CR7","doi-asserted-by":"crossref","unstructured":"Li H, Wang P, Shen C, Zhang G (2019) Show, attend and read: a simple and strong baseline for irregular text recognition. In: Proceedings of the AAAI conference on artificial intelligence, vol 33, pp 8610\u20138617","DOI":"10.1609\/aaai.v33i01.33018610"},{"key":"9493_CR8","doi-asserted-by":"crossref","unstructured":"Wang T, Zhu Y, Jin L, Luo C, Chen X, Wu Y, Wang Q, Cai M (2020) Decoupled attention network for text recognition. In: AAAI, pp 12216\u201312224","DOI":"10.1609\/aaai.v34i07.6903"},{"key":"9493_CR9","doi-asserted-by":"crossref","unstructured":"Yue X, Kuang Z, Lin C, Sun H, Zhang W (2020) Robustscanner: dynamically enhancing positional clues for robust text recognition. In: European conference on computer vision. Springer, pp 135\u2013151","DOI":"10.1007\/978-3-030-58529-7_9"},{"key":"9493_CR10","doi-asserted-by":"crossref","unstructured":"Wang C, Liu C-L (2020) Scene text recognition by attention network with gated embedding. In: 2020 international joint conference on neural networks (IJCNN). IEEE, pp 1\u20138","DOI":"10.1109\/IJCNN48605.2020.9206802"},{"key":"9493_CR11","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1016\/j.neucom.2019.11.049","volume":"381","author":"X Chen","year":"2020","unstructured":"Chen X, Wang T, Zhu Y, Jin L, Luo C (2020) Adaptive embedding gate for attention-based scene text recognition. Neurocomputing 381:261\u2013271","journal-title":"Neurocomputing"},{"key":"9493_CR12","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"key":"9493_CR13","doi-asserted-by":"crossref","unstructured":"Lu J, Xiong C, Parikh D, Socher R (2017) Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 375\u2013383","DOI":"10.1109\/CVPR.2017.345"},{"key":"9493_CR14","doi-asserted-by":"crossref","unstructured":"Cheng Z, Bai F, Xu Y, Zheng G, Pu S, Zhou S (2017) Focusing attention: towards accurate text recognition in natural images. In: Proceedings of the IEEE international conference on computer vision, pp 5076\u20135084","DOI":"10.1109\/ICCV.2017.543"},{"key":"9493_CR15","doi-asserted-by":"crossref","unstructured":"Yu D, Li X, Zhang C, Liu T, Han J, Liu J, Ding E (2020) Towards accurate scene text recognition with semantic reasoning networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12113\u201312122","DOI":"10.1109\/CVPR42600.2020.01213"},{"key":"9493_CR16","doi-asserted-by":"crossref","unstructured":"Fang S, Xie H, Wang Y, Mao Z, Zhang Y (2021) Read like humans: autonomous, bidirectional and iterative language modeling for scene text recognition. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 7094\u20137103","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"9493_CR17","doi-asserted-by":"crossref","unstructured":"Mishra A, Alahari K, Jawahar C (2012) Top-down and bottom-up cues for scene text recognition. In: 2012 IEEE conference on computer vision and pattern recognition. IEEE, pp 2687\u20132694","DOI":"10.1109\/CVPR.2012.6247990"},{"key":"9493_CR18","doi-asserted-by":"crossref","unstructured":"Wang K, Babenko B, Belongie S (2011) End-to-end scene text recognition. In: 2011 international conference on computer vision. IEEE, pp 1457\u20131464","DOI":"10.1109\/ICCV.2011.6126402"},{"key":"9493_CR19","doi-asserted-by":"crossref","unstructured":"Karatzas D, Shafait F, Uchida S, Iwamura M, i Bigorda L.G, Mestre S.R, Mas J, Mota D.F, Almazan J.A, De\u00a0Las\u00a0Heras L.P (2013) Icdar 2013 robust reading competition. In: 2013 12th international conference on document analysis and recognition . IEEE, pp 1484\u20131493","DOI":"10.1109\/ICDAR.2013.221"},{"key":"9493_CR20","doi-asserted-by":"crossref","unstructured":"Karatzas D, Gomez-Bigorda L, Nicolaou A, Ghosh S, Bagdanov A, Iwamura M, Matas J, Neumann L, Chandrasekhar V.R, Lu S, et al. (2015) Icdar 2015 competition on robust reading. In: 2015 13th international conference on document analysis and recognition (ICDAR). IEEE, pp 1156\u20131160","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"9493_CR21","doi-asserted-by":"crossref","unstructured":"Quy\u00a0Phan T, Shivakumara P, Tian S, Lim\u00a0Tan C (2013) Recognizing text with perspective distortion in natural scenes. In: Proceedings of the IEEE international conference on computer vision, pp 569\u2013576","DOI":"10.1109\/ICCV.2013.76"},{"issue":"18","key":"9493_CR22","doi-asserted-by":"publisher","first-page":"8027","DOI":"10.1016\/j.eswa.2014.07.008","volume":"41","author":"A Risnumawan","year":"2014","unstructured":"Risnumawan A, Shivakumara P, Chan CS, Tan CL (2014) A robust arbitrary text detection system for natural scene images. Expert Syst Appl 41(18):8027\u20138048","journal-title":"Expert Syst Appl"},{"key":"9493_CR23","doi-asserted-by":"crossref","unstructured":"Graves A, Fern\u00e1ndez S, Gomez F, Schmidhuber J (2006) Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on machine learning, pp 369\u2013376","DOI":"10.1145\/1143844.1143891"},{"key":"9493_CR24","doi-asserted-by":"crossref","unstructured":"He P, Huang W, Qiao Y, Loy CC, Tang X (2016) Reading scene text in deep convolutional sequences. In: Proceedings of the thirtieth AAAI conference on artificial intelligence. AAAI\u201916. AAAI Press, pp 3501\u20133508","DOI":"10.1609\/aaai.v30i1.10465"},{"issue":"11","key":"9493_CR25","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2016","unstructured":"Shi B, Bai X, Yao C (2016) An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans Pattern Anal Mach Intell 39(11):2298\u20132304","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9493_CR26","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/j.neucom.2019.01.094","volume":"339","author":"Y Gao","year":"2019","unstructured":"Gao Y, Chen Y, Wang J, Tang M, Lu H (2019) Reading scene text with fully convolutional sequence modeling. Neurocomputing 339:161\u2013170","journal-title":"Neurocomputing"},{"key":"9493_CR27","doi-asserted-by":"crossref","unstructured":"Lee C-Y, Osindero S (2016) Recursive recurrent nets with attention modeling for OCR in the wild. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2231\u20132239","DOI":"10.1109\/CVPR.2016.245"},{"key":"9493_CR28","doi-asserted-by":"crossref","unstructured":"Baek J, Kim G, Lee J, Park S, Han D, Yun S, Oh S.J, Lee H (2019) What is wrong with scene text recognition model comparisons? dataset and model analysis. In: Proceedings of the IEEE international conference on computer vision, pp 4715\u20134723","DOI":"10.1109\/ICCV.2019.00481"},{"key":"9493_CR29","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1016\/j.patcog.2016.10.016","volume":"63","author":"B Su","year":"2017","unstructured":"Su B, Lu S (2017) Accurate recognition of words in scenes without character segmentation using recurrent neural network. Pattern Recogn 63:397\u2013405","journal-title":"Pattern Recogn"},{"key":"9493_CR30","unstructured":"Wan Z, Xie F, Liu Y, Bai X, Yao C (2019) 2d-ctc for scene text recognition. arXiv preprint arXiv:1907.09705"},{"key":"9493_CR31","doi-asserted-by":"crossref","unstructured":"Yang X, He D, Zhou Z, Kifer D, Giles CL (2017) Learning to read irregular text with attention mechanisms. In: IJCAI, vol 1, p 3","DOI":"10.24963\/ijcai.2017\/458"},{"key":"9493_CR32","doi-asserted-by":"crossref","unstructured":"Liu W, Chen C, Wong K-YK (2018) Char-net: a character-aware neural network for distorted scene text recognition. In: AAAI, vol 1, p 4","DOI":"10.1609\/aaai.v32i1.12246"},{"key":"9493_CR33","doi-asserted-by":"crossref","unstructured":"Qiao Z, Zhou Y, Yang D, Zhou Y, Wang W (2020) Seed: semantics enhanced encoder-decoder framework for scene text recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13528\u201313537","DOI":"10.1109\/CVPR42600.2020.01354"},{"key":"9493_CR34","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.patcog.2019.01.020","volume":"90","author":"C Luo","year":"2019","unstructured":"Luo C, Jin L, Sun Z (2019) Moran: a multi-object rectified attention network for scene text recognition. Pattern Recogn 90:109\u2013118","journal-title":"Pattern Recogn"},{"key":"9493_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107692","volume":"111","author":"Q Lin","year":"2021","unstructured":"Lin Q, Luo C, Jin L, Lai S (2021) Stan: a sequential transformation attention-based network for scene text recognition. Pattern Recogn 111:107692","journal-title":"Pattern Recogn"},{"key":"9493_CR36","doi-asserted-by":"publisher","first-page":"278","DOI":"10.1016\/j.neucom.2020.04.129","volume":"425","author":"C Wang","year":"2021","unstructured":"Wang C, Liu C-L (2021) Multi-branch guided attention network for irregular text recognition. Neurocomputing 425:278\u2013289","journal-title":"Neurocomputing"},{"key":"9493_CR37","unstructured":"Wang W, Xie E, Sun P, Wang W, Tian L, Shen C, Luo P (2019) Textsr: content-aware text super-resolution guided by recognition. arXiv preprint arXiv:1909.07113"},{"key":"9493_CR38","doi-asserted-by":"crossref","unstructured":"Hu W, Cai X, Hou J, Yi S, Lin Z (2020) Gtc: guided training of ctc towards efficient and accurate scene text recognition. In: AAAI, pp 11005\u201311012","DOI":"10.1609\/aaai.v34i07.6735"},{"key":"9493_CR39","doi-asserted-by":"crossref","unstructured":"Zhang H, Yao Q, Yang M, Xu Y, Bai X (2020) Autostr: efficient backbone search for scene text recognition. In: Proceedings of the European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-030-58586-0_44"},{"key":"9493_CR40","unstructured":"Lyu P, Yang Z, Leng X, Wu X, Li R, Shen X (2019) 2d attentional irregular scene text recognizer. arXiv preprint arXiv:1906.05708"},{"key":"9493_CR41","doi-asserted-by":"crossref","unstructured":"Litman R, Anschel O, Tsiper S, Litman R, Mazor S, Manmatha R (2020) Scatter: selective context attentional scene text recognizer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11962\u201311972","DOI":"10.1109\/CVPR42600.2020.01198"},{"key":"9493_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.107980","volume":"117","author":"N Lu","year":"2021","unstructured":"Lu N, Yu W, Qi X, Chen Y, Gong P, Xiao R, Bai X (2021) Master: multi-aspect non-local network for scene text recognition. Pattern Recogn 117:107980","journal-title":"Pattern Recogn"},{"key":"9493_CR43","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"9493_CR44","unstructured":"Lucas S.M, Panaretos A, Sosa L, Tang A, Wong S, Young R (2003) Icdar 2003 robust reading competitions. In: Seventh international conference on document analysis and recognition, 2003. Proceedings. Citeseer, pp 682\u2013687"},{"key":"9493_CR45","unstructured":"Zeiler MD (2012) Adadelta: an adaptive learning rate method. arXiv preprint arXiv:1212.5701"},{"key":"9493_CR46","unstructured":"Wang P, Yang L, Li H, Deng Y, Shen C, Zhang Y (2019) A simple and robust convolutional-attention network for irregular text recognition, vol 6. arXiv:1904.01375"},{"key":"9493_CR47","doi-asserted-by":"crossref","unstructured":"Zhan F, Lu S (2019) Esir: end-to-end scene text recognition via iterative image rectification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2059\u20132068","DOI":"10.1109\/CVPR.2019.00216"},{"key":"9493_CR48","doi-asserted-by":"crossref","unstructured":"Wan Z, He M, Chen H, Bai X, Yao C (2020) Textscanner: reading characters in order for robust scene text recognition. In: Proceedings of the AAAI conference on artificial intelligence, vol 34, pp 12120\u201312127","DOI":"10.1609\/aaai.v34i07.6891"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09493-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09493-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09493-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,13]],"date-time":"2024-04-13T20:13:25Z","timestamp":1713039205000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09493-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,21]]},"references-count":48,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["9493"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09493-5","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2024,2,21]]},"assertion":[{"value":"14 May 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}