{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T17:10:29Z","timestamp":1778605829531,"version":"3.51.4"},"reference-count":233,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computers and Electrical Engineering"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.compeleceng.2026.111139","type":"journal-article","created":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T08:28:05Z","timestamp":1775896085000},"page":"111139","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["From pixels to text: A deep learning survey of scene text detection and recognition"],"prefix":"10.1016","volume":"135","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7784-1073","authenticated-orcid":false,"given":"Vijayakumar","family":"Kadha","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6307-8011","authenticated-orcid":false,"given":"Bala Bhaskar","family":"Duddeti","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3147-2347","authenticated-orcid":false,"given":"Kannuru","family":"Srinadh","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7448-3941","authenticated-orcid":false,"given":"Santhosh Kumar","family":"Buddepu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5340-4058","authenticated-orcid":false,"given":"Lakshminarayana","family":"Janjanam","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3654-204X","authenticated-orcid":false,"given":"Kalyanbrat","family":"Medhi","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b1","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1109\/34.982903","article-title":"Vision for mobile robot navigation: A survey","volume":"24","author":"DeSouza","year":"2002","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b2","series-title":"2011 18th IEEE international conference on image processing","first-page":"2601","article-title":"Mobile visual search on printed documents using text and low bit-rate features","author":"Tsai","year":"2011"},{"issue":"19","key":"10.1016\/j.compeleceng.2026.111139_b3","article-title":"Extracting and segmenting container name from container images","volume":"74","author":"Chowdhury","year":"2013","journal-title":"Int J Comput Appl"},{"issue":"10","key":"10.1016\/j.compeleceng.2026.111139_b4","doi-asserted-by":"crossref","first-page":"1733","DOI":"10.1109\/TPAMI.2009.38","article-title":"Scene text recognition using similarity and a lexicon with sparse belief propagation","volume":"31","author":"Weinman","year":"2009","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"5","key":"10.1016\/j.compeleceng.2026.111139_b5","doi-asserted-by":"crossref","first-page":"1063","DOI":"10.1109\/TMM.2016.2638622","article-title":"Words matter: Scene text for image classification and retrieval","volume":"19","author":"Karaoglu","year":"2016","journal-title":"IEEE Trans Multimed"},{"issue":"5","key":"10.1016\/j.compeleceng.2026.111139_b6","first-page":"970","article-title":"Robust text detection in natural scene images","volume":"36","author":"Yin","year":"2013","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"9","key":"10.1016\/j.compeleceng.2026.111139_b7","doi-asserted-by":"crossref","first-page":"4256","DOI":"10.1109\/TIP.2012.2199327","article-title":"Localizing text in scene images by boundary clustering, stroke segmentation, and string fragment classification","volume":"21","author":"Yi","year":"2012","journal-title":"IEEE Trans Image Process"},{"issue":"3","key":"10.1016\/j.compeleceng.2026.111139_b8","first-page":"800","article-title":"A hybrid approach to detect and localize texts in natural scene images","volume":"20","author":"Pan","year":"2010","journal-title":"IEEE Trans Image Process"},{"key":"10.1016\/j.compeleceng.2026.111139_b9","series-title":"Proceedings of the 21st international conference on pattern recognition","first-page":"725","article-title":"Effective text localization in natural scene images with MSER, geometry-based grouping and AdaBoost","author":"Yin","year":"2012"},{"key":"10.1016\/j.compeleceng.2026.111139_b10","series-title":"2012 IEEE conference on computer vision and pattern recognition","first-page":"1083","article-title":"Detecting texts of arbitrary orientations in natural images","author":"Yao","year":"2012"},{"key":"10.1016\/j.compeleceng.2026.111139_b11","series-title":"R2CNN: Rotational region CNN for orientation robust scene text detection","author":"Jiang","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b12","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1016\/j.patcog.2019.01.020","article-title":"Moran: A multi-object rectified attention network for scene text recognition","volume":"90","author":"Luo","year":"2019","journal-title":"Pattern Recognit"},{"issue":"14","key":"10.1016\/j.compeleceng.2026.111139_b13","doi-asserted-by":"crossref","first-page":"4870","DOI":"10.3390\/s21144870","article-title":"Multi-directional scene text detection based on improved YOLOv3","volume":"21","author":"Xiao","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.compeleceng.2026.111139_b14","series-title":"Computer vision \u2013 ACCV 2010","first-page":"770","article-title":"A method for text localization and recognition in real-world images","author":"Neumann","year":"2011"},{"issue":"1","key":"10.1016\/j.compeleceng.2026.111139_b15","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s11704-015-4488-0","article-title":"Scene text detection and recognition: Recent advances and future trends","volume":"10","author":"Zhu","year":"2016","journal-title":"Front Comput Sci"},{"key":"10.1016\/j.compeleceng.2026.111139_b16","doi-asserted-by":"crossref","unstructured":"Cho H, Sung M, Jun B. Canny Text Detector: Fast and Robust Scene Text Localization Algorithm. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.","DOI":"10.1109\/CVPR.2016.388"},{"key":"10.1016\/j.compeleceng.2026.111139_b17","series-title":"2010 IEEE computer society conference on computer vision and pattern recognition","first-page":"2963","article-title":"Detecting text in natural scenes with stroke width transform","author":"Epshtein","year":"2010"},{"key":"10.1016\/j.compeleceng.2026.111139_b18","series-title":"Synthetic data and artificial neural networks for natural scene text recognition","author":"Jaderberg","year":"2014"},{"key":"10.1016\/j.compeleceng.2026.111139_b19","series-title":"Computer vision \u2013 ECCV 2014","first-page":"512","article-title":"Deep features for text spotting","author":"Jaderberg","year":"2014"},{"key":"10.1016\/j.compeleceng.2026.111139_b20","series-title":"Proceedings of the 21st international conference on pattern recognition","first-page":"3304","article-title":"End-to-end text recognition with convolutional neural networks","author":"Wang","year":"2012"},{"issue":"8","key":"10.1016\/j.compeleceng.2026.111139_b21","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0182227","article-title":"Scene text detection via extremal region based double threshold convolutional network classification","volume":"12","author":"Zhu","year":"2017","journal-title":"PLoS One"},{"issue":"7","key":"10.1016\/j.compeleceng.2026.111139_b22","doi-asserted-by":"crossref","first-page":"1480","DOI":"10.1109\/TPAMI.2014.2366765","article-title":"Text detection and recognition in imagery: A survey","volume":"37","author":"Ye","year":"2014","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b23","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1016\/j.neucom.2013.05.037","article-title":"Text extraction from natural scene image: A survey","volume":"122","author":"Zhang","year":"2013","journal-title":"Neurocomputing"},{"key":"10.1016\/j.compeleceng.2026.111139_b24","series-title":"2019 IEEE 11th international conference on communication software and networks","first-page":"688","article-title":"Scene text recognition based on deep learning: A brief survey","author":"Chen","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b25","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1007\/s10032-019-00320-5","article-title":"Scene text detection and recognition with advances in deep learning: a survey","volume":"22","author":"Liu","year":"2019","journal-title":"Int J Doc Anal Recognit (IJDAR)"},{"issue":"1","key":"10.1016\/j.compeleceng.2026.111139_b26","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1007\/s11263-020-01369-0","article-title":"Scene text detection and recognition: The deep learning era","volume":"129","author":"Long","year":"2021","journal-title":"Int J Comput Vis"},{"key":"10.1016\/j.compeleceng.2026.111139_b27","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J. Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2016, p. 770\u20138.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.compeleceng.2026.111139_b28","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T. Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2015, p. 3431\u201340.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"10.1016\/j.compeleceng.2026.111139_b29","series-title":"2013 IEEE international conference on acoustics, speech and signal processing","first-page":"6645","article-title":"Speech recognition with deep recurrent neural networks","author":"Graves","year":"2013"},{"issue":"11","key":"10.1016\/j.compeleceng.2026.111139_b30","doi-asserted-by":"crossref","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","article-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","volume":"39","author":"Shi","year":"2016","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b31","article-title":"Attention is all you need","volume":"vol. 30","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b32","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b33","doi-asserted-by":"crossref","unstructured":"Borisyuk F, Gordo A, Sivakumar V. Rosetta: Large scale system for text detection and recognition in images. In: Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. 2018, p. 71\u20139.","DOI":"10.1145\/3219819.3219861"},{"key":"10.1016\/j.compeleceng.2026.111139_b34","first-page":"2321","article-title":"Morgan: Multi-scale robust generative adversarial network for arbitrary-oriented scene text detection and recognition","volume":"23","author":"Luo","year":"2020","journal-title":"IEEE Trans Multimed"},{"key":"10.1016\/j.compeleceng.2026.111139_b35","unstructured":"Yuliang L, Jin L, Zhang S, Zhang S. Detecting Oriented Text in Natural Images by Linking Segments. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017, p. 2550\u20138."},{"key":"10.1016\/j.compeleceng.2026.111139_b36","series-title":"2015 13th international conference on document analysis and recognition","first-page":"1156","article-title":"ICDAR 2015 competition on robust reading","author":"Karatzas","year":"2015"},{"key":"10.1016\/j.compeleceng.2026.111139_b37","series-title":"2020 IEEE winter conference on applications of computer vision","first-page":"985","article-title":"Towards efficient lexicon-free scene text recognition","author":"Krishnan","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b38","series-title":"2019 international conference on document analysis and recognition","first-page":"146","article-title":"Unconstrained scene text and video text recognition for Indian scripts","author":"Jain","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b39","series-title":"2013 12th international conference on document analysis and recognition","first-page":"1484","article-title":"ICDAR 2013 robust reading competition","author":"Karatzas","year":"2013"},{"key":"10.1016\/j.compeleceng.2026.111139_b40","first-page":"935","article-title":"Total-text: A comprehensive dataset for scene text detection and recognition","volume":"vol. 1","author":"Ch\u2019ng","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b41","article-title":"Text spotting: A review","author":"Ye","year":"2022","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"10.1016\/j.compeleceng.2026.111139_b42","first-page":"674","article-title":"A comprehensive survey of scene text: where we are and what\u2019s next","volume":"130","author":"Zhang","year":"2022","journal-title":"Int J Comput Vis"},{"key":"10.1016\/j.compeleceng.2026.111139_b43","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.114549","article-title":"A novel pipeline framework for multi-oriented scene text image detection and recognition","volume":"170","author":"Naiemi","year":"2021","journal-title":"Expert Syst Appl"},{"key":"10.1016\/j.compeleceng.2026.111139_b44","doi-asserted-by":"crossref","first-page":"6518","DOI":"10.1109\/ACCESS.2024.3349952","article-title":"A survey of text classification with transformers: How wide? how large? how long? how accurate? how expensive? how safe?","volume":"12","author":"Fields","year":"2024","journal-title":"IEEE Access"},{"issue":"3","key":"10.1016\/j.compeleceng.2026.111139_b45","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3439726","article-title":"Deep learning\u2013based text classification: a comprehensive review","volume":"54","author":"Minaee","year":"2021","journal-title":"ACM Comput Surv"},{"key":"10.1016\/j.compeleceng.2026.111139_b46","first-page":"935","article-title":"Total-text: A comprehensive dataset for scene text detection and recognition","volume":"vol. 1","author":"Ch\u2019ng","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b47","first-page":"1454","article-title":"ICDAR2017 robust reading challenge on multi-lingual scene text detection and script identification - RRC-mlt","volume":"vol. 1","author":"Nayef","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b48","series-title":"International conference on document analysis and recognition","first-page":"233","article-title":"Indicstr12: A dataset for indic scene text recognition","author":"Lunia","year":"2023"},{"issue":"1","key":"10.1016\/j.compeleceng.2026.111139_b49","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1007\/s12559-023-10193-w","article-title":"MDIW-13: a new multi-lingual and multi-script database and benchmark for script identification","volume":"16","author":"Ferrer","year":"2024","journal-title":"Cogn Comput"},{"key":"10.1016\/j.compeleceng.2026.111139_b50","series-title":"2015 13th international conference on document analysis and recognition","first-page":"1156","article-title":"ICDAR 2015 competition on robust reading","author":"Karatzas","year":"2015"},{"key":"10.1016\/j.compeleceng.2026.111139_b51","series-title":"2013 12th international conference on document analysis and recognition","first-page":"1484","article-title":"ICDAR 2013 robust reading competition","author":"Karatzas","year":"2013"},{"key":"10.1016\/j.compeleceng.2026.111139_b52","article-title":"USTB-SV1K: A multi-view and multi-directional street view text dataset","volume":"2016","author":"Zhang","year":"2016","journal-title":"Sci World J"},{"key":"10.1016\/j.compeleceng.2026.111139_b53","series-title":"BMVC 2012-23rd british machine vision conference","first-page":"1","article-title":"Scene text recognition using higher order language priors","author":"Mishra","year":"2012"},{"key":"10.1016\/j.compeleceng.2026.111139_b54","series-title":"2011 international conference on computer vision","first-page":"1457","article-title":"End-to-end scene text recognition","author":"Wang","year":"2011"},{"key":"10.1016\/j.compeleceng.2026.111139_b55","series-title":"2011 international conference on document analysis and recognition","first-page":"1485","article-title":"ICDAR 2011 robust reading competition challenge 1: Reading text in Born-digital images (web and email)","author":"Hedjam","year":"2011"},{"key":"10.1016\/j.compeleceng.2026.111139_b56","first-page":"1429","article-title":"ICDAR2017 competition on reading Chinese text in the wild (RCTW-17)","volume":"vol. 1","author":"Shi","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b57","series-title":"CASIA-10k: A large-scale scene text dataset","author":"Cao","year":"2018"},{"key":"10.1016\/j.compeleceng.2026.111139_b58","series-title":"BanglaLekha-isolated: Handwritten bangla character dataset","author":"Bengali.AI","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b59","series-title":"Devanagari handwritten character dataset","author":"Arora","year":"2018"},{"key":"10.1016\/j.compeleceng.2026.111139_b60","series-title":"Old tamil character dataset (TamilMNIST)","author":"Chittampally","year":"2021"},{"key":"10.1016\/j.compeleceng.2026.111139_b61","series-title":"IndicCorp: A massive multilingual text corpus for indic languages","author":"Kakwani","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b62","series-title":"Indicglue: A benchmark for natural language understanding in indic languages","author":"Kumar","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b63","series-title":"International conference on document analysis and recognition","first-page":"1582","article-title":"ICDAR 2019 competition on multilingual scene text detection and recognition (MLT-2019)","author":"Nayef","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b64","series-title":"2015 13th international conference on document analysis and recognition","first-page":"1156","article-title":"ICDAR 2015 competition on robust reading","author":"Karatzas","year":"2015"},{"key":"10.1016\/j.compeleceng.2026.111139_b65","series-title":"2006 IEEE international conference on multimedia and expo","first-page":"1721","article-title":"Multiscale edge-based text extraction from complex images","author":"Liu","year":"2006"},{"key":"10.1016\/j.compeleceng.2026.111139_b66","series-title":"2015 13th international conference on document analysis and recognition","first-page":"30","article-title":"YVT: YouTube video text dataset","author":"Phan","year":"2015"},{"key":"10.1016\/j.compeleceng.2026.111139_b67","series-title":"Seventh international conference on document analysis and recognition, 2003. proceedings","first-page":"682","article-title":"ICDAR 2003 robust reading competitions","author":"Lucas","year":"2003"},{"key":"10.1016\/j.compeleceng.2026.111139_b68","series-title":"Eighth international conference on document analysis and recognition","first-page":"80","article-title":"ICDAR 2005 text locating competition results","author":"Lucas","year":"2005"},{"issue":"4","key":"10.1016\/j.compeleceng.2026.111139_b69","first-page":"2","article-title":"Character recognition in natural images","volume":"2","author":"de Campos","year":"2009","journal-title":"Proc Int Conf Comput Vis Theory Appl Lisbon Port"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b70","first-page":"5","article-title":"Reading digits in natural images with unsupervised feature learning","volume":"2011","author":"Netzer","year":"2011","journal-title":"NIPS Work Deep Learn Unsupervised Featur Learn"},{"key":"10.1016\/j.compeleceng.2026.111139_b71","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"Krizhevsky","year":"2012","journal-title":"Adv Neural Inf Process Syst"},{"key":"10.1016\/j.compeleceng.2026.111139_b72","series-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014"},{"key":"10.1016\/j.compeleceng.2026.111139_b73","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","volume":"28","author":"Ren","year":"2015","journal-title":"Adv Neural Inf Process Syst"},{"key":"10.1016\/j.compeleceng.2026.111139_b74","first-page":"2961","article-title":"Mask r-cnn","author":"He","year":"2017","journal-title":"Proc IEEE Int Conf Comput Vis"},{"key":"10.1016\/j.compeleceng.2026.111139_b75","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J. Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2014, p. 580\u20137.","DOI":"10.1109\/CVPR.2014.81"},{"key":"10.1016\/j.compeleceng.2026.111139_b76","doi-asserted-by":"crossref","unstructured":"Zhou X, Yao C, Wen H, Wang Y, Zhou S, He W, Liang J. East: an efficient and accurate scene text detector. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017, p. 5551\u201360.","DOI":"10.1109\/CVPR.2017.283"},{"key":"10.1016\/j.compeleceng.2026.111139_b77","doi-asserted-by":"crossref","unstructured":"Liu X, Liang D, Yan S, Chen D, Qiao Y, Yan J. Fots: Fast oriented text spotting with a unified network. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018, p. 5676\u201385.","DOI":"10.1109\/CVPR.2018.00595"},{"key":"10.1016\/j.compeleceng.2026.111139_b78","article-title":"Textboxes: A fast text detector with a single deep neural network","volume":"vol. 31","author":"Liao","year":"2017"},{"issue":"8","key":"10.1016\/j.compeleceng.2026.111139_b79","doi-asserted-by":"crossref","first-page":"3676","DOI":"10.1109\/TIP.2018.2825107","article-title":"Textboxes++: A single-shot oriented scene text detector","volume":"27","author":"Liao","year":"2018","journal-title":"IEEE Trans Image Process"},{"key":"10.1016\/j.compeleceng.2026.111139_b80","series-title":"Proceedings of the 23rd international conference on machine learning","first-page":"369","article-title":"Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks","author":"Graves","year":"2006"},{"issue":"8","key":"10.1016\/j.compeleceng.2026.111139_b81","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput"},{"key":"10.1016\/j.compeleceng.2026.111139_b82","doi-asserted-by":"crossref","unstructured":"Baek J, Kim G, Lee J, Park S, Han D, Yun S, Oh SJ, Lee H. What is wrong with scene text recognition model comparisons? dataset and model analysis. In: Proceedings of the IEEE\/CVF international conference on computer vision. 2019, p. 4715\u201323.","DOI":"10.1109\/ICCV.2019.00481"},{"issue":"01","key":"10.1016\/j.compeleceng.2026.111139_b83","first-page":"8610","article-title":"Show, attend and read: A simple and strong baseline for irregular text recognition","volume":"33","author":"Li","year":"2019","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b84","first-page":"12216","article-title":"Decoupled attention network for text recognition","volume":"vol. 34","author":"Wang","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b85","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Adv Neural Inf Process Syst"},{"key":"10.1016\/j.compeleceng.2026.111139_b86","series-title":"International conference on pattern recognition","first-page":"6542","article-title":"GAN-based synthetic data generation for rare conjunct handling in indic scripts","author":"Singh","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b87","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Song X, Zang Y, Wang W, Lu T, Yu G, Shen C. Efficient and accurate arbitrary-shaped text detection with pixel aggregation network. In: Proceedings of the IEEE\/CVF international conference on computer vision. 2019, p. 8440\u20139.","DOI":"10.1109\/ICCV.2019.00853"},{"issue":"11","key":"10.1016\/j.compeleceng.2026.111139_b88","doi-asserted-by":"crossref","first-page":"5566","DOI":"10.1109\/TIP.2019.2900589","article-title":"Textfield: Learning a deep direction field for irregular scene text detection","volume":"28","author":"Xu","year":"2019","journal-title":"IEEE Trans Image Process"},{"key":"10.1016\/j.compeleceng.2026.111139_b89","doi-asserted-by":"crossref","unstructured":"Tian S, Lu S, Li C. We text: Scene text detection under weak supervision. In: Proceedings of the IEEE international conference on computer vision. 2017, p. 1492\u2013500.","DOI":"10.1109\/ICCV.2017.166"},{"key":"10.1016\/j.compeleceng.2026.111139_b90","unstructured":"Li Z, Chen Z, Jia C, Yin X, Du Y, Li C, Du Y, Jiang Y-G. Svtr: Scene text recognition with a single visual model. In: Proceedings of the European conference on computer vision. 2022."},{"key":"10.1016\/j.compeleceng.2026.111139_b91","series-title":"SVTR: Scene text recognition with a single visual model","author":"Du","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b92","article-title":"Learning to segment text lines in historical documents using graph neural networks","volume":"104","author":"Qiao","year":"2020","journal-title":"Pattern Recognit"},{"key":"10.1016\/j.compeleceng.2026.111139_b93","first-page":"211","article-title":"Text clustering with deep neural networks","volume":"400","author":"Zhu","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.compeleceng.2026.111139_b94","unstructured":"Verma S, Agarwal P. Graph Neural Networks for Modeling Spatial Dependencies in Indic Script Text. In: Proceedings of the European conference on computer vision (ECCV) workshops. 2022, p. 401\u201315."},{"key":"10.1016\/j.compeleceng.2026.111139_b95","unstructured":"Liao M, et al. Graph-based scene text detection. In: IEEE transactions on pattern analysis and machine intelligence. 2022."},{"key":"10.1016\/j.compeleceng.2026.111139_b96","article-title":"Emerging properties in self-supervised vision transformers","author":"Caron","year":"2021","journal-title":"ICCV"},{"key":"10.1016\/j.compeleceng.2026.111139_b97","first-page":"172","article-title":"Self-supervised learning for scene text detection and recognition","volume":"147","author":"Wang","year":"2021","journal-title":"Pattern Recognit Lett"},{"key":"10.1016\/j.compeleceng.2026.111139_b98","first-page":"345","article-title":"Self-supervised pre-training for indic language understanding","volume":"11","author":"Khatri","year":"2023","journal-title":"Trans Assoc Comput Linguist"},{"key":"10.1016\/j.compeleceng.2026.111139_b99","doi-asserted-by":"crossref","unstructured":"Xue Z-Y, Zhao L-J, Zhang J-Y, Luo X, Xu X-S. SSCD: Self-Supervised Coherence Discrimination Representation Learning for Scene Text Recognition. In: Proceedings of the 2025 international conference on multimedia retrieval. 2025, p. 1644\u201353.","DOI":"10.1145\/3731715.3733436"},{"key":"10.1016\/j.compeleceng.2026.111139_b100","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2004.1315187","article-title":"Detecting and reading text in natural scenes","volume":"2","author":"Chen","year":"2004","journal-title":"Proc 2004 IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"10.1016\/j.compeleceng.2026.111139_b101","first-page":"425","article-title":"Text detection in images based on unsupervised classification of high-frequency wavelet coefficients","volume":"vol. 1","author":"Gllavata","year":"2004"},{"key":"10.1016\/j.compeleceng.2026.111139_b102","first-page":"2326","article-title":"Text detection and segmentation in complex color images","volume":"vol. 4","author":"Garcia","year":"2000"},{"issue":"12","key":"10.1016\/j.compeleceng.2026.111139_b103","doi-asserted-by":"crossref","first-page":"1631","DOI":"10.1109\/TPAMI.2003.1251157","article-title":"Texture-based approach for text detection in images using support vector machines and continuously adaptive mean shift algorithm","volume":"25","author":"Kim","year":"2003","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b104","series-title":"2011 international conference on document analysis and recognition","first-page":"429","article-title":"Adaboost for text detection in natural scene","author":"Lee","year":"2011"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b105","first-page":"601","article-title":"Indian script character recognition: a survey","volume":"45","author":"Pal","year":"2012","journal-title":"Pattern Recognit"},{"issue":"4","key":"10.1016\/j.compeleceng.2026.111139_b106","first-page":"249","article-title":"Multi-script scene text detection: a survey","volume":"25","author":"Chanda","year":"2022","journal-title":"Int J Doc Anal Recognit (IJDAR)"},{"issue":"6","key":"10.1016\/j.compeleceng.2026.111139_b107","doi-asserted-by":"crossref","first-page":"2296","DOI":"10.1109\/TIP.2013.2249082","article-title":"Scene text detection via connected component clustering and nontext filtering","volume":"22","author":"Koo","year":"2013","journal-title":"IEEE Trans Image Process"},{"key":"10.1016\/j.compeleceng.2026.111139_b108","series-title":"2017 2nd international conference on image, vision and computing","first-page":"26","article-title":"Natural scene text detection based on SWT, MSER and candidate classification","author":"Guan","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b109","series-title":"2015 13th international conference on document analysis and recognition","first-page":"206","article-title":"Object proposals for text extraction in the wild","author":"Gomez","year":"2015"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b110","doi-asserted-by":"crossref","first-page":"433","DOI":"10.1007\/s11831-019-09315-1","article-title":"Review of scene text detection and recognition","volume":"27","author":"Lin","year":"2020","journal-title":"Arch Comput Methods Eng"},{"key":"10.1016\/j.compeleceng.2026.111139_b111","series-title":"European conference on computer vision","first-page":"21","article-title":"Ssd: Single shot multibox detector","author":"Liu","year":"2016"},{"issue":"11","key":"10.1016\/j.compeleceng.2026.111139_b112","doi-asserted-by":"crossref","first-page":"3111","DOI":"10.1109\/TMM.2018.2818020","article-title":"Arbitrary-oriented scene text detection via rotation proposals","volume":"20","author":"Ma","year":"2018","journal-title":"IEEE Trans Multimed"},{"key":"10.1016\/j.compeleceng.2026.111139_b113","doi-asserted-by":"crossref","unstructured":"Liu Y, Jin L. Deep matching prior network: Toward tighter multi-oriented text detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017, p. 1962\u20139.","DOI":"10.1109\/CVPR.2017.368"},{"key":"10.1016\/j.compeleceng.2026.111139_b114","doi-asserted-by":"crossref","unstructured":"Hu H, Zhang C, Luo Y, Wang Y, Han J, Ding E. Wordsup: Exploiting word annotations for character based text detection. In: Proceedings of the IEEE international conference on computer vision. 2017, p. 4940\u20139.","DOI":"10.1109\/ICCV.2017.529"},{"key":"10.1016\/j.compeleceng.2026.111139_b115","doi-asserted-by":"crossref","unstructured":"He P, Huang W, He T, Zhu Q, Qiao Y, Li X. Single shot text detector with regional attention. In: Proceedings of the IEEE international conference on computer vision. 2017, p. 3047\u201355.","DOI":"10.1109\/ICCV.2017.331"},{"key":"10.1016\/j.compeleceng.2026.111139_b116","doi-asserted-by":"crossref","unstructured":"Liao M, Zhu Z, Shi B, Xia G-s, Bai X. Rotation-sensitive regression for oriented scene text detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018, p. 5909\u201318.","DOI":"10.1109\/CVPR.2018.00619"},{"key":"10.1016\/j.compeleceng.2026.111139_b117","doi-asserted-by":"crossref","unstructured":"Wang F, Zhao L, Li X, Wang X, Tao D. Geometry-aware scene text detection with instance transformation network. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018, p. 1381\u20139.","DOI":"10.1109\/CVPR.2018.00150"},{"key":"10.1016\/j.compeleceng.2026.111139_b118","series-title":"Computer vision\u2013ECCV 2016: 14th European conference, amsterdam, the netherlands, October 11-14, 2016, proceedings, part VIII 14","first-page":"56","article-title":"Detecting text in natural image with connectionist text proposal network","author":"Tian","year":"2016"},{"key":"10.1016\/j.compeleceng.2026.111139_b119","doi-asserted-by":"crossref","unstructured":"Shi B, Bai X, Belongie S. Detecting oriented text in natural images by linking segments. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017, p. 2550\u20138.","DOI":"10.1109\/CVPR.2017.371"},{"key":"10.1016\/j.compeleceng.2026.111139_b120","series-title":"Learning markov clustering networks for scene text detection","author":"Liu","year":"2018"},{"key":"10.1016\/j.compeleceng.2026.111139_b121","doi-asserted-by":"crossref","unstructured":"Lyu P, Yao C, Wu W, Yan S, Bai X. Multi-oriented scene text detection via corner localization and region segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018, p. 7553\u201363.","DOI":"10.1109\/CVPR.2018.00788"},{"key":"10.1016\/j.compeleceng.2026.111139_b122","first-page":"2105","article-title":"Joint script identification and text recognition using multi-task learning","volume":"25","author":"Nair","year":"2023","journal-title":"IEEE Trans Multimed"},{"issue":"10","key":"10.1016\/j.compeleceng.2026.111139_b123","doi-asserted-by":"crossref","first-page":"761","DOI":"10.1016\/j.imavis.2004.02.006","article-title":"Robust wide-baseline stereo from maximally stable extremal regions","volume":"22","author":"Matas","year":"2004","journal-title":"Image Vis Comput"},{"key":"10.1016\/j.compeleceng.2026.111139_b124","doi-asserted-by":"crossref","unstructured":"Zhang Z, Zhang C, Shen W, Yao C, Liu W, Bai X. Multi-oriented text detection with fully convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2016, p. 4159\u201367.","DOI":"10.1109\/CVPR.2016.451"},{"key":"10.1016\/j.compeleceng.2026.111139_b125","series-title":"Scene text detection via holistic, multi-channel prediction","author":"Yao","year":"2016"},{"key":"10.1016\/j.compeleceng.2026.111139_b126","doi-asserted-by":"crossref","unstructured":"He D, Yang X, Liang C, Zhou Z, Ororbi AG, Kifer D, Lee Giles C. Multi-scale FCN with cascaded instance aware segmentation for arbitrary oriented word spotting in the wild. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017, p. 3519\u201328.","DOI":"10.1109\/CVPR.2017.58"},{"key":"10.1016\/j.compeleceng.2026.111139_b127","first-page":"45","article-title":"Ensemble deep learning for degraded historical indic document analysis","volume":"49","author":"Choudhary","year":"2021","journal-title":"J Cult Herit"},{"key":"10.1016\/j.compeleceng.2026.111139_b128","unstructured":"Verma S, Agarwal P. Graph Neural Networks for Modeling Spatial Dependencies in Indic Script Text. In: Proceedings of the European conference on computer vision (ECCV) workshops. 2022, p. 401\u201315."},{"key":"10.1016\/j.compeleceng.2026.111139_b129","doi-asserted-by":"crossref","unstructured":"Wu Y, Natarajan P. Self-organized text detection with minimal post-processing via border learning. In: Proceedings of the IEEE international conference on computer vision. 2017, p. 5000\u20139.","DOI":"10.1109\/ICCV.2017.535"},{"key":"10.1016\/j.compeleceng.2026.111139_b130","doi-asserted-by":"crossref","unstructured":"Xue C, Lu S, Zhan F. Accurate scene text detection through border semantics awareness and bootstrapping. In: Proceedings of the European conference on computer vision. 2018, p. 355\u201372.","DOI":"10.1007\/978-3-030-01270-0_22"},{"key":"10.1016\/j.compeleceng.2026.111139_b131","series-title":"2019 international conference on document analysis and recognition","first-page":"1582","article-title":"ICDAR2019 robust reading challenge on multi-lingual scene text detection and recognition\u2013RRC-MLT-2019","author":"Nayef","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b132","doi-asserted-by":"crossref","unstructured":"Tian S, Lu S, Li C. Wetext: Scene text detection under weak supervision. In: Proceedings of the IEEE international conference on computer vision. 2017, p. 1492\u2013500.","DOI":"10.1109\/ICCV.2017.166"},{"key":"10.1016\/j.compeleceng.2026.111139_b133","doi-asserted-by":"crossref","unstructured":"He W, Zhang X-Y, Yin F, Liu C-L. Deep direct regression for multi-oriented scene text detection. In: Proceedings of the IEEE international conference on computer vision. 2017, p. 745\u201353.","DOI":"10.1109\/ICCV.2017.87"},{"key":"10.1016\/j.compeleceng.2026.111139_b134","series-title":"Computer vision\u2013ECCV 2014: 13th European conference, zurich, Switzerland, September 6-12, 2014, proceedings, part IV 13","first-page":"497","article-title":"Robust scene text detection with convolution neural network induced mser trees","author":"Huang","year":"2014"},{"key":"10.1016\/j.compeleceng.2026.111139_b135","series-title":"2017 1st international workshop on arabic script analysis and recognition","first-page":"26","article-title":"Unconstrained scene text and video text recognition for arabic script","author":"Jain","year":"2017"},{"key":"10.1016\/j.compeleceng.2026.111139_b136","doi-asserted-by":"crossref","first-page":"19801","DOI":"10.1109\/ACCESS.2019.2895876","article-title":"A novel dataset for english-arabic scene text recognition (EASTR)-42k and its evaluation using invariant feature extraction on detected extremal regions","volume":"7","author":"Ahmed","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.compeleceng.2026.111139_b137","article-title":"Mobile-optimized lightweight CNN for real-time indic script OCR","volume":"93","author":"Mehta","year":"2021","journal-title":"Comput Electr Eng"},{"key":"10.1016\/j.compeleceng.2026.111139_b138","series-title":"Video skimming for quick browsing based on audio and image characterization","author":"Smith","year":"1995"},{"key":"10.1016\/j.compeleceng.2026.111139_b139","series-title":"Proceedings 1998 IEEE international workshop on content-based access of image and video database","first-page":"52","article-title":"Video OCR for digital news archive","author":"Sato","year":"1998"},{"key":"10.1016\/j.compeleceng.2026.111139_b140","series-title":"Proceedings 11th international conference on image analysis and processing","first-page":"192","article-title":"Text enhancement with asymmetric filter for video OCR","author":"Chen","year":"2001"},{"key":"10.1016\/j.compeleceng.2026.111139_b141","article-title":"A new approach for video text detection","volume":"vol. 1","author":"Cai","year":"2002"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b142","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1109\/TCSVT.2004.841653","article-title":"A comprehensive method for multilingual video text detection, localization, and extraction","volume":"15","author":"Lyu","year":"2005","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"10.1016\/j.compeleceng.2026.111139_b143","series-title":"2009 10th international conference on document analysis and recognition","first-page":"66","article-title":"A Laplacian method for video text detection","author":"Phan","year":"2009"},{"key":"10.1016\/j.compeleceng.2026.111139_b144","first-page":"2294","article-title":"A combined algorithm for video text extraction","volume":"vol. 5","author":"Zhang","year":"2010"},{"key":"10.1016\/j.compeleceng.2026.111139_b145","first-page":"611","article-title":"A robust algorithm for text detection in images","volume":"vol. 2","author":"Gllavata","year":"2003"},{"key":"10.1016\/j.compeleceng.2026.111139_b146","first-page":"701","article-title":"An edge-based text region extraction algorithm for indoor mobile robot navigation","volume":"vol. 2","author":"Liu","year":"2005"},{"issue":"15","key":"10.1016\/j.compeleceng.2026.111139_b147","doi-asserted-by":"crossref","first-page":"2431","DOI":"10.1016\/j.patrec.2005.04.014","article-title":"An efficient method of license plate location","volume":"26","author":"Zheng","year":"2005","journal-title":"Pattern Recognit Lett"},{"key":"10.1016\/j.compeleceng.2026.111139_b148","series-title":"2009 IEEE international conference on multimedia and expo","first-page":"514","article-title":"Video text detection based on filters and edge features","author":"Shivakumara","year":"2009"},{"key":"10.1016\/j.compeleceng.2026.111139_b149","series-title":"2009 10th international conference on document analysis and recognition","first-page":"156","article-title":"A gradient difference based technique for video text detection","author":"Shivakumara","year":"2009"},{"key":"10.1016\/j.compeleceng.2026.111139_b150","series-title":"2011 international conference on emerging trends in networks and computer communications","first-page":"487","article-title":"Novel approach for text extraction from natural images using ISEF edge detection","author":"Shah","year":"2011"},{"key":"10.1016\/j.compeleceng.2026.111139_b151","series-title":"International conference on pattern recognition, informatics and medical engineering","first-page":"444","article-title":"A novel approach for kannada text extraction","author":"Seeri","year":"2012"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b152","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1016\/j.jcss.2012.05.006","article-title":"An algorithm for accuracy enhancement of license plate recognition","volume":"79","author":"Zheng","year":"2013","journal-title":"J Comput System Sci"},{"key":"10.1016\/j.compeleceng.2026.111139_b153","series-title":"Image and graphics: 8th international conference, ICIG 2015, tianjin, China, August 13\u201316, 2015, proceedings, part III","first-page":"310","article-title":"Scene character and text recognition: The state-of-the-art","author":"Chen","year":"2015"},{"key":"10.1016\/j.compeleceng.2026.111139_b154","series-title":"2015 international conference on advances in computing, communications and informatics","first-page":"1436","article-title":"Text localization in video\/scene images using kirsch directional masks","author":"Shekar","year":"2015"},{"issue":"4","key":"10.1016\/j.compeleceng.2026.111139_b155","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1049\/iet-cvi.2013.0307","article-title":"Text detection and recognition in natural scene with edge analysis","volume":"9","author":"Yu","year":"2015","journal-title":"IET Comput Vis"},{"key":"10.1016\/j.compeleceng.2026.111139_b156","series-title":"2016 15th international conference on frontiers in handwriting recognition","first-page":"156","article-title":"Scene text detection via edge cue and multi-features","author":"Tang","year":"2016"},{"key":"10.1016\/j.compeleceng.2026.111139_b157","first-page":"1295","article-title":"Max-pooling based scene text proposal for scene text detection","volume":"vol. 1","author":"Van","year":"2017"},{"issue":"1","key":"10.1016\/j.compeleceng.2026.111139_b158","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1109\/TITS.2017.2749977","article-title":"Effective uyghur language text detection in complex background images for traffic prompt identification","volume":"19","author":"Yan","year":"2017","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"10.1016\/j.compeleceng.2026.111139_b159","series-title":"International conference on image and signal processing","first-page":"243","article-title":"Extraction and recognition of bangla texts from natural scene images using CNN","author":"Islam","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b160","doi-asserted-by":"crossref","DOI":"10.7717\/peerj-cs.717","article-title":"Urdu text in natural scene images: a new dataset and preliminary text detection","volume":"7","author":"Ali","year":"2021","journal-title":"PeerJ Comput Sci"},{"issue":"11","key":"10.1016\/j.compeleceng.2026.111139_b161","doi-asserted-by":"crossref","first-page":"5406","DOI":"10.1109\/TIP.2018.2855399","article-title":"Multi-oriented and multi-lingual scene text detection with direct regression","volume":"27","author":"He","year":"2018","journal-title":"IEEE Trans Image Process"},{"issue":"3","key":"10.1016\/j.compeleceng.2026.111139_b162","doi-asserted-by":"crossref","first-page":"542","DOI":"10.1109\/TPAMI.2017.2692763","article-title":"A unified framework for tracking based text detection and recognition from web videos","volume":"40","author":"Tian","year":"2017","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b163","doi-asserted-by":"crossref","first-page":"71299","DOI":"10.1109\/ACCESS.2019.2919994","article-title":"A detection and verification model based on SSD and encoder-decoder network for scene text detection","volume":"7","author":"Gao","year":"2019","journal-title":"IEEE Access"},{"issue":"1","key":"10.1016\/j.compeleceng.2026.111139_b164","article-title":"DetReco: Object-text detection and recognition based on deep neural network","volume":"2020","author":"Zhang","year":"2020","journal-title":"Math Probl Eng"},{"key":"10.1016\/j.compeleceng.2026.111139_b165","first-page":"112","article-title":"Hybrid CNN-BiLSTM with attention for robust indic script recognition","volume":"158","author":"Sharma","year":"2022","journal-title":"Pattern Recognit Lett"},{"key":"10.1016\/j.compeleceng.2026.111139_b166","first-page":"9452","article-title":"Vision transformers with script-adaptive tokenization for multilingual OCR","volume":"vol. 37","author":"Kumar","year":"2023"},{"key":"10.1016\/j.compeleceng.2026.111139_b167","series-title":"International conference on pattern recognition","first-page":"6542","article-title":"GAN-based synthetic data generation for rare conjunct handling in indic scripts","author":"Singh","year":"2020"},{"issue":"1","key":"10.1016\/j.compeleceng.2026.111139_b168","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1186\/s13634-021-00808-5","article-title":"PSENet-based efficient scene text detection","volume":"2021","author":"Liao","year":"2021","journal-title":"EURASIP J Adv Signal Process"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b169","doi-asserted-by":"crossref","DOI":"10.1145\/3440756","article-title":"Text recognition in the wild: A survey","volume":"54","author":"Chen","year":"2021","journal-title":"ACM Comput Surv"},{"key":"10.1016\/j.compeleceng.2026.111139_b170","series-title":"TPS++: Attention-enhanced thin-plate spline for scene text recognition","author":"Zheng","year":"2023"},{"key":"10.1016\/j.compeleceng.2026.111139_b171","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2021.107043","article-title":"Text detection and script identification in natural scene images using deep learning","volume":"91","author":"Khalil","year":"2021","journal-title":"Comput Electr Eng"},{"key":"10.1016\/j.compeleceng.2026.111139_b172","first-page":"4651","article-title":"Perceiver: General perception with iterative attention","author":"Jaegle","year":"2021","journal-title":"Int Conf Mach Learn"},{"key":"10.1016\/j.compeleceng.2026.111139_b173","article-title":"Hindi scene text recognition using structural features","author":"Ullah","year":"2006","journal-title":"Proc ICPR"},{"key":"10.1016\/j.compeleceng.2026.111139_b174","article-title":"Attention-based deep neural models for indic scene text recognition","volume":"168","author":"Singh","year":"2021","journal-title":"Expert Syst Appl"},{"key":"10.1016\/j.compeleceng.2026.111139_b175","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1007\/s10032-019-00320-5","article-title":"Indic handwritten and scene text recognition using deep learning","volume":"22","author":"Mathew","year":"2019","journal-title":"Int J Doc Anal Recognit"},{"key":"10.1016\/j.compeleceng.2026.111139_b176","article-title":"Towards robust multilingual scene text recognition","volume":"98","author":"Krishnan","year":"2020","journal-title":"Pattern Recognit"},{"issue":"11","key":"10.1016\/j.compeleceng.2026.111139_b177","doi-asserted-by":"crossref","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","article-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","volume":"39","author":"Shi","year":"2017","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"07","key":"10.1016\/j.compeleceng.2026.111139_b178","first-page":"11005","article-title":"GTC: Guided training of CTC towards efficient and accurate scene text recognition","volume":"34","author":"Hu","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b179","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2024.109861","article-title":"C3E: A framework for chart classification and content extraction","volume":"121","author":"Kanroo","year":"2025","journal-title":"Comput Electr Eng"},{"key":"10.1016\/j.compeleceng.2026.111139_b180","article-title":"Sequence to sequence learning with neural networks","volume":"vol. 27","author":"Sutskever","year":"2014"},{"key":"10.1016\/j.compeleceng.2026.111139_b181","series-title":"Learning phrase representations using RNN encoder-decoder for statistical machine translation","author":"Cho","year":"2014"},{"issue":"9","key":"10.1016\/j.compeleceng.2026.111139_b182","doi-asserted-by":"crossref","first-page":"2035","DOI":"10.1109\/TPAMI.2018.2848939","article-title":"ASTER: An attentional scene text recognizer with flexible rectification","volume":"41","author":"Shi","year":"2019","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"01","key":"10.1016\/j.compeleceng.2026.111139_b183","first-page":"8610","article-title":"Show, attend and read: A simple and strong baseline for irregular text recognition","volume":"33","author":"Li","year":"2019","journal-title":"Proc AAAI Conf Artif Intell"},{"issue":"07","key":"10.1016\/j.compeleceng.2026.111139_b184","first-page":"12216","article-title":"Decoupled attention network for text recognition","volume":"34","author":"Wang","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b185","series-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"Chung","year":"2014"},{"key":"10.1016\/j.compeleceng.2026.111139_b186","series-title":"2019 international conference on document analysis and recognition","first-page":"781","article-title":"NRTR: A no-recurrence sequence-to-sequence model for scene text recognition","author":"Sheng","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b187","series-title":"Proceedings of the 29th ACM international conference on multimedia","first-page":"3791","article-title":"MMOCR: A comprehensive toolbox for text detection, recognition and understanding","author":"Kuang","year":"2021"},{"key":"10.1016\/j.compeleceng.2026.111139_b188","doi-asserted-by":"crossref","unstructured":"Lee J, Park S, Baek J, Oh SJ, Kim S, Lee H. On Recognizing Texts of Arbitrary Shapes With 2D Self-Attention. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR) workshops. 2020.","DOI":"10.1109\/CVPRW50498.2020.00281"},{"issue":"07","key":"10.1016\/j.compeleceng.2026.111139_b189","first-page":"12120","article-title":"TextScanner: Reading characters in order for robust scene text recognition","volume":"34","author":"Wan","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b190","series-title":"Computer vision \u2013 ECCV 2020","first-page":"135","article-title":"RobustScanner: Dynamically enhancing positional clues for robust text recognition","author":"Yue","year":"2020"},{"key":"10.1016\/j.compeleceng.2026.111139_b191","series-title":"European conference on computer vision","first-page":"178","article-title":"Scene text recognition with permuted autoregressive sequence models","author":"Bautista","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b192","doi-asserted-by":"crossref","unstructured":"Yu D, Li X, Zhang C, Liu T, Han J, Liu J, Ding E. Towards accurate scene text recognition with semantic reasoning networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2020, p. 12113\u201322.","DOI":"10.1109\/CVPR42600.2020.01213"},{"key":"10.1016\/j.compeleceng.2026.111139_b193","doi-asserted-by":"crossref","unstructured":"Fang S, Xie H, Wang Y, Mao Z, Zhang Y. Read like humans: Autonomous, bidirectional and iterative language modeling for scene text recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021, p. 7098\u2013107.","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"10.1016\/j.compeleceng.2026.111139_b194","doi-asserted-by":"crossref","unstructured":"Fang S, Xie H, Wang Y, Mao Z, Zhang Y. Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021, p. 7098\u2013107.","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"10.1016\/j.compeleceng.2026.111139_b195","doi-asserted-by":"crossref","unstructured":"Wang Y, Xie H, Fang S, Wang J, Zhu S, Zhang Y. From Two to One: A New Scene Text Recognizer With Visual Language Modeling Network. In: Proceedings of the IEEE\/CVF international conference on computer vision. 2021, p. 14194\u2013203.","DOI":"10.1109\/ICCV48922.2021.01393"},{"key":"10.1016\/j.compeleceng.2026.111139_b196","doi-asserted-by":"crossref","unstructured":"Qiao Z, Zhou Y, Wei J, Wang W, Zhang Y, Jiang N, Wang H, Wang W. PIMNet: a parallel, iterative and mimicking network for scene text recognition. In: Proceedings of the 29th ACM international conference on multimedia. 2021, p. 2046\u201355.","DOI":"10.1145\/3474085.3475238"},{"key":"10.1016\/j.compeleceng.2026.111139_b197","series-title":"European conference on computer vision","first-page":"339","article-title":"Multi-granularity prediction for scene text recognition","author":"Wang","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b198","series-title":"Linguistic more: Taking a further step toward efficient and accurate scene text recognition","author":"Zhang","year":"2023"},{"key":"10.1016\/j.compeleceng.2026.111139_b199","series-title":"Context perception parallel decoder for scene text recognition","author":"Du","year":"2023"},{"key":"10.1016\/j.compeleceng.2026.111139_b200","series-title":"International conference on document analysis and recognition","first-page":"287","article-title":"Reciprocal feature learning via explicit and implicit tasks in scene text recognition","author":"Jiang","year":"2021"},{"key":"10.1016\/j.compeleceng.2026.111139_b201","series-title":"European conference on computer vision","first-page":"197","article-title":"When counting meets HMER: counting-aware network for handwritten mathematical expression recognition","author":"Li","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b202","doi-asserted-by":"crossref","unstructured":"Wang Y, Xie H, Fang S, Wang J, Zhu S, Zhang Y. From two to one: A new scene text recognizer with visual language modeling network. In: Proceedings of the IEEE\/CVF international conference on computer vision. 2021, p. 14194\u2013203.","DOI":"10.1109\/ICCV48922.2021.01393"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b203","doi-asserted-by":"crossref","first-page":"300","DOI":"10.1007\/s11263-023-01880-0","article-title":"Cdistnet: Perceiving multi-domain character distance for robust text recognition","volume":"132","author":"Zheng","year":"2024","journal-title":"Int J Comput Vis"},{"key":"10.1016\/j.compeleceng.2026.111139_b204","doi-asserted-by":"crossref","unstructured":"Xie Z, Huang Y, Zhu Y, Jin L, Liu Y, Xie L. Aggregation cross-entropy for sequence recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2019, p. 6538\u201347.","DOI":"10.1109\/CVPR.2019.00670"},{"key":"10.1016\/j.compeleceng.2026.111139_b205","first-page":"888","article-title":"Visual semantics allow for textual reasoning better in scene text recognition","volume":"vol. 36","author":"He","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b206","article-title":"Instruction-guided scene text recognition","author":"Du","year":"2025","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.compeleceng.2026.111139_b207","doi-asserted-by":"crossref","first-page":"100895","DOI":"10.1109\/ACCESS.2022.3207469","article-title":"A transformer-based framework for scene text recognition","volume":"10","author":"Selvam","year":"2022","journal-title":"IEEE Access"},{"key":"10.1016\/j.compeleceng.2026.111139_b208","doi-asserted-by":"crossref","DOI":"10.1109\/TIP.2024.3512354","article-title":"Clip4str: A simple baseline for scene text recognition with pre-trained vision-language model","author":"Zhao","year":"2024","journal-title":"IEEE Trans Image Process"},{"key":"10.1016\/j.compeleceng.2026.111139_b209","first-page":"345","article-title":"Self-supervised pre-training for indic language understanding","volume":"11","author":"Khatri","year":"2023","journal-title":"Trans Assoc Comput Linguist"},{"key":"10.1016\/j.compeleceng.2026.111139_b210","doi-asserted-by":"crossref","unstructured":"Xu J, Wang Y, Xie H, Zhang Y. Ote: Exploring accurate scene text recognition using one token. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2024, p. 28327\u201336.","DOI":"10.1109\/CVPR52733.2024.02676"},{"key":"10.1016\/j.compeleceng.2026.111139_b211","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2024.109794","article-title":"A robust solution for recognizing accurate handwritten text extraction using quantum convolutional neural network and transformer models","volume":"120","author":"Aparna","year":"2024","journal-title":"Comput Electr Eng"},{"key":"10.1016\/j.compeleceng.2026.111139_b212","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111229","article-title":"MAGIC: Multi-granularity domain adaptation for text recognition","volume":"161","author":"Zhang","year":"2025","journal-title":"Pattern Recognit"},{"key":"10.1016\/j.compeleceng.2026.111139_b213","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/j.patrec.2024.01.008","article-title":"Sequential visual and semantic consistency for semi-supervised text recognition","volume":"178","author":"Yang","year":"2024","journal-title":"Pattern Recognit Lett"},{"key":"10.1016\/j.compeleceng.2026.111139_b214","doi-asserted-by":"crossref","unstructured":"Zhao S, Du Y, Chen Z, Jiang Y-G. Decoder pre-training with only text for scene text recognition. In: Proceedings of the 32nd ACM international conference on multimedia. 2024, p. 5191\u2013200.","DOI":"10.1145\/3664647.3681390"},{"key":"10.1016\/j.compeleceng.2026.111139_b215","first-page":"9452","article-title":"Vision transformers with script-adaptive tokenization for multilingual OCR","volume":"vol. 37","author":"Kumar","year":"2023"},{"issue":"4","key":"10.1016\/j.compeleceng.2026.111139_b216","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1007\/s10032-011-0170-8","article-title":"Automatic processing of handwritten bank cheque images: a survey","volume":"15","author":"Jayadevan","year":"2011","journal-title":"Int J Doc Anal Recognit (IJDAR)"},{"key":"10.1016\/j.compeleceng.2026.111139_b217","series-title":"2016 international conference on communication and electronics systems","first-page":"1","article-title":"A novel method for off-line handwritten gujarati character recognition using neural network","author":"Patel","year":"2016"},{"key":"10.1016\/j.compeleceng.2026.111139_b218","first-page":"137004","article-title":"OCR is not all you need: Benchmarking multimodal invoice comprehension with deep learning","volume":"9","author":"Bhunia","year":"2021","journal-title":"IEEE Access"},{"key":"10.1016\/j.compeleceng.2026.111139_b219","series-title":"2019 international conference on document analysis and recognition","first-page":"1050","article-title":"Script identification in natural scene image and video frames using an attention based convolutional-LSTM network","author":"Dutta","year":"2019"},{"key":"10.1016\/j.compeleceng.2026.111139_b220","first-page":"123438","article-title":"Handwriting recognition in low-resource scripts using adversarial learning","volume":"9","author":"Bhunia","year":"2021","journal-title":"IEEE Access"},{"issue":"3","key":"10.1016\/j.compeleceng.2026.111139_b221","first-page":"143","article-title":"Scene text detection and recognition with advances in deep learning: a survey","volume":"25","author":"Lu","year":"2021","journal-title":"Int J Doc Anal Recognit (IJDAR)"},{"key":"10.1016\/j.compeleceng.2026.111139_b222","series-title":"2022 2nd international conference on digital futures and transformative technologies","first-page":"1","article-title":"GAN-based data augmentation for handwritten text recognition","author":"Arafat","year":"2022"},{"key":"10.1016\/j.compeleceng.2026.111139_b223","first-page":"213732","article-title":"Transfer learning for low-resource handwritten text recognition","volume":"8","author":"Masalmah","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.compeleceng.2026.111139_b224","doi-asserted-by":"crossref","unstructured":"Gupta A, Vedaldi A, Zisserman A. Synthetic data for text localisation in natural images. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2016, p. 2315\u201324.","DOI":"10.1109\/CVPR.2016.254"},{"issue":"2","key":"10.1016\/j.compeleceng.2026.111139_b225","first-page":"45","article-title":"Synthetic data generation for robust Indian language text recognition","volume":"9","author":"Desai","year":"2023","journal-title":"J Imaging"},{"key":"10.1016\/j.compeleceng.2026.111139_b226","series-title":"Synthetic data generation for low-resource indic languages","author":"JD","year":"2023"},{"key":"10.1016\/j.compeleceng.2026.111139_b227","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2023.109025","article-title":"A crowdsource based framework for Bengali scene text data collection and detection","volume":"112","author":"Hossain","year":"2023","journal-title":"Comput Electr Eng"},{"issue":"33","key":"10.1016\/j.compeleceng.2026.111139_b228","first-page":"1","article-title":"Challenges and opportunities for scene text recognition in Indian languages","volume":"176","author":"Reddy","year":"2020","journal-title":"Int J Comput Appl"},{"key":"10.1016\/j.compeleceng.2026.111139_b229","first-page":"4168","article-title":"Robust scene text recognition with automatic rectification","author":"Shi","year":"2016","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit"},{"issue":"5","key":"10.1016\/j.compeleceng.2026.111139_b230","first-page":"1537","article-title":"Convolutional neural networks for scene text detection and recognition","volume":"129","author":"Liu","year":"2021","journal-title":"Int J Comput Vis"},{"issue":"3","key":"10.1016\/j.compeleceng.2026.111139_b231","first-page":"857","article-title":"Recognition of offline handwritten devanagari words using deep learning","volume":"12","author":"Sharma","year":"2020","journal-title":"Int J Inf Technol"},{"key":"10.1016\/j.compeleceng.2026.111139_b232","first-page":"43","article-title":"Transformer-based approach for handwritten text recognition in multilingual documents","volume":"161","author":"Bhunia","year":"2022","journal-title":"Pattern Recognit Lett"},{"key":"10.1016\/j.compeleceng.2026.111139_b233","series-title":"Handwritten multilingual benchmark dataset (HMBD)","author":"Devalla","year":"2021"}],"container-title":["Computers and Electrical Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0045790626002119?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0045790626002119?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T16:30:10Z","timestamp":1778603410000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0045790626002119"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":233,"alternative-id":["S0045790626002119"],"URL":"https:\/\/doi.org\/10.1016\/j.compeleceng.2026.111139","relation":{},"ISSN":["0045-7906"],"issn-type":[{"value":"0045-7906","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"From pixels to text: A deep learning survey of scene text detection and recognition","name":"articletitle","label":"Article Title"},{"value":"Computers and Electrical Engineering","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.compeleceng.2026.111139","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"111139"}}