{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T04:51:58Z","timestamp":1775019118000,"version":"3.50.1"},"reference-count":49,"publisher":"Pleiades Publishing Ltd","issue":"8","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Program Comput Soft"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1134\/s0361768823080212","type":"journal-article","created":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T09:30:41Z","timestamp":1706088641000},"page":"954-965","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Scene Text Detection Using HRNet and Spatial Attention Mechanism"],"prefix":"10.1134","volume":"49","author":[{"given":"Qingsong","family":"Tang","sequence":"first","affiliation":[]},{"given":"Zhangyan","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Bolin","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Jinting","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Wuming","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"137","published-online":{"date-parts":[[2024,1,24]]},"reference":[{"key":"3800_CR1","doi-asserted-by":"publisher","first-page":"3239","DOI":"10.1007\/s10462-020-09930-6","volume":"54","author":"T. Khan","year":"2021","unstructured":"Khan, T., Sarkar, R., and Mollah, A.F., Deep learning approaches to scene text detection: a comprehensive review, Artif. Intell. Rev., 2021, vol. 54, no. 5, pp. 3239\u20133298.","journal-title":"Artif. Intell. Rev."},{"key":"3800_CR2","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1007\/s11263-020-01369-0","volume":"129","author":"S. Long","year":"2021","unstructured":"Long, S., He, X., and Yao, C., Scene text detection and recognition: the deep learning era, Int. J. Comput. Vis., 2021, vol. 129, pp. 161\u2013184.","journal-title":"Int. J. Comput. Vis."},{"key":"3800_CR3","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S. Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., and Sun, J., Faster R-CNN: towards real-time object detection with region proposal networks, IEEE Trans. Pattern Anal. Mach. Intell., 2017, vol. 39, no. 6, pp. 1137\u20131149.","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3800_CR4","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., and Berg, A.C., SSD: aingle shot multibox detector, Proc. European Conf. on Computer Vision, Amsterdam, 2016, pp. 21\u201337.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"3800_CR5","doi-asserted-by":"crossref","unstructured":"Tian, Z., Huang, W., He, T., He, P., and Qiao, Y., Detecting text in natural image with connectionist text proposal network, Proc. European Conf. on Computer Vision, Amsterdam, 2016, pp. 56\u201372.","DOI":"10.1007\/978-3-319-46484-8_4"},{"key":"3800_CR6","doi-asserted-by":"crossref","unstructured":"Liao, M., Shi, B., Bai, X., Wang, X., and Liu, W., Textboxes: a fast text detector with a single deep neural network, in Proc. 31st AAAI Conf. on Artificial Intelligence, Palo Alto, CA: AAAI Press, 2017, vol. 31, no. 1.","DOI":"10.1609\/aaai.v31i1.11196"},{"key":"3800_CR7","doi-asserted-by":"publisher","first-page":"3676","DOI":"10.1109\/TIP.2018.2825107","volume":"27","author":"M. Liao","year":"2018","unstructured":"Liao, M., Shi, B., and Bai, X., Textboxes++: a single-shot oriented scene text detector, IEEE Trans. Image Process., 2018, vol. 27, no. 8, pp. 3676\u20133690.","journal-title":"IEEE Trans. Image Process."},{"key":"3800_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-015-0823-z","volume":"116","author":"M. Jaderberg","year":"2018","unstructured":"Jaderberg, M., Simonyan, K., Vedaldi, A., and Zisserman, A., Reading text in the wild with convolutional neural networks, Int. J. Comput. Vis., 2018, vol. 116, no.\u00a01, pp. 1\u201320.","journal-title":"Int. J. Comput. Vis."},{"key":"3800_CR9","doi-asserted-by":"crossref","unstructured":"Zitnick, C.L., and Dollar, P., Edge boxes: locating object proposals from edges, Proc. European Conf. on Computer Vision, Zurich, 2014, pp. 391\u2013405.","DOI":"10.1007\/978-3-319-10602-1_26"},{"key":"3800_CR10","doi-asserted-by":"crossref","unstructured":"Dai, P., Zhang, S., Zhang, H., and Cao, X., Progressive contour regression for arbitrary-shape scene text detection, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Nashville, TN, 2021, pp. 7393\u20137402.","DOI":"10.1109\/CVPR46437.2021.00731"},{"key":"3800_CR11","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Hou, W., Lu, T., Yu, G., and Shao, S., Shape robust text detection with progressive scale expansion network, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Long Beach, CA, 2019, pp. 9336\u20139345.","DOI":"10.1109\/CVPR.2019.00956"},{"key":"3800_CR12","doi-asserted-by":"crossref","unstructured":"Deng, D., Liu, H., Li, X., and Cai, D., Pixellink: detecting scene text via instance segmentation, Proc. AAAI Conf. on Artificial Intelligence, New Orleans, 2018, vol.\u00a032, no. 1.","DOI":"10.1609\/aaai.v32i1.12269"},{"key":"3800_CR13","doi-asserted-by":"crossref","unstructured":"Liao, M., Wan, Z., Yao, C., Chen, K., and Bai, X., Real-time scene text detection with differentiable binarization, Proc. AAAI Conf. on Artificial Intelligence, New York, 2020, vol. 34, no. 7, pp. 11474\u201311481.","DOI":"10.1609\/aaai.v34i07.6812"},{"key":"3800_CR14","doi-asserted-by":"publisher","first-page":"919","DOI":"10.1109\/TPAMI.2022.3155612","volume":"45","author":"M. Liao","year":"2023","unstructured":"Liao, M., Zou, Z., Wan, Z., Yao, C., and Bai, X., Real-time scene text detection with differentiable binarization and adaptive scale fusion, IEEE Trans. Pattern Anal. Mach. Intell., 2023, vol. 45, no. 1, pp. 919\u2013931.","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3800_CR15","doi-asserted-by":"crossref","unstructured":"Wu, Y. and Natarajan, P., Self-organized text detection with minimal post-processing via border learning, Proc. IEEE Int. Conf. on Computer Vision, Venice, 2017, pp.\u00a05000\u20135009.","DOI":"10.1109\/ICCV.2017.535"},{"key":"3800_CR16","first-page":"2736","volume":"45","author":"S.X. Zhang","year":"2022","unstructured":"Zhang, S.X., Zhu, X., Chen, L., Hou, J.B., and Yin, X.C., Arbitrary shape text detection via segmentation with probability maps, IEEE Trans. Pattern Anal. Mach. Intell., 2022,vol. 45, no. 3, pp. 2736\u20132750.","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3800_CR17","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shu, M., Lyu, P., Li, R., Zhou, C., Shen, X., and Jia, J., Learning shape-aware embedding for scene text detection, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Long Beach, 2019, pp.\u00a04234\u20134243.","DOI":"10.1109\/CVPR.2019.00436"},{"key":"3800_CR18","doi-asserted-by":"crossref","unstructured":"Lyu, P., Liao, M., Yao, C., Wu, W., and Bai, X., Mask textspotter: an end-to-end trainable neural network for spotting text with arbitrary shapes, Proc. European Conf. on Computer Vision, Munich, 2018, pp. 67\u201383.","DOI":"10.1007\/978-3-030-01264-9_5"},{"key":"3800_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., and Sun, J., Identity mappings in deep residual networks, Proc. European Conf. on Computer Vision, Amsterdam, 2016, pp. 630\u2013645.","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"3800_CR20","unstructured":"Simonyan, K. and Zisserman, A., Very deep convolutional networks for large-scale image recognition, 2014. arXiv:1409.1556."},{"key":"3800_CR21","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Dollar, P., Girshick, R., He, K., Hariharan, B., and Belongie, S., Feature pyramid networks for object detection, Proc. IEEE Conf. on Computer Vision and Pattern Recognition, Honolulu, 2017, pp. 2117\u20132125.","DOI":"10.1109\/CVPR.2017.106"},{"key":"3800_CR22","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J. Wang","year":"2020","unstructured":"Wang, J., Sun, K., Cheng, T., Jiang, B., Deng, C., Zhao, Y., and Xiao, B., Deep high-resolution representation learning for visual recognition, IEEE Trans. Pattern Anal. Mach. Intell., 2020, vol. 43, no. 10, pp. 3349\u20133364.","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3800_CR23","doi-asserted-by":"crossref","unstructured":"Karatzas, D., Gomez-Bigorda, L., Nicolaou, A., Ghosh, S., Bagdanov, A., Iwamura, M., and Valveny, E., ICDAR 2015 competition on robust reading, Proc. 13th Int. Conf. on Document Analysis and Recognition, Tunis, 2015, pp. 1156\u20131160.","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"3800_CR24","doi-asserted-by":"crossref","unstructured":"Chee, C.K. and Chan, C.S., Total-text: a comprehensive dataset for scene text detection and recognition, Proc. 14th IAPR Int. Conf. on Document Analysis and Recognition, Kyoto, 2017, vol. 1, pp. 935\u2013942.","DOI":"10.1109\/ICDAR.2017.157"},{"key":"3800_CR25","doi-asserted-by":"crossref","unstructured":"Nayef, N., Yin, F., Bizid, I., Choi, H., Feng, Y., Karatzas, D., and Ogier, J.M., ICDAR 2017 robust reading challenge on multi-lingual scene text detection and script identification-rrc-mlt, Proc. 14th IAPR Int. Conf. on Document Analysis and Recognition, Kyoto, 2017, vol. 1, pp. 1454\u20131459.","DOI":"10.1109\/ICDAR.2017.237"},{"key":"3800_CR26","doi-asserted-by":"crossref","unstructured":"Yao, C., Bai, X., Liu, W., Ma, Y., and Tu, Z., Detecting texts of arbitrary orientations in natural images, Proc. IEEE Conf. on Computer Vision and Pattern Recognition, Providence, RI, 2012, pp. 1083\u20131090.","DOI":"10.1109\/CVPR.2012.6247787"},{"key":"3800_CR27","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1016\/j.patcog.2019.02.002","volume":"90","author":"Y. Liu","year":"2019","unstructured":"Liu, Y., Jin, L., Zhang, S., Luo, C., and Zhang, S., Curved scene text detection via transverse and longitudinal sequence connection, Pattern Recogn., 2019, vol. 90, pp. 337\u2013345.","journal-title":"Pattern Recogn."},{"key":"3800_CR28","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., and Wang, J., Deep high-resolution representation learning for human pose estimation, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Long Beach, CA, 2019, pp. 5693\u20135703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"3800_CR29","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/129902.129906","volume":"35","author":"B.R. Vatti","year":"1992","unstructured":"Vatti, B.R., A generic solution to polygon clipping, Commun. ACM, 1992, vol. 35, no. 7, pp. 56\u201363.","journal-title":"Commun. ACM"},{"key":"3800_CR30","doi-asserted-by":"publisher","first-page":"6073","DOI":"10.1109\/TCSVT.2022.3156390","volume":"32","author":"T. Guan","year":"2022","unstructured":"Guan, T., Gu, C., Lu, C., et al., Industrial scene text detection with refined feature-attentive network, IEEE Trans. Circuits Syst. Video Technol., 2022, vol. 32, no. 9, pp. 6073\u20136085.","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3800_CR31","doi-asserted-by":"publisher","first-page":"107980","DOI":"10.1016\/j.patcog.2021.107980","volume":"117","author":"N. Lu","year":"2021","unstructured":"Lu, N., Yu, W., Qi, X., Chen, Y., Gong, P., Xiao, R., and Bai, X., Master: multi-aspect non-local network for scene text recognition, Pattern Recogn., 2021, vol. 117, p.\u00a0107980.","journal-title":"Pattern Recogn."},{"key":"3800_CR32","first-page":"1","volume":"15","author":"Z. Liu","year":"2019","unstructured":"Liu, Z., Zhou, W., and Li, H., AB-LSTM: attention-based bidirectional LSTM model for scene text detection, ACM Trans. Multimed. Comput. Commun. Appl., 2019, vol. 15, no. 4, pp. 1\u201323.","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"3800_CR33","doi-asserted-by":"publisher","first-page":"103261","DOI":"10.1016\/j.jvcir.2021.103261","volume":"80","author":"Y. Wu","year":"2021","unstructured":"Wu, Y., Liu, W., and Wan, S., Multiple attention encoded cascade R-CNN for scene text detection, J. Vis. Commun. Image Represent., 2021, vol. 80, p. 103261.","journal-title":"J. Vis. Commun. Image Represent."},{"key":"3800_CR34","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., and Kweon, I.S., CBAM: convolutional block attention module, Proc. European Conf. on Computer Vision, Munich, 2018, pp. 3\u201319.","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"3800_CR35","doi-asserted-by":"crossref","unstructured":"Biswas, K., Kumar, S., Banerjee, S., and Pandey, A.K., SMU: smooth activation function for deep networks using smoothing maximum technique, 2021. arXiv:2111.04682.","DOI":"10.1109\/CVPR52688.2022.00087"},{"key":"3800_CR36","doi-asserted-by":"publisher","first-page":"4737","DOI":"10.1109\/TIP.2014.2353813","volume":"23","author":"C. Yao","year":"2014","unstructured":"Yao, C., Bai, X., and Liu, W., A unified framework for multioriented text detection and recognition, IEEE Trans. Image Process., 2014, vol. 23, no. 11, pp. 4737\u20134749.","journal-title":"IEEE Trans. Image Process."},{"key":"3800_CR37","first-page":"47","volume":"2","author":"D.M. Powers","year":"2011","unstructured":"Powers, D.M., Evaluation: from precision, recall and F-measure to ROC, informedness, markedness and correlation, Int. J. Mach. Learn., 2011, vol. 2, no. 1, pp.\u00a047\u201363.","journal-title":"Int. J. Mach. Learn."},{"key":"3800_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, X., Yao, C., Wen, H., Wang, Y., Zhou, S., He, W., and Liang, J., East: an efficient and accurate scene text detector, Proc. IEEE Conf. on Computer Vision and Pattern Recognition, Honolulu, 2017, pp. 5551\u20135560.","DOI":"10.1109\/CVPR.2017.283"},{"key":"3800_CR39","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Chen, J., Liang, L., Kuang, Z., Jin, L., and Zhang, W., Fourier contour embedding for arbitrary-shaped text detection, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Nashville, TN, 2021, pp. 3123\u20133131.","DOI":"10.1109\/CVPR46437.2021.00314"},{"key":"3800_CR40","doi-asserted-by":"crossref","unstructured":"Long, S., Ruan, J., Zhang, W., He, X., Wu, W., and Yao, C., Textsnake: a flexible representation for detecting text of arbitrary shapes, Proc. European Conf. on Computer Vision, Munich, 2018, pp. 20\u201336.","DOI":"10.1007\/978-3-030-01216-8_2"},{"key":"3800_CR41","doi-asserted-by":"crossref","unstructured":"Shi, B., Bai, X., and Belongie, S., Detecting oriented text in natural images by linking segments, Proc. IEEE Conf. on Computer Vision and Pattern Recognition, Honolulu, 2017, pp. 2550\u20132558.","DOI":"10.1109\/CVPR.2017.371"},{"key":"3800_CR42","doi-asserted-by":"crossref","unstructured":"Wang, P., Zhang, C., Qi, F., Huang, Z., En, M., Han, J., and Shi, G., A single-shot arbitrarily-shaped text detector based on context attended multi-task learning, Proc. 27th ACM Int. Conf. on Multimedia, Nice, 2019, pp. 1277\u20131285.","DOI":"10.1145\/3343031.3350988"},{"key":"3800_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, C., Liang, B., Huang, Z., En, M., Han, J., Ding, E., and Ding, X., Look more than once: an accurate detector for text of arbitrary shapes, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Long Beach, CA, 2019, pp. 10552\u201310561.","DOI":"10.1109\/CVPR.2019.01080"},{"key":"3800_CR44","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Xie, H., Fang, S., Li, Y., and Zhang, Y., CRNet: a center-aware representation for detecting text of arbitrary shapes, Proc. 28th ACM Int. Conf. on Multimedia, Seattle, 2020, pp. 2571\u20132580.","DOI":"10.1145\/3394171.3413565"},{"key":"3800_CR45","doi-asserted-by":"crossref","unstructured":"Liu, Y., Chen, H., Shen, C., He, T., Jin, L., and Wang, L., Abcnet: real-time scene text spotting with adaptive bezier-curve network, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Seattle, 2020, pp.\u00a09809\u20139818.","DOI":"10.1109\/CVPR42600.2020.00983"},{"key":"3800_CR46","doi-asserted-by":"publisher","first-page":"5566","DOI":"10.1109\/TIP.2019.2900589","volume":"28","author":"Y. Xu","year":"2019","unstructured":"Xu, Y., Wang, Y., Zhou, W., Wang, Y., Yang, Z., and Bai, X., Textfield: learning a deep direction field for irregular scene text detection, IEEE Trans. Image Process., 2019, vol. 28, no. 11, pp. 5566\u20135579.","journal-title":"IEEE Trans. Image Process."},{"key":"3800_CR47","doi-asserted-by":"crossref","unstructured":"Baek, Y., Lee, B., Han, D., Yun, S., and Lee, H., Character region awareness for text detection, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Long Beach, CA, 2019, pp. 9365\u20139374.","DOI":"10.1109\/CVPR.2019.00959"},{"key":"3800_CR48","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, G., Yang, S., Feng, J., Lin, W., and Goh, W.L., Learning Markov clustering networks for scene text detection, Proc. IEEE\/CVF Conf. on Computer Vision and Pattern Recognition, Salt Lake City, UT, 2018, pp.\u00a06936\u20136944.","DOI":"10.1109\/CVPR.2018.00725"},{"key":"3800_CR49","doi-asserted-by":"crossref","unstructured":"Lyu, P., Yao, C., Wu, W., Yan, S., and Bai, X., Multi-oriented scene text detection via corner localization and region segmentation, Proc. IEEE Conf. on Computer Vision and Pattern Recognition, Salt Lake City, UT, 2018, pp. 7553\u20137563.","DOI":"10.1109\/CVPR.2018.00788"}],"container-title":["Programming and Computer Software"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1134\/S0361768823080212.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1134\/S0361768823080212","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1134\/S0361768823080212.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T02:58:43Z","timestamp":1775012323000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1134\/S0361768823080212"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":49,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["3800"],"URL":"https:\/\/doi.org\/10.1134\/s0361768823080212","relation":{},"ISSN":["0361-7688","1608-3261"],"issn-type":[{"value":"0361-7688","type":"print"},{"value":"1608-3261","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12]]},"assertion":[{"value":"10 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no conflicts of interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"CONFLICT OF INTEREST"}}]}}