{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T13:39:14Z","timestamp":1742996354841,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":25,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785100"},{"type":"electronic","value":"9789819785117"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8511-7_1","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:02:39Z","timestamp":1730523759000},"page":"3-15","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Scene Text Recognition Via k-NN Attention-Based Decoder and Margin-Based Softmax Loss"],"prefix":"10.1007","author":[{"given":"Hongxia","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Minqiang","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Liang","family":"He","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"1_CR1","unstructured":"Aberdam, A., Ganz, R., Mazor, S., Litman, R.: Multimodal semi-supervised learning for text recognition (2022). arXiv:2205.03873"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Atienza, R.: Vision transformer for fast and efficient scene text recognition. In: International Conference on Document Analysis and Recognition, pp. 319\u2013334. Springer (2021)","DOI":"10.1007\/978-3-030-86549-8_21"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Bautista, D., Atienza, R.: Scene text recognition with permuted autoregressive sequence models. In: European Conference on Computer Vision, pp. 178\u2013196. Springer (2022)","DOI":"10.1007\/978-3-031-19815-1_11"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., Zafeiriou, S.: Arcface: additive angular margin loss for deep face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4690\u20134699 (2019)","DOI":"10.1109\/CVPR.2019.00482"},{"key":"1_CR6","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale (2020). arXiv:2010.11929"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Du, Y., et al.: Svtr: scene text recognition with a single visual model (2022). arXiv:2205.00159","DOI":"10.24963\/ijcai.2022\/124"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Fang, S., Xie, H., Wang, Y., Mao, Z., Zhang, Y.: Read like humans: autonomous, bidirectional and iterative language modeling for scene text recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7098\u20137107 (2021)","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"1_CR10","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et\u00a0al.: Spatial transformer networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Jiang, Q., Wang, J., Peng, D., Liu, C., Jin, L.: Revisiting scene text recognition: a data perspective. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20543\u201320554 (2023)","DOI":"10.1109\/ICCV51070.2023.01878"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et\u00a0al.: Icdar 2015 competition on robust reading. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR), pp. 1156\u20131160. IEEE (2015)","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et al.: Icdar 2013 robust reading competition. In: 2013 12th International Conference on Document Analysis and Recognition, pp. 1484\u20131493. IEEE (2013)","DOI":"10.1109\/ICDAR.2013.221"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Lee, J., Park, S., Baek, J., Oh, S.J., Kim, S., Lee, H.: On recognizing texts of arbitrary shapes with 2d self-attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 546\u2013547 (2020)","DOI":"10.1109\/CVPRW50498.2020.00281"},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Li, M., et al.: Trocr: Transformer-based optical character recognition with pre-trained models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 13094\u201313102 (2023)","DOI":"10.1609\/aaai.v37i11.26538"},{"key":"1_CR16","unstructured":"Lyu, J., et al.: Textblockv2: towards precise-detection-free scene text spotting with pre-trained language model (2024). arXiv:2403.10047"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Mishra, A., Alahari, K., Jawahar, C.: Scene text recognition using higher order language priors. In: BMVC-British Machine Vision Conference. BMVA (2012)","DOI":"10.5244\/C.26.127"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Na, B., Kim, Y., Park, S.: Multi-modal text recognition networks: interactive enhancements between visual and semantic features. In: European Conference on Computer Vision, pp. 446\u2013463. Springer (2022)","DOI":"10.1007\/978-3-031-19815-1_26"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Phan, T.Q., Shivakumara, P., Tian, S., Tan, C.L.: Recognizing text with perspective distortion in natural scenes. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 569\u2013576 (2013)","DOI":"10.1109\/ICCV.2013.76"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Qiao, Z., Zhou, Y., Yang, D., Zhou, Y., Wang, W.: Seed: Semantics enhanced encoder-decoder framework for scene text recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13528\u201313537 (2020)","DOI":"10.1109\/CVPR42600.2020.01354"},{"issue":"18","key":"1_CR21","doi-asserted-by":"publisher","first-page":"8027","DOI":"10.1016\/j.eswa.2014.07.008","volume":"41","author":"A Risnumawan","year":"2014","unstructured":"Risnumawan, A., Shivakumara, P., Chan, C.S., Tan, C.L.: A robust arbitrary text detection system for natural scene images. Expert Syst. Appl. 41(18), 8027\u20138048 (2014)","journal-title":"Expert Syst. Appl."},{"key":"1_CR22","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Wang, H., et al.: Cosface: large margin cosine loss for deep face recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5265\u20135274 (2018)","DOI":"10.1109\/CVPR.2018.00552"},{"key":"1_CR24","unstructured":"Wang, K., Babenko, B., Belongie, S.: End-to-end scene text recognition. In: 2011 International Conference on Computer Vision, pp. 1457\u20131464. IEEE (2011)"},{"key":"1_CR25","unstructured":"Yang, X., Qiao, Z., Zhou, Y., Wang, W.: Ipad: iterative, parallel, and diffusion-based network for scene text recognition (2023). arXiv:2312.11923"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8511-7_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:03:01Z","timestamp":1730523781000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8511-7_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9789819785100","9789819785117"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8511-7_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}