{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T13:55:53Z","timestamp":1770990953739,"version":"3.50.1"},"reference-count":207,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T00:00:00Z","timestamp":1748476800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T00:00:00Z","timestamp":1748476800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100016386","name":"Conselleria de Innovaci\u00f3n, Universidades, Ciencia y Sociedad Digital, Generalitat Valenciana","doi-asserted-by":"publisher","award":["CIGE\/2023\/216"],"award-info":[{"award-number":["CIGE\/2023\/216"]}],"id":[{"id":"10.13039\/501100016386","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100009092","name":"Universidad de Alicante","doi-asserted-by":"publisher","award":["UAFPU22-19"],"award-info":[{"award-number":["UAFPU22-19"]}],"id":[{"id":"10.13039\/100009092","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,9]]},"abstract":"<jats:title>Abstract<\/jats:title>\n          <jats:p>Text Recognition (TR) refers to the research area that focuses on retrieving textual information from images, a topic that has seen significant advancements in the last decade due to the use of Deep Neural Networks (DNN). However, these solutions often necessitate vast amounts of manually labeled or synthetic data. Addressing this challenge, Self-Supervised Learning (SSL) has gained attention by utilizing large datasets of unlabeled data to train DNN, thereby generating meaningful and robust representations. Although SSL was initially overlooked in TR because of its unique characteristics, recent years have witnessed a surge in the development of SSL methods specifically for this field. This rapid development, however, has led to many methods being explored independently, without taking previous efforts in methodology or comparison into account, thereby hindering progress in the field of research. This paper, therefore, seeks to consolidate the use of SSL in the field of TR, offering a critical and comprehensive overview of the current state of the art. We will review and analyze the existing methods, compare their results, and highlight inconsistencies in the current literature. This thorough analysis aims to provide general insights into the field, propose standardizations, identify new research directions, and foster its proper development.<\/jats:p>","DOI":"10.1007\/s11263-025-02487-3","type":"journal-article","created":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T09:25:41Z","timestamp":1748510741000},"page":"6221-6250","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Self-Supervised Learning for Text Recognition: A Critical Survey"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6460-1831","authenticated-orcid":false,"given":"Carlos","family":"Penarrubia","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8667-4070","authenticated-orcid":false,"given":"Jose J.","family":"Valero-Mas","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3183-2232","authenticated-orcid":false,"given":"Jorge","family":"Calvo-Zaragoza","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,29]]},"reference":[{"key":"2487_CR1","doi-asserted-by":"crossref","unstructured":"Aberdam, A., Litman, R., Tsiper, S., et\u00a0al (2021). Sequence-to-sequence contrastive learning for text recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 15302\u201315312","DOI":"10.1109\/CVPR46437.2021.01505"},{"key":"2487_CR2","unstructured":"Aberdam, A., Ganz, R., Mazor, S., et\u00a0al (2022). Multimodal semi-supervised learning for text recognition. arXiv preprint arXiv:2205.03873"},{"key":"2487_CR3","doi-asserted-by":"crossref","unstructured":"Aggarwal, A., Mittal, M., & Battineni, G. (2021). Generative adversarial network: An overview of theory and applications. International Journal of Information Management Data Insights, 1(1), Article 100004.","DOI":"10.1016\/j.jjimei.2020.100004"},{"key":"2487_CR4","doi-asserted-by":"crossref","unstructured":"Agrawal, P., Carreira, J., & Malik, J. (2015). Learning to see by moving. In: Proceedings of the IEEE international conference on computer vision, pp 37\u201345","DOI":"10.1109\/ICCV.2015.13"},{"key":"2487_CR5","doi-asserted-by":"crossref","unstructured":"Akrim, A., Gogu, C., Vingerhoeds, R., et\u00a0al. (2023). Self-supervised learning for data scarcity in a fatigue damage prognostic problem. Engineering Applications of Artificial Intelligence, 120, Article 105837.","DOI":"10.1016\/j.engappai.2023.105837"},{"issue":"4","key":"2487_CR6","doi-asserted-by":"crossref","first-page":"551","DOI":"10.3390\/e24040551","volume":"24","author":"S Albelwi","year":"2022","unstructured":"Albelwi, S. (2022). Survey on self-supervised learning: auxiliary pretext tasks and contrastive learning methods in imaging. Entropy, 24(4), 551.","journal-title":"Entropy"},{"issue":"5","key":"2487_CR7","doi-asserted-by":"crossref","first-page":"2385","DOI":"10.3390\/s23052385","volume":"23","author":"AM Ali","year":"2023","unstructured":"Ali, A. M., Benjdira, B., Koubaa, A., et al. (2023). Vision transformers in image restoration: A survey. Sensors, 23(5), 2385.","journal-title":"Sensors"},{"issue":"1","key":"2487_CR8","doi-asserted-by":"crossref","first-page":"18","DOI":"10.3390\/jimaging10010018","volume":"10","author":"W AlKendi","year":"2024","unstructured":"AlKendi, W., Gechter, F., Heyberger, L., et al. (2024). Advancements and challenges in handwritten text recognition: A comprehensive survey. Journal of Imaging, 10(1), 18.","journal-title":"Journal of Imaging"},{"key":"2487_CR9","doi-asserted-by":"crossref","unstructured":"Ansari, A., Kaur, B., Rakhra, M., et\u00a0al (2022). Handwritten text recognition using deep learning algorithms. In: 2022 4th International Conference on Artificial Intelligence and Speech Technology (AIST), IEEE, pp 1\u20136","DOI":"10.1109\/AIST55798.2022.10065348"},{"key":"2487_CR10","unstructured":"Anwar, S., Tahir, M., Li, C., et\u00a0al (2020). Image colorization: A survey and dataset. arXiv preprint arXiv:2008.10774"},{"key":"2487_CR11","doi-asserted-by":"crossref","unstructured":"Atienza, R. (2021). Data augmentation for scene text recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1561\u20131570","DOI":"10.1109\/ICCVW54120.2021.00181"},{"key":"2487_CR12","unstructured":"Ba, J. L., Kiros, J. R., & Hinton, G. E. (2016). Layer normalization. arXiv preprint arXiv:1607.06450"},{"key":"2487_CR13","doi-asserted-by":"crossref","unstructured":"Baek, J., Kim, G., Lee, J., et al (2019). What is wrong with scene text recognition model comparisons? dataset and model analysis. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4715\u20134723","DOI":"10.1109\/ICCV.2019.00481"},{"key":"2487_CR14","doi-asserted-by":"crossref","unstructured":"Baek, J., Matsui, Y., & Aizawa, K. (2021). What if we only use real datasets for scene text recognition? toward scene text recognition with fewer labels. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3113\u20133122","DOI":"10.1109\/CVPR46437.2021.00313"},{"key":"2487_CR15","first-page":"12449","volume":"33","author":"A Baevski","year":"2020","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., et al. (2020). wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems, 33, 12449\u201312460.","journal-title":"Advances in neural information processing systems"},{"key":"2487_CR16","unstructured":"Baevski, A., Babu, A., Hsu, W. N., et\u00a0al (2023). Efficient self-supervised learning with contextualized target representations for vision, speech and language. In: International Conference on Machine Learning, PMLR, pp 1416\u20131429"},{"key":"2487_CR17","unstructured":"Bahdanau, D., Cho, K., & Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473"},{"key":"2487_CR18","unstructured":"Balestriero, R., Ibrahim, M., Sobal, V., et\u00a0al (2023). A cookbook of self-supervised learning. arXiv preprint arXiv:2304.12210"},{"key":"2487_CR19","unstructured":"Bansal, Y., Kaplun, G., & Barak, B. (2020). For self-supervised learning, rationality implies generalization, provably. arXiv preprint arXiv:2010.08508"},{"key":"2487_CR20","unstructured":"Bao, H., Dong, L., Piao, S., et\u00a0al (2021). Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254"},{"key":"2487_CR21","unstructured":"Bardes, A., Ponce, J., LeCun, Y. (2021). Vicreg: Variance-invariance-covariance regularization for self-supervised learning. arXiv preprint arXiv:2105.04906"},{"key":"2487_CR22","first-page":"8799","volume":"35","author":"A Bardes","year":"2022","unstructured":"Bardes, A., Ponce, J., & LeCun, Y. (2022). Vicregl: Self-supervised learning of local visual features. Advances in Neural Information Processing Systems, 35, 8799\u20138810.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2487_CR23","doi-asserted-by":"crossref","unstructured":"Baykal, G., Ozcelik, F., & Unal, G. (2022). Exploring deshufflegans in self-supervised generative adversarial networks. Pattern Recognition, 122, Article 108244.","DOI":"10.1016\/j.patcog.2021.108244"},{"key":"2487_CR24","first-page":"58324","volume":"36","author":"I Ben-Shaul","year":"2023","unstructured":"Ben-Shaul, I., Shwartz-Ziv, R., Galanti, T., et al. (2023). Reverse engineering self-supervised learning. Advances in Neural Information Processing Systems, 36, 58324\u201358345.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2487_CR25","unstructured":"Bezerra, B. L. D., Zanchettin, C., Toselli, A. H., et\u00a0al. (2017). Handwriting: recognition, development and analysis. Nova Science Publishers, Inc."},{"key":"2487_CR26","doi-asserted-by":"crossref","unstructured":"Biten AF, Tito R, Mafla A, et\u00a0al (2019) Scene text visual question answering. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4291\u20134301","DOI":"10.1109\/ICCV.2019.00439"},{"key":"2487_CR27","unstructured":"Bordes F, Lavoie S, Balestriero R, et\u00a0al (2023) A surprisingly simple technique to control the pretraining bias for better transfer: Expand or narrow your representation. arXiv preprint arXiv:2304.05369"},{"key":"2487_CR28","doi-asserted-by":"crossref","unstructured":"Bromley J, Guyon I, LeCun Y, et\u00a0al (1993) Signature verification using a\" siamese\" time delay neural network. Advances in neural information processing systems 6","DOI":"10.1142\/9789812797926_0003"},{"key":"2487_CR29","doi-asserted-by":"crossref","unstructured":"Carlucci FM, D\u2019Innocente A, Bucci S, et\u00a0al (2019) Domain generalization by solving jigsaw puzzles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2229\u20132238","DOI":"10.1109\/CVPR.2019.00233"},{"key":"2487_CR30","doi-asserted-by":"crossref","unstructured":"Caron M, Bojanowski P, Joulin A, et\u00a0al (2018) Deep clustering for unsupervised learning of visual features. In: Proceedings of the European conference on computer vision (ECCV), pp 132\u2013149","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"2487_CR31","first-page":"9912","volume":"33","author":"M Caron","year":"2020","unstructured":"Caron, M., Misra, I., Mairal, J., et al. (2020). Unsupervised learning of visual features by contrasting cluster assignments. Advances in neural information processing systems, 33, 9912\u20139924.","journal-title":"Advances in neural information processing systems"},{"key":"2487_CR32","doi-asserted-by":"crossref","unstructured":"Caron M, Touvron H, Misra I, et\u00a0al (2021) Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9650\u20139660","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"2487_CR33","doi-asserted-by":"crossref","unstructured":"Cascianelli S, Cornia M, Baraldi L, et\u00a0al (2021) Learning to read l\u2019infinito: handwritten text recognition with synthetic training data. In: Computer Analysis of Images and Patterns: 19th International Conference, CAIP 2021, Virtual Event, September 28\u201330, 2021, Proceedings, Part II 19, Springer, pp 340\u2013350","DOI":"10.1007\/978-3-030-89131-2_31"},{"key":"2487_CR34","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1109\/LSP.2020.3044547","volume":"28","author":"N Chen","year":"2020","unstructured":"Chen, N., Watanabe, S., Villalba, J., et al. (2020). Non-autoregressive transformer for speech recognition. IEEE Signal Processing Letters, 28, 121\u2013125.","journal-title":"IEEE Signal Processing Letters"},{"issue":"8","key":"2487_CR35","doi-asserted-by":"crossref","first-page":"1777","DOI":"10.3390\/math11081777","volume":"11","author":"S Chen","year":"2023","unstructured":"Chen, S., & Guo, W. (2023). Auto-encoders in deep learning-a review with new perspectives. Mathematics, 11(8), 1777.","journal-title":"Mathematics"},{"key":"2487_CR36","doi-asserted-by":"crossref","unstructured":"Chen T, Zhai X, Ritter M, et\u00a0al (2019) Self-supervised gans via auxiliary rotation loss. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12154\u201312163","DOI":"10.1109\/CVPR.2019.01243"},{"key":"2487_CR37","unstructured":"Chen T, Kornblith S, Norouzi M, et\u00a0al (2020b) A simple framework for contrastive learning of visual representations. In: International conference on machine learning, PMLR, pp 1597\u20131607"},{"key":"2487_CR38","doi-asserted-by":"crossref","unstructured":"Chen X, He K (2021) Exploring simple siamese representation learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 15750\u201315758","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"2487_CR39","unstructured":"Chen X, Fan H, Girshick R, et\u00a0al (2020c) Improved baselines with momentum contrastive learning. arXiv preprint arXiv:2003.04297"},{"issue":"2","key":"2487_CR40","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3440756","volume":"54","author":"X Chen","year":"2021","unstructured":"Chen, X., Jin, L., Zhu, Y., et al. (2021). Text recognition in the wild: A survey. ACM Computing Surveys (CSUR), 54(2), 1\u201335.","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"2487_CR41","doi-asserted-by":"crossref","unstructured":"Chen X, Xie S, He K (2021b) An empirical study of training self-supervised vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9640\u20139649","DOI":"10.1109\/ICCV48922.2021.00950"},{"issue":"1","key":"2487_CR42","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1007\/s11263-023-01852-4","volume":"132","author":"X Chen","year":"2024","unstructured":"Chen, X., Ding, M., Wang, X., et al. (2024). Context autoencoder for self-supervised representation learning. International Journal of Computer Vision, 132(1), 208\u2013223.","journal-title":"International Journal of Computer Vision"},{"key":"2487_CR43","unstructured":"Chen Y, Bardes A, Li Z, et\u00a0al (2022) Bag of image patch embedding behind the success of self-supervised learning. arXiv preprint arXiv:2206.08954"},{"key":"2487_CR44","doi-asserted-by":"crossref","unstructured":"Cheng Z, Yang Q, Sheng B (2015) Deep colorization. In: Proceedings of the IEEE international conference on computer vision, pp 415\u2013423","DOI":"10.1109\/ICCV.2015.55"},{"key":"2487_CR45","doi-asserted-by":"crossref","unstructured":"Cheng Z, Bai F, Xu Y, et\u00a0al (2017) Focusing attention: Towards accurate text recognition in natural images. In: Proceedings of the IEEE international conference on computer vision, pp 5076\u20135084","DOI":"10.1109\/ICCV.2017.543"},{"key":"2487_CR46","doi-asserted-by":"crossref","unstructured":"Chng CK, Liu Y, Sun Y, et\u00a0al (2019) Icdar2019 robust reading challenge on arbitrary-shaped text-rrc-art. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), IEEE, pp 1571\u20131576","DOI":"10.1109\/ICDAR.2019.00252"},{"key":"2487_CR47","unstructured":"Chowdhury A, Vig L (2018) An efficient end-to-end neural model for handwritten text recognition. arXiv preprint arXiv:1807.07965"},{"key":"2487_CR48","unstructured":"Ciga O, Xu T, Martel AL (2021) Resource and data efficient self supervised learning. arXiv preprint arXiv:2109.01721"},{"issue":"1","key":"2487_CR49","doi-asserted-by":"crossref","first-page":"508","DOI":"10.1109\/TPAMI.2022.3144899","volume":"45","author":"D Coquenet","year":"2022","unstructured":"Coquenet, D., Chatelain, C., & Paquet, T. (2022). End-to-end handwritten paragraph text recognition using a vertical attention network. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(1), 508\u2013524.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"7","key":"2487_CR50","doi-asserted-by":"crossref","first-page":"8227","DOI":"10.1109\/TPAMI.2023.3235826","volume":"45","author":"D Coquenet","year":"2023","unstructured":"Coquenet, D., Chatelain, C., & Paquet, T. (2023). Dan: a segmentation-free document attention network for handwritten document recognition. IEEE transactions on pattern analysis and machine intelligence, 45(7), 8227\u20138243.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"issue":"1","key":"2487_CR51","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell, A., White, T., Dumoulin, V., et al. (2018). Generative adversarial networks: An overview. IEEE signal processing magazine, 35(1), 53\u201365.","journal-title":"IEEE signal processing magazine"},{"key":"2487_CR52","unstructured":"Devlin J, Chang MW, Lee K, et\u00a0al (2018) Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"2487_CR53","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1016\/j.patrec.2023.03.020","volume":"169","author":"M Dhiaf","year":"2023","unstructured":"Dhiaf, M., Rouhou, A. C., Kessentini, Y., et al. (2023). Msdoctr-lite: A lite transformer for full page multi-script handwriting recognition. Pattern Recognition Letters, 169, 28\u201334.","journal-title":"Pattern Recognition Letters"},{"key":"2487_CR54","unstructured":"Diaz DH, Qin S, Ingle R, et\u00a0al (2021) Rethinking text line recognition models. arXiv preprint arXiv:2104.07787"},{"key":"2487_CR55","doi-asserted-by":"crossref","unstructured":"Doersch C, Gupta A, Efros AA (2015) Unsupervised visual representation learning by context prediction. In: Proceedings of the IEEE international conference on computer vision, pp 1422\u20131430","DOI":"10.1109\/ICCV.2015.167"},{"key":"2487_CR56","doi-asserted-by":"crossref","unstructured":"Dong, S., Wang, P., & Abbas, K. (2021). A survey on deep learning and its applications. Computer Science Review, 40, Article 100379.","DOI":"10.1016\/j.cosrev.2021.100379"},{"key":"2487_CR57","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et\u00a0al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"2487_CR58","doi-asserted-by":"crossref","first-page":"2007","DOI":"10.1007\/s11063-019-10163-0","volume":"51","author":"O Elharrouss","year":"2020","unstructured":"Elharrouss, O., Almaadeed, N., Al-Maadeed, S., et al. (2020). Image inpainting: A review. Neural Processing Letters, 51, 2007\u20132028.","journal-title":"Neural Processing Letters"},{"key":"2487_CR59","doi-asserted-by":"crossref","unstructured":"Feng Z, Xu C, Tao D (2019) Self-supervised representation learning by rotation feature decoupling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 10364\u201310374","DOI":"10.1109\/CVPR.2019.01061"},{"key":"2487_CR60","doi-asserted-by":"crossref","unstructured":"Fujitake M (2024) Dtrocr: Decoder-only transformer for optical character recognition. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 8025\u20138035","DOI":"10.1109\/WACV57701.2024.00784"},{"key":"2487_CR61","doi-asserted-by":"crossref","first-page":"3005","DOI":"10.1109\/TIP.2021.3051485","volume":"30","author":"Y Gao","year":"2021","unstructured":"Gao, Y., Chen, Y., Wang, J., et al. (2021). Semi-supervised scene text recognition. IEEE Transactions on Image Processing, 30, 3005\u20133016.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2487_CR62","doi-asserted-by":"crossref","unstructured":"Gao Z, Wang Y, Qu Y, et\u00a0al (2024) Self-supervised pre-training with symmetric superimposition modeling for scene text recognition. arXiv preprint arXiv:2405.05841","DOI":"10.24963\/ijcai.2024\/85"},{"key":"2487_CR63","doi-asserted-by":"crossref","unstructured":"Garcia-Bordils S, Mafla A, Biten AF, et\u00a0al (2022) Out-of-vocabulary challenge report. In: European Conference on Computer Vision, Springer, pp 359\u2013375","DOI":"10.1007\/978-3-031-25069-9_24"},{"key":"2487_CR64","unstructured":"Garrido Q, Chen Y, Bardes A, et\u00a0al (2022) On the duality between contrastive and non-contrastive self-supervised learning. arXiv preprint arXiv:2206.02574"},{"key":"2487_CR65","doi-asserted-by":"crossref","unstructured":"Ghosh, T., Sen, S., Obaidullah, S. M., et\u00a0al. (2022). Advances in online handwritten recognition in the last decades. Computer Science Review, 46, Article 100515.","DOI":"10.1016\/j.cosrev.2022.100515"},{"key":"2487_CR66","unstructured":"Gidaris S, Singh P, Komodakis N (2018) Unsupervised representation learning by predicting image rotations. arXiv preprint arXiv:1803.07728"},{"key":"2487_CR67","doi-asserted-by":"crossref","unstructured":"G\u00f3mez L, Mafla A, Rusinol M, et\u00a0al (2018) Single shot scene text retrieval. In: Proceedings of the European conference on computer vision (ECCV), pp 700\u2013715","DOI":"10.1007\/978-3-030-01264-9_43"},{"key":"2487_CR68","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, et\u00a0al (2014) Generative adversarial nets. Advances in neural information processing systems 27"},{"key":"2487_CR69","doi-asserted-by":"crossref","unstructured":"Graves A, Fern\u00e1ndez S, Gomez F, et\u00a0al (2006) Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning, pp 369\u2013376","DOI":"10.1145\/1143844.1143891"},{"key":"2487_CR70","first-page":"21271","volume":"33","author":"JB Grill","year":"2020","unstructured":"Grill, J. B., Strub, F., Altch\u00e9, F., et al. (2020). Bootstrap your own latent-a new approach to self-supervised learning. Advances in neural information processing systems, 33, 21271\u201321284.","journal-title":"Advances in neural information processing systems"},{"key":"2487_CR71","doi-asserted-by":"crossref","unstructured":"Grosicki E, El\u00a0Abed H (2009) Icdar 2009 handwriting recognition competition. In: 2009 10th International Conference on Document Analysis and Recognition, IEEE, pp 1398\u20131402","DOI":"10.1109\/ICDAR.2009.184"},{"key":"2487_CR72","doi-asserted-by":"crossref","unstructured":"Guan T, Shen W, Yang X, et\u00a0al (2023) Self-supervised character-to-character distillation for text recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 19473\u201319484","DOI":"10.1109\/ICCV51070.2023.01784"},{"issue":"4","key":"2487_CR73","doi-asserted-by":"crossref","first-page":"3313","DOI":"10.1109\/TKDE.2021.3130191","volume":"35","author":"J Gui","year":"2021","unstructured":"Gui, J., Sun, Z., Wen, Y., et al. (2021). A review on generative adversarial networks: Algorithms, theory, and applications. IEEE transactions on knowledge and data engineering, 35(4), 3313\u20133332.","journal-title":"IEEE transactions on knowledge and data engineering"},{"key":"2487_CR74","doi-asserted-by":"crossref","unstructured":"Gupta A, Vedaldi A, Zisserman A (2016) Synthetic data for text localisation in natural images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2315\u20132324","DOI":"10.1109\/CVPR.2016.254"},{"key":"2487_CR75","unstructured":"Gupta K, Ajanthan T, Hengel Avd, et\u00a0al (2022) Understanding and improving the role of projection head in self-supervised learning. arXiv preprint arXiv:2212.11491"},{"issue":"4","key":"2487_CR76","doi-asserted-by":"crossref","first-page":"3457","DOI":"10.1007\/s10462-021-10091-3","volume":"55","author":"N Gupta","year":"2022","unstructured":"Gupta, N., & Jalal, A. S. (2022). Traditional to transfer learning progression on scene text detection and recognition: a survey. Artificial Intelligence Review, 55(4), 3457\u20133502.","journal-title":"Artificial Intelligence Review"},{"key":"2487_CR77","unstructured":"Gutmann M, Hyv\u00e4rinen A (2010) Noise-contrastive estimation: A new estimation principle for unnormalized statistical models. In: Proceedings of the thirteenth international conference on artificial intelligence and statistics, JMLR Workshop and Conference Proceedings, pp 297\u2013304"},{"key":"2487_CR78","doi-asserted-by":"crossref","unstructured":"Hadsell R, Chopra S, LeCun Y (2006) Dimensionality reduction by learning an invariant mapping. In: 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201906), IEEE, pp 1735\u20131742","DOI":"10.1109\/CVPR.2006.100"},{"key":"2487_CR79","doi-asserted-by":"crossref","unstructured":"Harshvardhan, G., Gourisaria, M. K., Pandey, M., et\u00a0al. (2020). A comprehensive survey and analysis of generative models in machine learning. Computer Science Review, 38, Article 100285.","DOI":"10.1016\/j.cosrev.2020.100285"},{"key":"2487_CR80","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, et\u00a0al (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"2487_CR81","doi-asserted-by":"crossref","unstructured":"He K, Fan H, Wu Y, et\u00a0al (2020) Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9729\u20139738","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2487_CR82","doi-asserted-by":"crossref","unstructured":"He K, Chen X, Xie S, et\u00a0al (2022) Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16000\u201316009","DOI":"10.1109\/CVPR52688.2022.01553"},{"issue":"2","key":"2487_CR83","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1093\/logcom\/exs049","volume":"24","author":"C De la Higuera","year":"2014","unstructured":"De la Higuera, C., & Oncina, J. (2014). The most probable string: an algorithmic study. Journal of Logic and Computation, 24(2), 311\u2013330.","journal-title":"Journal of Logic and Computation"},{"issue":"8","key":"2487_CR84","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural computation, 9(8), 1735\u20131780.","journal-title":"Neural computation"},{"key":"2487_CR85","unstructured":"Huang W, Yi M, Zhao X, et\u00a0al (2021) Towards the generalization of contrastive self-supervised learning. arXiv preprint arXiv:2111.00743"},{"key":"2487_CR86","doi-asserted-by":"crossref","unstructured":"Hubenthal M, Kumar S (2023) Image-text pre-training for logo recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 1145\u20131154","DOI":"10.1109\/WACV56688.2023.00120"},{"key":"2487_CR87","unstructured":"Iwana BK, Rizvi STR, Ahmed S, et\u00a0al (2016) Judging a book by its cover. arXiv preprint arXiv:1610.09204"},{"key":"2487_CR88","unstructured":"Jaderberg M, Simonyan K, Vedaldi A, et\u00a0al (2014) Synthetic data and artificial neural networks for natural scene text recognition. arXiv preprint arXiv:1406.2227"},{"issue":"1","key":"2487_CR89","doi-asserted-by":"crossref","first-page":"2","DOI":"10.3390\/technologies9010002","volume":"9","author":"A Jaiswal","year":"2020","unstructured":"Jaiswal, A., Babu, A. R., Zadeh, M. Z., et al. (2020). A survey on contrastive self-supervised learning. Technologies, 9(1), 2.","journal-title":"Technologies"},{"key":"2487_CR90","doi-asserted-by":"crossref","unstructured":"Jiang Q, Wang J, Peng D, et\u00a0al (2023a) Revisiting scene text recognition: A data perspective. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 20543\u201320554","DOI":"10.1109\/ICCV51070.2023.01878"},{"key":"2487_CR91","doi-asserted-by":"crossref","unstructured":"Jiang X, Zhang J, Du J, et\u00a0al (2022) Scene text recognition with self-supervised contrastive predictive coding. In: 2022 26th International Conference on Pattern Recognition (ICPR), IEEE, pp 1514\u20131521","DOI":"10.1109\/ICPR56361.2022.9956631"},{"key":"2487_CR92","doi-asserted-by":"crossref","unstructured":"Jiang X, Du J, Hu P, et\u00a0al (2023b) Group, contrast and recognize: A self-supervised method for chinese character recognition. In: International Conference on Document Analysis and Recognition, Springer, pp 411\u2013427","DOI":"10.1007\/978-3-031-41685-9_26"},{"issue":"11","key":"2487_CR93","doi-asserted-by":"crossref","first-page":"4037","DOI":"10.1109\/TPAMI.2020.2992393","volume":"43","author":"L Jing","year":"2020","unstructured":"Jing, L., & Tian, Y. (2020). Self-supervised visual feature learning with deep neural networks: A survey. IEEE transactions on pattern analysis and machine intelligence, 43(11), 4037\u20134058.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"2487_CR94","doi-asserted-by":"crossref","unstructured":"Kang L, Riba P, Rusinol M, et\u00a0al (2020a) Distilling content from style for handwritten word recognition. In: 2020 17th International Conference on Frontiers in Handwriting Recognition (ICFHR), IEEE, pp 139\u2013144","DOI":"10.1109\/ICFHR2020.2020.00035"},{"key":"2487_CR95","doi-asserted-by":"crossref","unstructured":"Kang L, Rusinol M, Forn\u00e9s A, et\u00a0al (2020b) Unsupervised writer adaptation for synthetic-to-real handwritten word recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 3502\u20133511","DOI":"10.1109\/WACV45572.2020.9093392"},{"key":"2487_CR96","doi-asserted-by":"crossref","unstructured":"Kang, L., Riba, P., Rusi\u00f1ol, M., et\u00a0al. (2022). Pay attention to what you read: non-recurrent handwritten text-line recognition. Pattern Recognition, 129, Article 108766.","DOI":"10.1016\/j.patcog.2022.108766"},{"key":"2487_CR97","doi-asserted-by":"crossref","unstructured":"Karatzas D, Shafait F, Uchida S, et\u00a0al (2013) Icdar 2013 robust reading competition. In: 2013 12th international conference on document analysis and recognition, IEEE, pp 1484\u20131493","DOI":"10.1109\/ICDAR.2013.221"},{"key":"2487_CR98","doi-asserted-by":"crossref","unstructured":"Karatzas D, Gomez-Bigorda L, Nicolaou A, et\u00a0al (2015) Icdar 2015 competition on robust reading. In: 2015 13th international conference on document analysis and recognition (ICDAR), IEEE, pp 1156\u20131160","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"2487_CR99","doi-asserted-by":"crossref","first-page":"3239","DOI":"10.1007\/s10462-020-09930-6","volume":"54","author":"T Khan","year":"2021","unstructured":"Khan, T., Sarkar, R., & Mollah, A. F. (2021). Deep learning approaches to scene text detection: a comprehensive review. Artificial Intelligence Review, 54, 3239\u20133298.","journal-title":"Artificial Intelligence Review"},{"key":"2487_CR100","doi-asserted-by":"crossref","unstructured":"Kim D, Yoo Y, Park S, et\u00a0al (2021) Selfreg: Self-supervised contrastive regularization for domain generalization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 9619\u20139628","DOI":"10.1109\/ICCV48922.2021.00948"},{"key":"2487_CR101","doi-asserted-by":"crossref","unstructured":"Kim G, Hong T, Yim M, et\u00a0al (2022) Ocr-free document understanding transformer. In: European Conference on Computer Vision, Springer, pp 498\u2013517","DOI":"10.1007\/978-3-031-19815-1_29"},{"key":"2487_CR102","doi-asserted-by":"crossref","unstructured":"Kleber F, Fiel S, Diem M, et\u00a0al (2013) Cvl-database: An off-line database for writer retrieval, writer identification and word spotting. In: 2013 12th international conference on document analysis and recognition, IEEE, pp 560\u2013564","DOI":"10.1109\/ICDAR.2013.117"},{"key":"2487_CR103","doi-asserted-by":"crossref","unstructured":"Kraft M, Pieczy\u0144ski D, Siemionow KK (2021) Overcoming data scarcity for coronary vessel segmentation through self-supervised pre-training. In: Neural Information Processing: 28th International Conference, ICONIP 2021, Sanur, Bali, Indonesia, December 8\u201312, 2021, Proceedings, Part III 28, Springer, pp 369\u2013378","DOI":"10.1007\/978-3-030-92238-2_31"},{"key":"2487_CR104","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25"},{"key":"2487_CR105","doi-asserted-by":"crossref","unstructured":"Larsson G, Maire M, Shakhnarovich G (2016) Learning representations for automatic colorization. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part IV 14, Springer, pp 577\u2013593","DOI":"10.1007\/978-3-319-46493-0_35"},{"key":"2487_CR106","doi-asserted-by":"crossref","unstructured":"Larsson G, Maire M, Shakhnarovich G (2017) Colorization as a proxy task for visual understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6874\u20136883","DOI":"10.1109\/CVPR.2017.96"},{"issue":"4","key":"2487_CR107","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun, Y., Boser, B., Denker, J. S., et al. (1989). Backpropagation applied to handwritten zip code recognition. Neural computation, 1(4), 541\u2013551.","journal-title":"Neural computation"},{"key":"2487_CR108","doi-asserted-by":"crossref","unstructured":"Lee J, Park S, Baek J, et\u00a0al (2020) On recognizing texts of arbitrary shapes with 2d self-attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp 546\u2013547","DOI":"10.1109\/CVPRW50498.2020.00281"},{"key":"2487_CR109","unstructured":"Li C, Yang J, Zhang P, et\u00a0al (2021a) Efficient self-supervised vision transformers for representation learning. arXiv preprint arXiv:2106.09785"},{"key":"2487_CR110","doi-asserted-by":"crossref","unstructured":"Li C, Feh\u00e9rv\u00e1ri I, Zhao X, et\u00a0al (2022) Seetek: Very large-scale open-set logo recognition with text-aware metric learning. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 2544\u20132553","DOI":"10.1109\/WACV51458.2022.00066"},{"key":"2487_CR111","doi-asserted-by":"crossref","unstructured":"Li H, Xue FF, Chaitanya K, et\u00a0al (2021b) Imbalance-aware self-supervised learning for 3d radiomic representations. In: Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2021: 24th International Conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part II 24, Springer, pp 36\u201346","DOI":"10.1007\/978-3-030-87196-3_4"},{"key":"2487_CR112","doi-asserted-by":"crossref","unstructured":"Li M, Lv T, Chen J, et\u00a0al (2023) Trocr: Transformer-based optical character recognition with pre-trained models. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 13094\u201313102","DOI":"10.1609\/aaai.v37i11.26538"},{"issue":"11","key":"2487_CR113","first-page":"8602","volume":"44","author":"J Liang","year":"2021","unstructured":"Liang, J., Hu, D., Wang, Y., et al. (2021). Source data-absent unsupervised domain adaptation through hypothesis transfer and labeling transfer. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(11), 8602\u20138617.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2487_CR114","unstructured":"Liu H, HaoChen JZ, Gaidon A, et\u00a0al (2021a) Self-supervised learning is more robust to dataset imbalance. arXiv preprint arXiv:2110.05025"},{"key":"2487_CR115","doi-asserted-by":"crossref","unstructured":"Liu H, Wang B, Bao Z, et\u00a0al (2022) Perceiving stroke-semantic context: Hierarchical contrastive learning for robust scene text recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 1702\u20131710","DOI":"10.1609\/aaai.v36i2.20062"},{"key":"2487_CR116","doi-asserted-by":"crossref","first-page":"343","DOI":"10.1016\/j.neucom.2020.05.059","volume":"407","author":"J Liu","year":"2020","unstructured":"Liu, J., Zhong, Q., Yuan, Y., et al. (2020). Semitext: Scene text detection with semi-supervised learning. Neurocomputing, 407, 343\u2013353.","journal-title":"Neurocomputing"},{"issue":"1","key":"2487_CR117","first-page":"857","volume":"35","author":"X Liu","year":"2021","unstructured":"Liu, X., Zhang, F., Hou, Z., et al. (2021). Self-supervised learning: Generative or contrastive. IEEE transactions on knowledge and data engineering, 35(1), 857\u2013876.","journal-title":"IEEE transactions on knowledge and data engineering"},{"key":"2487_CR118","unstructured":"Liu Y, Ott M, Goyal N, et\u00a0al (2019) Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692"},{"issue":"1","key":"2487_CR119","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1007\/s11263-020-01369-0","volume":"129","author":"S Long","year":"2021","unstructured":"Long, S., He, X., & Yao, C. (2021). Scene text detection and recognition: The deep learning era. International Journal of Computer Vision, 129(1), 161\u2013184.","journal-title":"International Journal of Computer Vision"},{"key":"2487_CR120","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1007\/s10032-004-0134-3","volume":"7","author":"SM Lucas","year":"2005","unstructured":"Lucas, S. M., Panaretos, A., Sosa, L., et al. (2005). Icdar 2003 robust reading competitions: entries, results, and future directions. International Journal of Document Analysis and Recognition (IJDAR), 7, 105\u2013122.","journal-title":"International Journal of Document Analysis and Recognition (IJDAR)"},{"key":"2487_CR121","doi-asserted-by":"crossref","unstructured":"Luo C, Zhu Y, Jin L, et\u00a0al (2020) Learn to augment: Joint data augmentation and network optimization for text recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 13746\u201313755","DOI":"10.1109\/CVPR42600.2020.01376"},{"key":"2487_CR122","doi-asserted-by":"crossref","unstructured":"Luo C, Jin L, Chen J (2022) Siman: Exploring self-supervised representation learning of scene text via similarity-aware normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1039\u20131048","DOI":"10.1109\/CVPR52688.2022.00111"},{"key":"2487_CR123","unstructured":"Lyu P, Zhang C, Liu S, et\u00a0al (2023) Maskocr: Text recognition with masked encoder-decoder pretraining. arXiv preprint arXiv:2206.00311"},{"key":"2487_CR124","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1007\/s100320200071","volume":"5","author":"UV Marti","year":"2002","unstructured":"Marti, U. V., & Bunke, H. (2002). The iam-database: an english sentence database for offline handwriting recognition. International journal on document analysis and recognition, 5, 39\u201346.","journal-title":"International journal on document analysis and recognition"},{"issue":"7","key":"2487_CR125","first-page":"3523","volume":"44","author":"S Minaee","year":"2021","unstructured":"Minaee, S., Boykov, Y., Porikli, F., et al. (2021). Image segmentation using deep learning: A survey. IEEE transactions on pattern analysis and machine intelligence, 44(7), 3523\u20133542.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"2487_CR126","doi-asserted-by":"crossref","unstructured":"Mishra A, Alahari K, Jawahar C (2012) Scene text recognition using higher order language priors. In: BMVC-British machine vision conference, BMVA","DOI":"10.5244\/C.26.127"},{"key":"2487_CR127","doi-asserted-by":"crossref","unstructured":"Misra I, Maaten Lvd (2020) Self-supervised learning of pretext-invariant representations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6707\u20136717","DOI":"10.1109\/CVPR42600.2020.00674"},{"issue":"14","key":"2487_CR128","doi-asserted-by":"crossref","first-page":"20255","DOI":"10.1007\/s11042-022-12693-7","volume":"81","author":"F Naiemi","year":"2022","unstructured":"Naiemi, F., Ghods, V., & Khalesi, H. (2022). Scene text detection and recognition: a survey. Multimedia Tools and Applications, 81(14), 20255\u201320290.","journal-title":"Multimedia Tools and Applications"},{"issue":"8","key":"2487_CR129","doi-asserted-by":"crossref","first-page":"5517","DOI":"10.1007\/s10462-020-09827-4","volume":"53","author":"SR Narang","year":"2020","unstructured":"Narang, S. R., Jindal, M. K., & Kumar, M. (2020). Ancient text recognition: a review. Artificial Intelligence Review, 53(8), 5517\u20135558.","journal-title":"Artificial Intelligence Review"},{"key":"2487_CR130","doi-asserted-by":"crossref","unstructured":"Nayef N, Patel Y, Busta M, et\u00a0al (2019) Icdar2019 robust reading challenge on multi-lingual scene text detection and recognition-rrc-mlt-2019. In: 2019 International conference on document analysis and recognition (ICDAR), IEEE, pp 1582\u20131587","DOI":"10.1109\/ICDAR.2019.00254"},{"key":"2487_CR131","doi-asserted-by":"crossref","unstructured":"Nikitha A, Geetha J, JayaLakshmi D (2020) Handwritten text recognition using deep learning. In: 2020 International Conference on Recent Trends on Electronics, Information, Communication & Technology (RTEICT), IEEE, pp 388\u2013392","DOI":"10.1109\/RTEICT49044.2020.9315679"},{"issue":"4","key":"2487_CR132","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1007\/s10032-022-00405-8","volume":"25","author":"K Nikolaidou","year":"2022","unstructured":"Nikolaidou, K., Seuret, M., Mokayed, H., et al. (2022). A survey of historical document image datasets. International Journal on Document Analysis and Recognition (IJDAR), 25(4), 305\u2013338.","journal-title":"International Journal on Document Analysis and Recognition (IJDAR)"},{"key":"2487_CR133","doi-asserted-by":"crossref","unstructured":"Noroozi M, Favaro P (2016) Unsupervised learning of visual representations by solving jigsaw puzzles. In: European conference on computer vision, Springer, pp 69\u201384","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"2487_CR134","doi-asserted-by":"crossref","unstructured":"Novotny D, Albanie S, Larlus D, et\u00a0al (2018) Self-supervised learning of geometrically stable features through probabilistic introspection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 3637\u20133645","DOI":"10.1109\/CVPR.2018.00383"},{"key":"2487_CR135","unstructured":"Oord Avd, Li Y, Vinyals O (2018) Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748"},{"key":"2487_CR136","unstructured":"Ozbulak U, Lee HJ, Boga B, et\u00a0al (2023) Know your self-supervised learning: A survey on image-based generative and discriminative training. arXiv preprint arXiv:2305.13689"},{"key":"2487_CR137","unstructured":"Pajot A, de\u00a0Bezenac E, Gallinari P (2019) Unsupervised adversarial image inpainting. arXiv preprint arXiv:1912.12164"},{"key":"2487_CR138","doi-asserted-by":"crossref","unstructured":"Pang K, Yang Y, Hospedales TM, et\u00a0al (2020) Solving mixed-modal jigsaw puzzle for fine-grained sketch-based image retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10347\u201310355","DOI":"10.1109\/CVPR42600.2020.01036"},{"key":"2487_CR139","doi-asserted-by":"crossref","unstructured":"Pathak D, Krahenbuhl P, Donahue J, et\u00a0al (2016) Context encoders: Feature learning by inpainting. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2536\u20132544","DOI":"10.1109\/CVPR.2016.278"},{"key":"2487_CR140","doi-asserted-by":"crossref","unstructured":"Penarrubia C, Garrido-Munoz C, Valero-Mas JJ, et\u00a0al (2024) Spatial context-based self-supervised learning for handwritten text recognition. arXiv preprint arXiv:2404.11585","DOI":"10.1016\/j.patrec.2025.05.014"},{"key":"2487_CR141","doi-asserted-by":"crossref","unstructured":"Phan TQ, Shivakumara P, Tian S, et\u00a0al (2013) Recognizing text with perspective distortion in natural scenes. In: Proceedings of the IEEE international conference on computer vision, pp 569\u2013576","DOI":"10.1109\/ICCV.2013.76"},{"key":"2487_CR142","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1016\/j.patrec.2023.06.003","volume":"172","author":"V Pippi","year":"2023","unstructured":"Pippi, V., Cascianelli, S., Baraldi, L., et al. (2023). Evaluating synthetic pre-training for handwriting processing tasks. Pattern Recognition Letters, 172, 44\u201350.","journal-title":"Pattern Recognition Letters"},{"key":"2487_CR143","doi-asserted-by":"crossref","unstructured":"Puigcerver J (2017) Are multidimensional recurrent layers really necessary for handwritten text recognition? In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR), IEEE, pp 67\u201372","DOI":"10.1109\/ICDAR.2017.20"},{"key":"2487_CR144","doi-asserted-by":"crossref","unstructured":"Qian Q, Xu Y, Hu J, et\u00a0al (2022) Unsupervised visual representation learning by online constrained k-means. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 16640\u201316649","DOI":"10.1109\/CVPR52688.2022.01614"},{"key":"2487_CR145","doi-asserted-by":"crossref","unstructured":"Qiao Z, Ji Z, Yuan Y, et\u00a0al (2023) Decoupling visual-semantic features learning with dual masked autoencoder for self-supervised scene text recognition. In: International Conference on Document Analysis and Recognition, Springer, pp 261\u2013279","DOI":"10.1007\/978-3-031-41679-8_15"},{"issue":"10","key":"2487_CR146","doi-asserted-by":"crossref","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"X Qiu","year":"2020","unstructured":"Qiu, X., Sun, T., Xu, Y., et al. (2020). Pre-trained models for natural language processing: A survey. Science China Technological Sciences, 63(10), 1872\u20131897.","journal-title":"Science China Technological Sciences"},{"key":"2487_CR147","unstructured":"Radford A, Metz L, Chintala S (2015) Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434"},{"key":"2487_CR148","unstructured":"Ramesh A, Pavlov M, Goh G, et\u00a0al (2021) Zero-shot text-to-image generation. In: International conference on machine learning, Pmlr, pp 8821\u20138831"},{"issue":"18","key":"2487_CR149","doi-asserted-by":"crossref","first-page":"8027","DOI":"10.1016\/j.eswa.2014.07.008","volume":"41","author":"A Risnumawan","year":"2014","unstructured":"Risnumawan, A., Shivakumara, P., Chan, C. S., et al. (2014). A robust arbitrary text detection system for natural scene images. Expert Systems with Applications, 41(18), 8027\u20138048.","journal-title":"Expert Systems with Applications"},{"key":"2487_CR150","unstructured":"Rolfe JT (2016) Discrete variational autoencoders. arXiv preprint arXiv:1609.02200"},{"key":"2487_CR151","doi-asserted-by":"crossref","unstructured":"Sharma R, Kaushik B, Gondhi N (2020) Character recognition using machine learning and deep learning-a survey. In: 2020 International Conference on Emerging Smart Computing and Informatics (ESCI), IEEE, pp 341\u2013345","DOI":"10.1109\/ESCI48226.2020.9167649"},{"key":"2487_CR152","doi-asserted-by":"crossref","unstructured":"Sherstinsky, A. (2020). Fundamentals of recurrent neural network (rnn) and long short-term memory (lstm) network. Physica D: Nonlinear Phenomena, 404, Article 132306.","DOI":"10.1016\/j.physd.2019.132306"},{"issue":"11","key":"2487_CR153","doi-asserted-by":"crossref","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2016","unstructured":"Shi, B., Bai, X., & Yao, C. (2016). An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE transactions on pattern analysis and machine intelligence, 39(11), 2298\u20132304.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"2487_CR154","doi-asserted-by":"crossref","unstructured":"Shi B, Wang X, Lyu P, et\u00a0al (2016b) Robust scene text recognition with automatic rectification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4168\u20134176","DOI":"10.1109\/CVPR.2016.452"},{"key":"2487_CR155","doi-asserted-by":"crossref","unstructured":"Shi B, Yao C, Liao M, et\u00a0al (2017) Icdar2017 competition on reading chinese text in the wild (rctw-17). In: 2017 14th iapr international conference on document analysis and recognition (ICDAR), IEEE, pp 1429\u20131434","DOI":"10.1109\/ICDAR.2017.233"},{"key":"2487_CR156","doi-asserted-by":"crossref","unstructured":"Singh A, Natarajan V, Shah M, et\u00a0al (2019) Towards vqa models that can read. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8317\u20138326","DOI":"10.1109\/CVPR.2019.00851"},{"key":"2487_CR157","doi-asserted-by":"crossref","unstructured":"Singh A, Pang G, Toh M, et\u00a0al (2021) Textocr: Towards large-scale end-to-end reasoning for arbitrary-shaped scene text. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8802\u20138812","DOI":"10.1109\/CVPR46437.2021.00869"},{"key":"2487_CR158","doi-asserted-by":"crossref","unstructured":"Singh SS, Karayev S (2021) Full page handwriting recognition via image to sequence extraction. In: Document Analysis and Recognition\u2013ICDAR 2021: 16th International Conference, Lausanne, Switzerland, September 5\u201310, 2021, Proceedings, Part III 16, Springer, pp 55\u201369","DOI":"10.1007\/978-3-030-86334-0_4"},{"issue":"03","key":"2487_CR159","doi-asserted-by":"crossref","first-page":"186","DOI":"10.36548\/jscp.2020.3.007","volume":"2","author":"S Smys","year":"2020","unstructured":"Smys, S., Chen, J. I. Z., & Shakya, S. (2020). Survey on neural network architectures with deep learning. Journal of Soft Computing Paradigm (JSCP), 2(03), 186\u2013194.","journal-title":"Journal of Soft Computing Paradigm (JSCP)"},{"key":"2487_CR160","doi-asserted-by":"crossref","unstructured":"Souibgui MA, Biswas S, Mafla A, et\u00a0al (2023) Text-diae: A self-supervised degradation invariant autoencoder for text recognition and document enhancement. In: proceedings of the AAAI conference on artificial intelligence, pp 2330\u20132338","DOI":"10.1609\/aaai.v37i2.25328"},{"key":"2487_CR161","doi-asserted-by":"crossref","unstructured":"Sun Y, Ni Z, Chng CK, et\u00a0al (2019) Icdar 2019 competition on large-scale street view text with partial labeling-rrc-lsvt. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), IEEE, pp 1557\u20131562","DOI":"10.1109\/ICDAR.2019.00250"},{"key":"2487_CR162","doi-asserted-by":"crossref","unstructured":"Tao C, Zhu X, Su W, et\u00a0al (2023) Siamese image modeling for self-supervised vision representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2132\u20132141","DOI":"10.1109\/CVPR52729.2023.00212"},{"key":"2487_CR163","doi-asserted-by":"crossref","unstructured":"Tendle, A., & Hasan, M. R. (2021). A study of the generalizability of self-supervised representations. Machine Learning with Applications, 6, Article 100124.","DOI":"10.1016\/j.mlwa.2021.100124"},{"key":"2487_CR164","unstructured":"Tong S, Chen Y, Ma Y, et\u00a0al (2023) Emp-ssl: Towards self-supervised learning in one training epoch. arXiv preprint arXiv:2304.03977"},{"key":"2487_CR165","unstructured":"Touvron H, Cord M, Douze M, et\u00a0al (2021) Training data-efficient image transformers & distillation through attention. In: International conference on machine learning, PMLR, pp 10347\u201310357"},{"issue":"7","key":"2487_CR166","doi-asserted-by":"crossref","first-page":"19929","DOI":"10.1007\/s11042-023-15607-3","volume":"83","author":"JJ Valero-Mas","year":"2024","unstructured":"Valero-Mas, J. J., Gallego, A. J., & Rico-Juan, J. R. (2024). An overview of ensemble and feature learning in few-shot image classification using siamese networks. Multimedia Tools and Applications, 83(7), 19929\u201319952.","journal-title":"Multimedia Tools and Applications"},{"key":"2487_CR167","unstructured":"Vaswani A, Shazeer N, Parmar N, et\u00a0al (2017) Attention is all you need. Advances in neural information processing systems 30"},{"key":"2487_CR168","unstructured":"Veit A, Matera T, Neumann L, et\u00a0al (2016) Coco-text: Dataset and benchmark for text detection and recognition in natural images. arXiv preprint arXiv:1601.07140"},{"key":"2487_CR169","doi-asserted-by":"crossref","unstructured":"Vincent P, Larochelle H, Bengio Y, et\u00a0al (2008) Extracting and composing robust features with denoising autoencoders. In: Proceedings of the 25th international conference on Machine learning, pp 1096\u20131103","DOI":"10.1145\/1390156.1390294"},{"key":"2487_CR170","doi-asserted-by":"crossref","unstructured":"Voigtlaender P, Doetsch P, Ney H (2016) Handwriting recognition with large multidimensional long short-term memory recurrent neural networks. In: 2016 15th international conference on frontiers in handwriting recognition (ICFHR), IEEE, pp 228\u2013233","DOI":"10.1109\/ICFHR.2016.0052"},{"key":"2487_CR171","doi-asserted-by":"crossref","unstructured":"Wan Z, Zhang J, Zhang L, et\u00a0al (2020) On vocabulary reliance in scene text recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 11425\u201311434","DOI":"10.1109\/CVPR42600.2020.01144"},{"key":"2487_CR172","doi-asserted-by":"crossref","unstructured":"Wang G, Wang K, Wang G, et\u00a0al (2021) Solving inefficiency of self-supervised representation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 9505\u20139515","DOI":"10.1109\/ICCV48922.2021.00937"},{"key":"2487_CR173","unstructured":"Wang K, Babenko B, Belongie S (2011) End-to-end scene text recognition. In: 2011 International conference on computer vision, IEEE, pp 1457\u20131464"},{"key":"2487_CR174","first-page":"5776","volume":"33","author":"W Wang","year":"2020","unstructured":"Wang, W., Wei, F., Dong, L., et al. (2020). Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. Advances in neural information processing systems, 33, 5776\u20135788.","journal-title":"Advances in neural information processing systems"},{"key":"2487_CR175","doi-asserted-by":"crossref","unstructured":"Wang, X., & Gupta, A. (2015). Unsupervised learning of visual representations using videos. In: Proceedings of the IEEE international conference on computer vision, pp 2794\u20132802","DOI":"10.1109\/ICCV.2015.320"},{"key":"2487_CR176","doi-asserted-by":"crossref","unstructured":"Wang, X. F., He, Z. H., Wang, K., et\u00a0al. (2023). A survey of text detection and recognition algorithms based on deep learning technology. Neurocomputing, 556, Article 126702.","DOI":"10.1016\/j.neucom.2023.126702"},{"key":"2487_CR177","unstructured":"Wei, C., Wang, H., Shen, W., et\u00a0al (2020). Co2: Consistent contrast for unsupervised visual representation learning. arXiv preprint arXiv:2010.02217"},{"key":"2487_CR178","doi-asserted-by":"crossref","unstructured":"Xiang, H., Zou, Q., Nawaz, M. A., et\u00a0al. (2023). Deep learning for image inpainting: A survey. Pattern Recognition, 134, Article 109046.","DOI":"10.1016\/j.patcog.2022.109046"},{"key":"2487_CR179","doi-asserted-by":"crossref","unstructured":"Xie, Z., Zhang, Z., Cao, Y., et\u00a0al (2022). Simmim: A simple framework for masked image modeling. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9653\u20139663","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"2487_CR180","doi-asserted-by":"crossref","unstructured":"Yamaguchi, S., Kanai, S., Shioda, T., et\u00a0al (2021). Image enhanced rotation prediction for self-supervised learning. In: 2021 IEEE International Conference on Image Processing (ICIP), IEEE, pp 489\u2013493","DOI":"10.1109\/ICIP42928.2021.9506132"},{"key":"2487_CR181","doi-asserted-by":"crossref","unstructured":"Yang, M., Liao, M., Lu, P., et\u00a0al (2022a). Reading and writing: Discriminative and generative modeling for self-supervised text recognition. In: Proceedings of the 30th ACM International Conference on Multimedia, pp 4214\u20134223","DOI":"10.1145\/3503161.3547784"},{"key":"2487_CR182","doi-asserted-by":"crossref","unstructured":"Yang, M., Yang, B., Liao, M., et\u00a0al. (2024). Class-aware mask-guided feature refinement for scene text recognition. Pattern Recognition, 149, Article 110244.","DOI":"10.1016\/j.patcog.2023.110244"},{"key":"2487_CR183","doi-asserted-by":"crossref","unstructured":"Yang, Z., Lu, Y., Wang, J., et\u00a0al (2021). Tap: Text-aware pre-training for text-vqa and text-caption. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8751\u20138761","DOI":"10.1109\/CVPR46437.2021.00864"},{"issue":"1","key":"2487_CR184","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1109\/TNNLS.2022.3172423","volume":"35","author":"Z Yang","year":"2022","unstructured":"Yang, Z., Yu, H., He, Y., et al. (2022). Fully convolutional network-based self-supervised learning for semantic segmentation. IEEE Transactions on Neural Networks and Learning Systems, 35(1), 132\u2013142.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"2487_CR185","doi-asserted-by":"crossref","unstructured":"Yenduri, G., Srivastava, G., Maddikunta, P. K. R., et\u00a0al (2023). Generative pre-trained transformer: A comprehensive review on enabling technologies, potential applications, emerging challenges, and future directions. arXiv preprint arXiv:2305.10435","DOI":"10.1109\/ACCESS.2024.3389497"},{"key":"2487_CR186","doi-asserted-by":"crossref","unstructured":"Yousef, M., Hussain, K. F., & Mohammed, U. S. (2020). Accurate, data-efficient, unconstrained text recognition with convolutional neural networks. Pattern Recognition, 108, Article 107482.","DOI":"10.1016\/j.patcog.2020.107482"},{"key":"2487_CR187","doi-asserted-by":"crossref","unstructured":"Yu, E., & Zhang, Z. (2021). English billboard text recognition using deep learning. In: Journal of Physics: Conference Series, IOP Publishing, p 012003","DOI":"10.1088\/1742-6596\/1994\/1\/012003"},{"key":"2487_CR188","unstructured":"Yu, J., Li, X., Koh, J. Y., et\u00a0al (2021). Vector-quantized image modeling with improved vqgan. arXiv preprint arXiv:2110.04627"},{"key":"2487_CR189","unstructured":"Zbontar, J., Jing, L., Misra, I., et\u00a0al (2021). Barlow twins: Self-supervised learning via redundancy reduction. In: International conference on machine learning, PMLR, pp 12310\u201312320"},{"issue":"7","key":"2487_CR190","doi-asserted-by":"crossref","first-page":"4727","DOI":"10.1109\/TITS.2020.3017632","volume":"22","author":"C Zhang","year":"2020","unstructured":"Zhang, C., Ding, W., Peng, G., et al. (2020). Street view text recognition with deep learning for urban scene understanding in intelligent transportation systems. IEEE Transactions on Intelligent Transportation Systems, 22(7), 4727\u20134743.","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"2487_CR191","doi-asserted-by":"crossref","unstructured":"Zhang, C., Zhang, C., Song, J., et\u00a0al (2022a). A survey on masked autoencoder for self-supervised learning in vision and beyond. arXiv preprint arXiv:2208.00173","DOI":"10.24963\/ijcai.2023\/762"},{"key":"2487_CR192","doi-asserted-by":"crossref","unstructured":"Zhang, C., Zhang, K., Pham, T. X., et\u00a0al (2022b). Dual temperature helps contrastive learning without many negative samples: Towards understanding and simplifying moco. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14441\u201314450","DOI":"10.1109\/CVPR52688.2022.01404"},{"key":"2487_CR193","unstructured":"Zhang, C., Zhang, K., Zhang, C., et\u00a0al (2022c). How does simsiam avoid collapse without negative samples? a unified understanding with self-supervised contrastive learning. arXiv preprint arXiv:2203.16262"},{"key":"2487_CR194","doi-asserted-by":"crossref","unstructured":"Zhang, D., Nan, F., Wei, X., et\u00a0al (2021). Supporting clustering with contrastive learning. arXiv preprint arXiv:2103.12953","DOI":"10.18653\/v1\/2021.naacl-main.427"},{"key":"2487_CR195","doi-asserted-by":"crossref","unstructured":"Zhang, J., Lin, T., Xu, Y., et\u00a0al (2023). Relational contrastive learning for scene text recognition. In: Proceedings of the 31st ACM International Conference on Multimedia, pp 5764\u20135775","DOI":"10.1145\/3581783.3612247"},{"key":"2487_CR196","doi-asserted-by":"crossref","unstructured":"Zhang, L., Qi, G. J., Wang, L., et\u00a0al (2019a). Aet vs. aed: Unsupervised representation learning by auto-encoding transformations rather than data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2547\u20132555","DOI":"10.1109\/CVPR.2019.00265"},{"key":"2487_CR197","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., & Efros, A. A. (2016). Colorful image colorization. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part III 14, Springer, pp 649\u2013666","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"2487_CR198","doi-asserted-by":"crossref","unstructured":"Zhang, R., Zhou, Y., Jiang, Q., et\u00a0al (2019b). Icdar 2019 robust reading challenge on reading chinese text on signboard. In: 2019 international conference on document analysis and recognition (ICDAR), IEEE, pp 1577\u20131581","DOI":"10.1109\/ICDAR.2019.00253"},{"key":"2487_CR199","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wang, J., Jin, L., et\u00a0al (2022d). Cmt-co: Contrastive learning with character movement task for handwritten text recognition. In: Proceedings of the Asian Conference on Computer Vision, pp 3104\u20133120","DOI":"10.1007\/978-3-031-26293-7_37"},{"key":"2487_CR200","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wang, T., Wang, J., et\u00a0al (2022e). Chaco: character contrastive learning for handwritten text recognition. In: International Conference on Frontiers in Handwriting Recognition, Springer, pp 345\u2013359","DOI":"10.1007\/978-3-031-21648-0_24"},{"key":"2487_CR201","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhu, B., Yao, X., et\u00a0al (2022f). Context-based contrastive learning for scene text recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 3353\u20133361","DOI":"10.1609\/aaai.v36i3.20245"},{"key":"2487_CR202","unstructured":"Zhang, Y., Gueguen, L., Zharkov, I., et\u00a0al (2017). Uber-text: A large-scale dataset for optical character recognition from street-level imagery. In: SUNw: Scene Understanding Workshop-CVPR, p\u00a05"},{"key":"2487_CR203","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Nie, S., Liu, W., et\u00a0al (2019c). Sequence-to-sequence domain adaptation network for robust text image recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2740\u20132749","DOI":"10.1109\/CVPR.2019.00285"},{"key":"2487_CR204","first-page":"2543","volume":"34","author":"M Zheng","year":"2021","unstructured":"Zheng, M., You, S., Wang, F., et al. (2021). Ressl: Relational self-supervised learning with weak augmentation. Advances in Neural Information Processing Systems, 34, 2543\u20132555.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"2","key":"2487_CR205","doi-asserted-by":"crossref","first-page":"268","DOI":"10.3390\/e25020268","volume":"25","author":"J Zhou","year":"2023","unstructured":"Zhou, J., Li, G., Wang, R., et al. (2023). A novel contrastive self-supervised learning framework for solving data imbalance in solder joint defect detection. Entropy, 25(2), 268.","journal-title":"Entropy"},{"issue":"4","key":"2487_CR206","first-page":"4396","volume":"45","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Liu, Z., Qiao, Y., et al. (2022). Domain generalization: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(4), 4396\u20134415.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2487_CR207","doi-asserted-by":"crossref","unstructured":"Zhuang, J., Ren, Y., Li, X., et\u00a0al (2022). Text-level contrastive learning for scene text recognition. In: 2022 International Conference on Asian Language Processing (IALP), IEEE, pp 231\u2013236","DOI":"10.1109\/IALP57159.2022.9961322"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02487-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02487-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02487-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T08:05:13Z","timestamp":1757405113000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02487-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,29]]},"references-count":207,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["2487"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02487-3","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,29]]},"assertion":[{"value":"29 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}