{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:05:34Z","timestamp":1778079934979,"version":"3.51.4"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T00:00:00Z","timestamp":1696550400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T00:00:00Z","timestamp":1696550400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100005619","name":"Ministerstvo Kultury","doi-asserted-by":"publisher","award":["G18P02OVV055"],"award-info":[{"award-number":["G18P02OVV055"]}],"id":[{"id":"10.13039\/501100005619","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005619","name":"Ministerstvo Kultury","doi-asserted-by":"publisher","award":["G18P02OVV055"],"award-info":[{"award-number":["G18P02OVV055"]}],"id":[{"id":"10.13039\/501100005619","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005619","name":"Ministerstvo Kultury","doi-asserted-by":"publisher","award":["G18P02OVV055"],"award-info":[{"award-number":["G18P02OVV055"]}],"id":[{"id":"10.13039\/501100005619","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s10032-023-00452-9","type":"journal-article","created":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T03:27:18Z","timestamp":1696562838000},"page":"177-193","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["SoftCTC\u2014semi-supervised learning for text recognition using soft pseudo-labels"],"prefix":"10.1007","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6853-0508","authenticated-orcid":false,"given":"Martin","family":"Ki\u0161\u0161","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6364-129X","authenticated-orcid":false,"given":"Michal","family":"Hradi\u0161","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0805-1860","authenticated-orcid":false,"given":"Karel","family":"Bene\u0161","sequence":"additional","affiliation":[]},{"given":"Petr","family":"Buchal","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5795-5938","authenticated-orcid":false,"given":"Michal","family":"Kula","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,6]]},"reference":[{"issue":"11","key":"452_CR1","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2017","unstructured":"Shi, B., Bai, X., Yao, C.: An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans. Pattern Anal. Mach. Intell. 39(11), 2298\u20132304 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"452_CR2","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., et\u00a0al.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, ICML \u201906, pp. 369-376. New York, NY, USA (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"452_CR3","volume-title":"Advances in Neural Information Processing Systems","author":"I Sutskever","year":"2014","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to Sequence Learning with Neural Networks. In: Ghahramani, Z., Welling, M., Cortes, C., et al. (eds.) Advances in Neural Information Processing Systems, vol. 27. Curran Associates Inc (2014)"},{"key":"452_CR4","unstructured":"Radford, A., Kim, J.W., Xu, T., et\u00a0al.: Robust speech recognition via large-scale weak supervision (2022)"},{"key":"452_CR5","unstructured":"Brown, T., Mann, B., Ryder, N., et\u00a0al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol\u00a033. Curran Associates Inc., pp 1877\u20131901 (2020)"},{"key":"452_CR6","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., et\u00a0al.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"452_CR7","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., et\u00a0al.: Hierarchical text-conditional image generation with CLIP latents. 2204.06125 [cs] (2022)"},{"key":"452_CR8","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1007\/978-3-030-86337-1_31","volume-title":"Document Analysis and Recognition-ICDAR 2021","author":"M Ki\u0161\u0161","year":"2021","unstructured":"Ki\u0161\u0161, M., Bene\u0161, K., Hradi\u0161, M.: AT-ST: Self-Training Adaptation Strategy for OCR in Domains with Limited Transcriptions. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) Document Analysis and Recognition-ICDAR 2021, pp. 463\u2013477. Springer International Publishing (2021)"},{"key":"452_CR9","doi-asserted-by":"crossref","unstructured":"Arazo, E., Ortego, D., Albert, P., et\u00a0al.: Pseudo-labeling and confirmation bias in deep semi-supervised learning. In: 2020 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138 (2020)","DOI":"10.1109\/IJCNN48605.2020.9207304"},{"key":"452_CR10","unstructured":"Bachman, P., Alsharif, O., Precup, D.: Learning with pseudo-ensembles. 1412.4864 [cs, stat] (2014)"},{"key":"452_CR11","unstructured":"Sajjadi, M., Javanmardi, M., Tasdizen, T.: Regularization with stochastic transformations and perturbations for deep semi-supervised learning. In: Advances in Neural Information Processing Systems, vol\u00a029. Curran Associates, Inc (2016)"},{"key":"452_CR12","unstructured":"Tarvainen, A., Valpola, H.: Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. In: Advances in Neural Information Processing Systems, vol\u00a030. Curran Associates, Inc (2017)"},{"key":"452_CR13","unstructured":"Berthelot, D., Carlini, N., Goodfellow, I., et\u00a0al.: MixMatch: A holistic approach to semi-supervised learning. In: Advances in Neural Information Processing Systems, vol\u00a032. Curran Associates, Inc (2019)"},{"key":"452_CR14","unstructured":"Kurakin, A., Raffel, C., Berthelot, D., et\u00a0al.: ReMixMatch: semi-supervised learning with distribution matching and augmentation anchoring. In: ICLR (2020)"},{"key":"452_CR15","first-page":"6256","volume-title":"Advances in Neural Information Processing Systems","author":"Q Xie","year":"2020","unstructured":"Xie, Q., Dai, Z., Hovy, E., et al.: Unsupervised Data Augmentation for Consistency Training. In: Larochelle, H., Ranzato, M., Hadsell, R., et al. (eds.) Advances in Neural Information Processing Systems, vol. 33, pp. 6256\u20136268. Curran Associates, Inc (2020)"},{"key":"452_CR16","unstructured":"Englesson, E., Azizpour, H.: Generalized jensen-shannon divergence loss for learning with noisy labels. In: Advances in Neural Information Processing Systems, vol\u00a034. Curran Associates, Inc., pp 30,284\u201330,297 (2021)"},{"key":"452_CR17","doi-asserted-by":"crossref","unstructured":"Zheng, C., Li, H., Rhee, S., et\u00a0al.: Pushing the performance limit of scene text recognizer without human annotation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 14096\u201314105 (2022)","DOI":"10.1109\/CVPR52688.2022.01372"},{"key":"452_CR18","unstructured":"Aberdam, A., Ganz, R., Mazor, S., et\u00a0al.: Multimodal semi-supervised learning for text recognition. 2205.03873 [cs] (2022)"},{"key":"452_CR19","unstructured":"Lee, D.H.: Pseudo-label : The simple and efficient semi-supervised learning method for deep neural networks. In: ICML 2013 Workshop : Challenges in Representation Learning (WREPL) (2013)"},{"key":"452_CR20","doi-asserted-by":"crossref","unstructured":"Xie, Q., Luong, M.T., Hovy, E., et\u00a0al.: Self-training with noisy student improves ImageNet classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"452_CR21","doi-asserted-by":"crossref","unstructured":"Pham, H., Dai, Z., Xie, Q., et\u00a0al.: Meta pseudo labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11557\u201311568 (2021)","DOI":"10.1109\/CVPR46437.2021.01139"},{"key":"452_CR22","doi-asserted-by":"crossref","unstructured":"Nagai, A.: Recognizing Japanese historical cursive with pseudo-labeling-aided crnn as an application of semi-supervised learning to sequence labeling. In: 2020 17th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 97\u2013102 (2020)","DOI":"10.1109\/ICFHR2020.2020.00028"},{"key":"452_CR23","doi-asserted-by":"crossref","unstructured":"Stuner, B., Chatelain, C., Paquet, T.: Self-training of BLSTM with lexicon verification for handwriting recognition. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), pp. 633\u2013638 (2017)","DOI":"10.1109\/ICDAR.2017.109"},{"key":"452_CR24","doi-asserted-by":"crossref","unstructured":"Leifert, G., Labahn, R., S\u00e1nchez, J.A.: Two semi-supervised training approaches for automated text recognition. In: 2020 17th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 145\u2013150 (2020)","DOI":"10.1109\/ICFHR2020.2020.00036"},{"key":"452_CR25","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1007\/978-3-030-57058-3_3","volume-title":"Document Analysis Systems","author":"D Das","year":"2020","unstructured":"Das, D., Jawahar, C.V.: Adapting OCR with Limited Supervision. In: Bai, X., Karatzas, D., Lopresti, D. (eds.) Document Analysis Systems, pp. 30\u201344. Springer International Publishing, Cham (2020)"},{"key":"452_CR26","unstructured":"Sohn, K., Berthelot, D., Carlini, N., et\u00a0al.: FixMatch: simplifying semi-supervised learning with consistency and confidence. In: Advances in Neural Information Processing Systems, vol\u00a033. Curran Associates, Inc., pp. 596\u2013608 (2020)"},{"key":"452_CR27","first-page":"2802","volume":"2020","author":"F Weninger","year":"2020","unstructured":"Weninger, F., Mana, F., Gemello, R., et al.: Semi-supervised learning with data augmentation for end-to-end ASR. Proc. Interspeech 2020, 2802\u20132806 (2020)","journal-title":"Proc. Interspeech"},{"key":"452_CR28","doi-asserted-by":"crossref","unstructured":"Wolf, F., Fink, G.A.: Self-training of handwritten word recognition for synthetic-to-real adaptation. In: Proceedings of the International Conference on Pattern Recognition (2022)","DOI":"10.1109\/ICPR56361.2022.9956168"},{"key":"452_CR29","unstructured":"Zhang, H., Cisse, M., Dauphin, Y.N., et\u00a0al.: Mixup: beyond empirical risk minimization. In: International Conference on Learning Representations (2018)"},{"key":"452_CR30","doi-asserted-by":"crossref","unstructured":"Frinken, V., Bunke, H.: Evaluating retraining rules for semi-supervised learning in neural network based cursive word recognition. In: 2009 10th International Conference on Document Analysis and Recognition, pp. 31\u201335 (2009)","DOI":"10.1109\/ICDAR.2009.18"},{"key":"452_CR31","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/978-3-031-06555-2_10","volume-title":"Document Analysis Systems","author":"T Constum","year":"2022","unstructured":"Constum, T., Kempf, N., Paquet, T., et al.: Recognition and Information Extraction in Historical Handwritten Tables: Toward Understanding Early $$20^{\\rm th }$$ Century Paris Census. In: Uchida, S., Barney, E., Eglin, V. (eds.) Document Analysis Systems, pp. 143\u2013157. Springer International Publishing (2022)"},{"key":"452_CR32","doi-asserted-by":"publisher","first-page":"3005","DOI":"10.1109\/TIP.2021.3051485","volume":"30","author":"Y Gao","year":"2021","unstructured":"Gao, Y., Chen, Y., Wang, J., et al.: Semi-supervised scene text recognition. IEEE Trans. Image Process. 30, 3005\u20133016 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"452_CR33","doi-asserted-by":"publisher","first-page":"3707","DOI":"10.21437\/Interspeech.2017-1566","volume":"2017","author":"H Soltau","year":"2017","unstructured":"Soltau, H., Liao, H., Sak, H.: Neural speech recognizer: acoustic-to-word LSTM model for large vocabulary speech recognition. Proc. Interspeech 2017, 3707\u20133711 (2017)","journal-title":"Proc. Interspeech"},{"issue":"8","key":"452_CR34","doi-asserted-by":"publisher","first-page":"1240","DOI":"10.1109\/JSTSP.2017.2763455","volume":"11","author":"S Watanabe","year":"2017","unstructured":"Watanabe, S., Hori, T., Kim, S., et al.: Hybrid CTC\/attention architecture for end-to-end speech recognition. IEEE J. Select. Topics Signal Process. 11(8), 1240\u20131253 (2017)","journal-title":"IEEE J. Select. Topics Signal Process."},{"issue":"2","key":"452_CR35","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner, L.R.: A tutorial on hidden Markov models and selected applications in speech recognition. Proc. IEEE 77(2), 257\u2013286 (1989)","journal-title":"Proc. IEEE"},{"key":"452_CR36","unstructured":"Bangalore, B., Bordel, G., Riccardi, G.: Computing consensus translation from multiple machine translation systems. In: IEEE Workshop on Automatic Speech Recognition and Understanding, 2001. ASRU \u201901., pp 351\u2013354 (2001)"},{"key":"452_CR37","unstructured":"Rosti, A.V., Ayan, N.F., Xiang, B., et\u00a0al.: Combining outputs from multiple machine translation systems. In: Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference. Association for Computational Linguistics, pp. 228\u2013235 (2007)"},{"key":"452_CR38","unstructured":"Fiscus, J.: A post-processing system to yield reduced word error rates: recognizer output voting error reduction (ROVER). In: 1997 IEEE Workshop on Automatic Speech Recognition and Understanding Proceedings, pp. 347\u2013354 (1997)"},{"issue":"4","key":"452_CR39","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1006\/csla.2000.0152","volume":"14","author":"L Mangu","year":"2000","unstructured":"Mangu, L., Brill, E., Stolcke, A.: Finding consensus in speech recognition: word error minimization and other applications of confusion networks. Comput. Speech Lang. 14(4), 373\u2013400 (2000)","journal-title":"Comput. Speech Lang."},{"key":"452_CR40","doi-asserted-by":"crossref","unstructured":"Sanchez, J.A., Romero, V., Toselli, A.H., et al.: ICDAR2017 Competition on handwritten text recognition on the READ dataset. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), pp. 1383\u20131388. IEEE, Kyoto (2017)","DOI":"10.1109\/ICDAR.2017.226"},{"key":"452_CR41","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, J.A., Romero, V., Toselli, A.H., et\u00a0al.: ICFHR2014 competition on handwritten text recognition on transcriptorium datasets (HTRtS). In: 2014 14th International Conference on Frontiers in Handwriting Recognition, pp. 785\u2013790, (2014)","DOI":"10.1109\/ICFHR.2014.137"},{"key":"452_CR42","unstructured":"Serrano, N., Castro, F., Juan, A.: The RODRIGO database. In: Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC\u201910). European Language Resources Association (ELRA) (2010)"},{"key":"452_CR43","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, J.A., Romero, V., Toselli, A.H., et\u00a0al.: ICFHR2016 competition on handwritten text recognition on the READ dataset. In: 2016 15th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 630\u2013635 (2016)","DOI":"10.1109\/ICFHR.2016.0120"},{"key":"452_CR44","doi-asserted-by":"crossref","unstructured":"Koh\u00fat, J., Hradi\u0161, M.: Finetuning is a surprisingly effective domain adaptation baseline in handwriting recognition. 2302.06308 [cs] (2023)","DOI":"10.1007\/978-3-031-41685-9_17"},{"key":"452_CR45","doi-asserted-by":"crossref","unstructured":"Koh\u00fat, J., Hradi\u0161, M., Ki\u0161\u0161, M.: Towards writing style adaptation in handwriting recognition. 2302.06318 [cs] (2023)","DOI":"10.1007\/978-3-031-41685-9_24"}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-023-00452-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10032-023-00452-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-023-00452-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,18]],"date-time":"2024-05-18T20:23:05Z","timestamp":1716063785000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10032-023-00452-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,6]]},"references-count":45,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["452"],"URL":"https:\/\/doi.org\/10.1007\/s10032-023-00452-9","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"value":"1433-2833","type":"print"},{"value":"1433-2825","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,6]]},"assertion":[{"value":"15 November 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 June 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 August 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors do not have any competing interests, financial or other.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}