{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T19:36:50Z","timestamp":1773949010454,"version":"3.50.1"},"reference-count":83,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,2,10]],"date-time":"2024-02-10T00:00:00Z","timestamp":1707523200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2024,2,10]],"date-time":"2024-02-10T00:00:00Z","timestamp":1707523200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","doi-asserted-by":"publisher","award":["Finance Code 001"],"award-info":[{"award-number":["Finance Code 001"]}],"id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006162","name":"Funda\u00e7\u00e3o de Amparo \u00e0 Ci\u00eancia e Tecnologia do Estado de Pernambuco","doi-asserted-by":"publisher","award":["APQ-1216-1.03\/22"],"award-info":[{"award-number":["APQ-1216-1.03\/22"]}],"id":[{"id":"10.13039\/501100006162","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","award":["315251\/2018-2"],"award-info":[{"award-number":["315251\/2018-2"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","award":["141721\/2023-5"],"award-info":[{"award-number":["141721\/2023-5"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004233","name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004233","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"abstract":"<jats:title>Abstract<\/jats:title><jats:p>Offline Handwritten Text Recognition (HTR) systems concern the automatic recognition and transcription of handwritten text from scanned images to digital media. Recently, HTR research field has become increasingly important due to the growing need for digitizing documents and automating data entry across various industries. However, achieving satisfactory results depend on the amount of available samples to train an optical model. Creating and labeling large enough datasets for this purpose often require significant time and effort, that in some situations may be impractical. To address this problem, data augmentation approaches are commonly used as an essential component of HTR systems. In this way, the present work aims to identify, explore, and analyze the scope of data augmentation approaches for offline HTR systems. Furthermore, we detailed our research protocol and answered four pertinent research questions, which enabled us to discuss trends and possible gaps. A search was conducted across five scientific databases, focusing on papers published between 2012 and 2023. The search yielded 976 primary papers, with 32 meeting the criteria for inclusion in this review. Our results indicate that handwriting synthesis is an emerging research field, and we observed that Digital Image Processing (DIP) is still widely used as an image generator. Nevertheless, the application of Generative Adversarial Networks (GAN) has gained traction in recent years owing to its impressive ability to synthesize images of handwritten text with arbitrary style and content. In addition, we explored and analyzed the most commonly used datasets and text recognition levels in the selected works.<\/jats:p>","DOI":"10.1007\/s42979-023-02583-6","type":"journal-article","created":{"date-parts":[[2024,2,10]],"date-time":"2024-02-10T13:02:53Z","timestamp":1707570173000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Data Augmentation for Offline Handwritten Text Recognition: A Systematic Literature Review"],"prefix":"10.1007","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0522-2150","authenticated-orcid":false,"given":"Arthur Flor","family":"de Sousa Neto","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8327-9734","authenticated-orcid":false,"given":"Byron Leite Dantas","family":"Bezerra","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0404-3211","authenticated-orcid":false,"given":"Gabriel Calazans Duarte","family":"de Moura","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6955-9249","authenticated-orcid":false,"given":"Alejandro H\u00e9ctor","family":"Toselli","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,10]]},"reference":[{"key":"2583_CR1","volume-title":"Handwriting: recognition. Development and analysis-computer science: technology and applications","author":"B Bezerra","year":"2017","unstructured":"Bezerra B, Zanchettin C, Toselli A, Pirlo G. Handwriting: recognition. Development and analysis-computer science: technology and applications. New York: Nova Science Pub Inc; 2017."},{"key":"2583_CR2","doi-asserted-by":"publisher","unstructured":"Palehai D, Fanany MI. Handwriting recognition on form document using convolutional neural network and support vector machines (CNN-SVM). In: 5th International conference on information and communication technology (ICoIC7) (2017). https:\/\/doi.org\/10.1109\/ICoICT.2017.8074699.","DOI":"10.1109\/ICoICT.2017.8074699"},{"key":"2583_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11042-020-10151-w","volume":"80","author":"D Dhar","year":"2021","unstructured":"Dhar D, Garain A, Singh P, Sarkar R. Hp_docpres: a method for classifying printed and handwritten texts in doctor\u2019s prescription. Multimed Tools Appl. 2021;80:1\u201334. https:\/\/doi.org\/10.1007\/s11042-020-10151-w.","journal-title":"Multimed Tools Appl"},{"key":"2583_CR4","doi-asserted-by":"publisher","first-page":"208543","DOI":"10.1109\/ACCESS.2020.3039003","volume":"8","author":"AFS Neto","year":"2020","unstructured":"Neto AFS, Bezerra BLD, Lima EB, Toselli AH. HDSR-Flor: a robust end-to-end system to solve the handwritten digit string recognition problem in real complex scenarios. IEEE Access. 2020;8:208543\u201353. https:\/\/doi.org\/10.1109\/ACCESS.2020.3039003.","journal-title":"IEEE Access"},{"key":"2583_CR5","doi-asserted-by":"publisher","DOI":"10.1108\/JD-07-2018-0114","author":"G Muehlberger","year":"2019","unstructured":"Muehlberger G, et al. Transforming scholarship in the archives through handwritten text recognition: Transkribus as a case study. J Doc. 2019. https:\/\/doi.org\/10.1108\/JD-07-2018-0114.","journal-title":"J Doc"},{"key":"2583_CR6","doi-asserted-by":"publisher","first-page":"1399","DOI":"10.1016\/0031-3203(95)00013-P","volume":"28","author":"H Bunke","year":"1995","unstructured":"Bunke H, Roth M, Schukat-Talamazzini EG. Off-line cursive handwriting recognition using hidden Markov models. Pattern Recognit. 1995;28:1399\u2013413. https:\/\/doi.org\/10.1016\/0031-3203(95)00013-P.","journal-title":"Pattern Recognit"},{"key":"2583_CR7","doi-asserted-by":"publisher","unstructured":"Doetsch P, Kozielski M, Ney H. Fast and robust training of recurrent neural networks for offline handwriting recognition. In: Proceedings of international conference on frontiers in handwriting recognition, ICFHR, pp. 279\u2013284 (2014). https:\/\/doi.org\/10.1109\/ICFHR.2014.54.","DOI":"10.1109\/ICFHR.2014.54"},{"key":"2583_CR8","doi-asserted-by":"publisher","unstructured":"Toselli AH, Vidal E. Handwritten text recognition results on the Bentham collection with improved classical N-Gram-HMM methods. In: Proceedings of the 3rd international workshop on historical document imaging and processing, pp. 15\u201322 (2015). https:\/\/doi.org\/10.1145\/2809544.2809551.","DOI":"10.1145\/2809544.2809551"},{"key":"2583_CR9","doi-asserted-by":"publisher","unstructured":"Graves A, Fern\u00e1ndez S, Schmidhuber J. Multi-dimensional recurrent neural networks. In: International conference on artificial neural networks, pp 549\u2013558 (2007). https:\/\/doi.org\/10.1007\/978-3-540-74690-4_56.","DOI":"10.1007\/978-3-540-74690-4_56"},{"key":"2583_CR10","doi-asserted-by":"publisher","unstructured":"Voigtlaender P, Doetsch P, Ney H. Handwriting recognition with large multidimensional long short-term memory recurrent neural networks. In: 15th International conference on frontiers in handwriting recognition (ICFHR), pp. 228\u2013233 (2016). https:\/\/doi.org\/10.1109\/ICFHR.2016.0052.","DOI":"10.1109\/ICFHR.2016.0052"},{"key":"2583_CR11","doi-asserted-by":"publisher","first-page":"855","DOI":"10.1109\/TPAMI.2008.137","volume":"31","author":"A Graves","year":"2009","unstructured":"Graves A, et al. A novel connectionist system for unconstrained handwriting recognition. IEEE Trans Pattern Anal Mach Intell. 2009;31:855\u201368. https:\/\/doi.org\/10.1109\/TPAMI.2008.137.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2583_CR12","doi-asserted-by":"publisher","unstructured":"Bluche T, Messina R. Gated convolutional recurrent neural networks for multilingual handwriting recognition. In: 14th IAPR international conference on document analysis and recognition (ICDAR), pp. 646\u2013651 (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.111.","DOI":"10.1109\/ICDAR.2017.111"},{"key":"2583_CR13","doi-asserted-by":"publisher","unstructured":"Puigcerver J. Are multidimensional recurrent layers really necessary for handwritten text recognition? In: 14th IAPR international conference on document analysis and recognition (ICDAR), pp. 67\u201372 (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.20.","DOI":"10.1109\/ICDAR.2017.20"},{"key":"2583_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.patrec.2022.04.009","volume":"1","author":"AFS Neto","year":"2022","unstructured":"Neto AFS, Bezerra BLD, Toselli AH, Lima EB. A robust handwritten recognition system for learning on different data restriction scenarios. Pattern Recognit Lett. 2022;1:1\u20137. https:\/\/doi.org\/10.1016\/j.patrec.2022.04.009.","journal-title":"Pattern Recognit Lett"},{"key":"2583_CR15","doi-asserted-by":"publisher","unstructured":"Ingle RR, Fujii Y, Deselaers T, Baccash J, Popat AC. A scalable handwritten text recognition system. In: 2019 International conference on document analysis and recognition (ICDAR), pp. 17\u201324 (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00013.","DOI":"10.1109\/ICDAR.2019.00013"},{"key":"2583_CR16","doi-asserted-by":"publisher","unstructured":"Kass D, Vats E. Attentionhtr: handwritten text recognition based on attention encoder-decoder networks. In: Document analysis systems, pp. 507\u2013522 (2022). https:\/\/doi.org\/10.1007\/978-3-031-06555-2_34.","DOI":"10.1007\/978-3-031-06555-2_34"},{"key":"2583_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108766","volume":"129","author":"L Kang","year":"2022","unstructured":"Kang L, Riba P, Rusi\u00f1ol M, Forn\u00e9s A, Villegas M. Pay attention to what you read: non-recurrent handwritten text-line recognition. Pattern Recognit. 2022;129: 108766. https:\/\/doi.org\/10.1016\/j.patcog.2022.108766.","journal-title":"Pattern Recognit"},{"key":"2583_CR18","doi-asserted-by":"publisher","unstructured":"Scheidl H, Fiel S, Sablatnig R. Word beam search: a connectionist temporal classification decoding algorithm. In: 2018 16th International conference on frontiers in handwriting recognition (ICFHR), pp. 253\u2013258 (2018). https:\/\/doi.org\/10.1109\/ICFHR-2018.2018.00052.","DOI":"10.1109\/ICFHR-2018.2018.00052"},{"issue":"21","key":"2583_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/app10217711","volume":"10","author":"AFS Neto","year":"2020","unstructured":"Neto AFS, Bezerra BLD, Toselli AH. Towards the natural language processing as spelling correction for offline handwritten text recognition systems. Appl Sci. 2020;10(21):1\u201329. https:\/\/doi.org\/10.3390\/app10217711.","journal-title":"Appl Sci"},{"key":"2583_CR20","doi-asserted-by":"publisher","unstructured":"Jayasundara V, et\u00a0al. Textcaps: handwritten character recognition with very small datasets. In: 2019 IEEE winter conference on applications of computer vision (WACV), pp 254\u2013262 (2019). https:\/\/doi.org\/10.1109\/WACV.2019.00033.","DOI":"10.1109\/WACV.2019.00033"},{"key":"2583_CR21","doi-asserted-by":"publisher","unstructured":"Bhunia AK, Das A, Bhunia AK, Kishore PSR, Roy PP. Handwriting recognition in low-resource scripts using adversarial learning. In: 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 4762\u20134771 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00490.","DOI":"10.1109\/CVPR.2019.00490"},{"key":"2583_CR22","doi-asserted-by":"publisher","unstructured":"Pham H, et\u00a0al. Robust handwriting recognition with limited and noisy data. In: 2020 17th International conference on frontiers in handwriting recognition (ICFHR), pp. 301\u2013306 (2020). https:\/\/doi.org\/10.1109\/ICFHR2020.2020.00062.","DOI":"10.1109\/ICFHR2020.2020.00062"},{"key":"2583_CR23","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/j.patrec.2022.06.003","volume":"160","author":"MA Souibgui","year":"2022","unstructured":"Souibgui MA, Forn\u00e9s A, Kessentini Y, Megyesi B. Few shots are all you need: a progressive learning approach for low resource handwritten text recognition. Pattern Recognit Lett. 2022;160:43\u20139. https:\/\/doi.org\/10.1016\/j.patrec.2022.06.003.","journal-title":"Pattern Recognit Lett"},{"key":"2583_CR24","unstructured":"Kitchenham B, Charters S. Guidelines for performing systematic literature reviews in software engineering\u2014technical report EBSE-2007-01\u2014School of Computer Science and Mathematics (2007). https:\/\/www.elsevier.com\/__data\/promis_misc\/525444systematicreviewsguide.pdf."},{"issue":"1","key":"2583_CR25","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1016\/j.infsof.2008.09.009","volume":"51","author":"B Kitchenham","year":"2009","unstructured":"Kitchenham B, et al. Systematic literature reviews in software engineering\u2014a systematic literature review. Inf Softw Technol. 2009;51(1):7\u201315. https:\/\/doi.org\/10.1016\/j.infsof.2008.09.009.","journal-title":"Inf Softw Technol"},{"issue":"8","key":"2583_CR26","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1016\/j.infsof.2010.03.006","volume":"52","author":"B Kitchenham","year":"2010","unstructured":"Kitchenham B, et al. Systematic literature reviews in software engineering\u2014a tertiary study. Inf Softw Technol. 2010;52(8):792\u2013805. https:\/\/doi.org\/10.1016\/j.infsof.2010.03.006.","journal-title":"Inf Softw Technol"},{"key":"2583_CR27","doi-asserted-by":"publisher","unstructured":"Fogel S, Averbuch-Elor H, Cohen S, Mazor S, Litman R. Scrabblegan: semi-supervised varying length handwritten text generation. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 4323\u20134332 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00438.","DOI":"10.1109\/CVPR42600.2020.00438"},{"key":"2583_CR28","doi-asserted-by":"publisher","DOI":"10.1145\/3550070","author":"J Gan","year":"2022","unstructured":"Gan J, Wang W, Leng J, Gao X. Higan+: handwriting imitation gan with disentangled representations. ACM Trans Graph. 2022. https:\/\/doi.org\/10.1145\/3550070.","journal-title":"ACM Trans Graph"},{"key":"2583_CR29","doi-asserted-by":"publisher","first-page":"1838","DOI":"10.1109\/LSP.2021.3109541","volume":"28","author":"X Liu","year":"2021","unstructured":"Liu X, Meng G, Xiang S, Pan C. Handwritten text generation via disentangled representations. IEEE Signal Process Lett. 2021;28:1838\u201342. https:\/\/doi.org\/10.1109\/LSP.2021.3109541.","journal-title":"IEEE Signal Process Lett"},{"key":"2583_CR30","doi-asserted-by":"publisher","unstructured":"Wigington C, et al. Data augmentation for recognition of handwritten words and lines using a cnn-lstm network. In: 2017 14th IAPR International conference on document analysis and recognition (ICDAR), pp. 639\u2013645 (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.110.","DOI":"10.1109\/ICDAR.2017.110"},{"issue":"12","key":"2583_CR31","doi-asserted-by":"publisher","first-page":"8846","DOI":"10.1109\/TPAMI.2021.3122572","volume":"44","author":"L Kang","year":"2022","unstructured":"Kang L, Riba P, Rusi\u00f1ol M, Forn\u00e9s A, Villegas M. Content and style aware generation of text-line images for handwriting recognition. IEEE Trans Pattern Anal Mach Intell. 2022;44(12):8846\u201360. https:\/\/doi.org\/10.1109\/TPAMI.2021.3122572.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2583_CR32","doi-asserted-by":"publisher","unstructured":"Zdenek J, Nakayama H. Jokergan: memory-efficient model for handwritten text generation with text line awareness. In: Proceedings of the 29th ACM international conference on multimedia, pp. 5655\u20135663 (2021). https:\/\/doi.org\/10.1145\/3474085.3475713.","DOI":"10.1145\/3474085.3475713"},{"key":"2583_CR33","doi-asserted-by":"publisher","unstructured":"Kang L, et\u00a0al. Ganwriting: content-conditioned generation of styled handwritten word images. In: Computer vision\u2014ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXIII, pp. 273\u2013289 (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_17.","DOI":"10.1007\/978-3-030-58592-1_17"},{"key":"2583_CR34","doi-asserted-by":"publisher","unstructured":"Moysset B, Messina R. Manifold mixup improves text recognition with CTC loss. In: 2019 International conference on document analysis and recognition (ICDAR), pp. 799\u2013804 (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00133 .","DOI":"10.1109\/ICDAR.2019.00133"},{"key":"2583_CR35","doi-asserted-by":"publisher","unstructured":"Chen W, Su X, Zhang H. Script-level word sample augmentation for few-shot handwritten text recognition. In: 18th International conference on frontiers in handwriting recognition (ICFHR), pp. 316\u2013330 (2022). https:\/\/doi.org\/10.1007\/978-3-031-21648-0_22.","DOI":"10.1007\/978-3-031-21648-0_22"},{"key":"2583_CR36","doi-asserted-by":"publisher","unstructured":"Pippi V, Cascianelli S, Cucchiara R. Handwritten text generation from visual archetypes. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 22458\u201322467 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.02151.","DOI":"10.1109\/CVPR52729.2023.02151"},{"key":"2583_CR37","doi-asserted-by":"publisher","unstructured":"Zhu Y, Li Z, Wang T, He M, Yao C. Conditional text image generation with diffusion models. In: 2023 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 14235\u201314244 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01368.","DOI":"10.1109\/CVPR52729.2023.01368"},{"key":"2583_CR38","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/978-3-031-41679-8_18","volume":"2023","author":"J Zdenek","year":"2023","unstructured":"Zdenek J, Nakayama H. Handwritten text generation with character-specific encoding for style imitation. Doc Anal Recognit ICDAR. 2023;2023:313\u201329. https:\/\/doi.org\/10.1007\/978-3-031-41679-8_18.","journal-title":"Doc Anal Recognit ICDAR"},{"issue":"10","key":"2583_CR39","doi-asserted-by":"publisher","first-page":"14775","DOI":"10.1007\/s11042-022-13891-z","volume":"82","author":"A Das","year":"2023","unstructured":"Das A, Choudhuri A, Basu A, Sarkar R. Generation of a synthetic handwritten bangla compound character dataset using a modified conditional gan architecture. Multimed Tools Appl. 2023;82(10):14775\u201397. https:\/\/doi.org\/10.1007\/s11042-022-13891-z.","journal-title":"Multimed Tools Appl"},{"key":"2583_CR40","doi-asserted-by":"publisher","unstructured":"Burdett E, et\u00a0al. Active transfer learning for handwriting recognition. In: Frontiers in handwriting recognition: 18th international conference, ICFHR 2022, Hyderabad, India, December 4\u20137, 2022, Proceedings, pp. 245\u2013258 (2022). https:\/\/doi.org\/10.1007\/978-3-031-21648-0_17.","DOI":"10.1007\/978-3-031-21648-0_17"},{"key":"2583_CR41","unstructured":"Kudaibergen T, Hamada MA. Application of deep convolutional generative adversarial network for Russian handwritten text recognition. In: Proceedings of the 7th international conference on digital technologies in education, science and industry (DTESI), vol. 3382, pp. 1\u201311 (2022)."},{"key":"2583_CR42","doi-asserted-by":"publisher","unstructured":"Luo C, Zhu Y, Jin L, Wang, Y. Learn to augment: joint data augmentation and network optimization for text recognition. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 13743\u201313752 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01376.","DOI":"10.1109\/CVPR42600.2020.01376"},{"key":"2583_CR43","doi-asserted-by":"publisher","DOI":"10.3390\/app13095677","author":"A Yeleussinov","year":"2023","unstructured":"Yeleussinov A, Amirgaliyev Y, Cherikbayeva L. Improving OCR accuracy for Kazakh handwriting recognition using gan models. Appl Sci. 2023. https:\/\/doi.org\/10.3390\/app13095677.","journal-title":"Appl Sci"},{"key":"2583_CR44","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1007\/978-3-031-41679-8_22","volume":"2023","author":"K Nikolaidou","year":"2023","unstructured":"Nikolaidou K, et al. Wordstylist: styled verbatim handwritten text generation with latent diffusion models. Doc Anal Recognit ICDAR. 2023;2023:384\u2013401. https:\/\/doi.org\/10.1007\/978-3-031-41679-8_22.","journal-title":"Doc Anal Recognit ICDAR"},{"key":"2583_CR45","doi-asserted-by":"publisher","unstructured":"Bhunia AK, et\u00a0al. Handwriting transformers. In: 2021 IEEE\/CVF international conference on computer vision (ICCV), pp. 1066\u20131074 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00112.","DOI":"10.1109\/ICCV48922.2021.00112"},{"key":"2583_CR46","doi-asserted-by":"publisher","unstructured":"Alonso E, Moysset B, Messina R. Adversarial generation of handwritten text images conditioned on sequences. In: 2019 International conference on document analysis and recognition (ICDAR), pp. 481\u2013486 (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00083.","DOI":"10.1109\/ICDAR.2019.00083"},{"key":"2583_CR47","doi-asserted-by":"publisher","unstructured":"Spoto M, Wolf B, Fischer A, Scius-Bertrand A. Improving handwriting recognition for historical documents using synthetic text lines. In: Intertwining graphonomics with human movements, pp. 61\u201375 (2022). https:\/\/doi.org\/10.1007\/978-3-031-19745-1_5.","DOI":"10.1007\/978-3-031-19745-1_5"},{"key":"2583_CR48","doi-asserted-by":"publisher","unstructured":"Huu M-K N, Ho S-T, Nguyen V-T, Ng, TD. Multilingual-gan: a multilingual gan-based approach for handwritten generation. In: 2021 International conference on multimedia analysis and pattern recognition (MAPR), pp. 1\u20136 (2021). https:\/\/doi.org\/10.1109\/MAPR53640.2021.9585285.","DOI":"10.1109\/MAPR53640.2021.9585285"},{"key":"2583_CR49","doi-asserted-by":"publisher","unstructured":"Luo C, Zhu Y, Jin L, Li Z, Peng D. Slogan: Handwriting style synthesis for arbitrary-length and out-of-vocabulary text. In: IEEE transactions on neural networks and learning systems, pp. 1\u201313 (2022). https:\/\/doi.org\/10.1109\/TNNLS.2022.3151477.","DOI":"10.1109\/TNNLS.2022.3151477"},{"key":"2583_CR50","doi-asserted-by":"publisher","unstructured":"Shen X, Messina R. A method of synthesizing handwritten Chinese images for data augmentation. In: 2016 15th International conference on frontiers in handwriting recognition (ICFHR), pp. 114\u2013119 (2016). https:\/\/doi.org\/10.1109\/ICFHR.2016.0033.","DOI":"10.1109\/ICFHR.2016.0033"},{"key":"2583_CR51","doi-asserted-by":"publisher","unstructured":"Chang CC, Perera LPG, Khudanpur S. Crosslingual handwritten text generation using gans. In: Document analysis and recognition\u2014ICDAR 2023 workshops, pp. 285\u2013301 (2023). https:\/\/doi.org\/10.1007\/978-3-031-41501-2_20.","DOI":"10.1007\/978-3-031-41501-2_20"},{"key":"2583_CR52","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1007\/978-3-031-41685-9_27","volume":"2023","author":"Z Memon","year":"2023","unstructured":"Memon Z, Ul-Hasan A, Shafait F. Content-aware Urdu handwriting generation. Doc Anal Recognit ICDAR. 2023;2023:428\u201344. https:\/\/doi.org\/10.1007\/978-3-031-41685-9_27.","journal-title":"Doc Anal Recognit ICDAR"},{"key":"2583_CR53","doi-asserted-by":"publisher","first-page":"348","DOI":"10.1007\/978-3-031-41679-8_20","volume":"2023","author":"D Gui","year":"2023","unstructured":"Gui D, Chen K, Ding H, Huo Q. Zero-shot generation of training data with denoising diffusion probabilistic model for handwritten Chinese character recognition. Doc Anal Recognit ICDAR. 2023;2023:348\u201365. https:\/\/doi.org\/10.1007\/978-3-031-41679-8_20.","journal-title":"Doc Anal Recognit ICDAR"},{"key":"2583_CR54","doi-asserted-by":"publisher","unstructured":"Hidayat AA, Purwandari K, Cenggoro TW, Pardamean B. A convolutional neural network-based ancient Sundanese character classifier with data augmentation. In: 5th International conference on computer science and computational intelligence 2020, vol. 179, pp. 195\u2013201 (2021). https:\/\/doi.org\/10.1016\/j.procs.2020.12.025.","DOI":"10.1016\/j.procs.2020.12.025"},{"key":"2583_CR55","doi-asserted-by":"publisher","unstructured":"Hayashi T, Gyohten K, Ohki H, Takami T. A study of data augmentation for handwritten character recognition using deep learning. In: 2018 16th International conference on frontiers in handwriting recognition (ICFHR), pp. 552\u2013557 (2018). https:\/\/doi.org\/10.1109\/ICFHR-2018.2018.00102.","DOI":"10.1109\/ICFHR-2018.2018.00102"},{"key":"2583_CR56","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.861","author":"M Eltay","year":"2022","unstructured":"Eltay M, Zidouri A, Ahmad I, Elarian Y. Generative adversarial network based adaptive data augmentation for handwritten Arabic text recognition. PeerJ Comput Sci. 2022. https:\/\/doi.org\/10.7717\/peerj-cs.861.","journal-title":"PeerJ Comput Sci"},{"key":"2583_CR57","doi-asserted-by":"publisher","unstructured":"Eltay M, Zidouri A, Ahmad I, Elarian Y. Improving handwritten Arabic text recognition using an adaptive data-augmentation algorithm. In: Document analysis and recognition\u2014ICDAR 2021 workshops, pp. 322\u2013335 (2021). https:\/\/doi.org\/10.1007\/978-3-030-86198-8_23.","DOI":"10.1007\/978-3-030-86198-8_23"},{"key":"2583_CR58","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/978-3-031-41685-9_19","volume":"2023","author":"H Wang","year":"2023","unstructured":"Wang H, Wang Y, Wei H. Affganwriting: a handwriting image generation method based on multi-feature fusion. Doc Anal Recognit ICDAR. 2023;2023:302\u201312. https:\/\/doi.org\/10.1007\/978-3-031-41685-9_19.","journal-title":"Doc Anal Recognit ICDAR"},{"key":"2583_CR59","doi-asserted-by":"publisher","unstructured":"Marti U-V, Bunke H. The IAM-database: an English sentence database for offline handwriting recognition. In: International journal on document analysis and recognition, vol. 5 (2002). https:\/\/doi.org\/10.1007\/s100320200071.","DOI":"10.1007\/s100320200071"},{"key":"2583_CR60","doi-asserted-by":"publisher","unstructured":"Kleber F, Fiel S, Diem M, Sablatnig R. Cvl-database: an off-line database for writer retrieval, writer identification and word spotting. In: 2013 12th International conference on document analysis and recognition, pp. 560\u2013564 (2013). https:\/\/doi.org\/10.1109\/ICDAR.2013.117.","DOI":"10.1109\/ICDAR.2013.117"},{"key":"2583_CR61","doi-asserted-by":"publisher","unstructured":"Gatos B, et\u00a0al. Ground-truth production in the transcriptorium project. In: 2014 11th IAPR international workshop on document analysis systems, pp. 237\u2013241 (2014). https:\/\/doi.org\/10.1109\/DAS.2014.23.","DOI":"10.1109\/DAS.2014.23"},{"key":"2583_CR62","doi-asserted-by":"publisher","unstructured":"Brunessaux S, et\u00a0al. The Maurdor project: improving automatic processing of digital documents. In: 2014 11th IAPR international workshop on document analysis systems, pp. 349\u2013354 (2014). https:\/\/doi.org\/10.1109\/DAS.2014.58.","DOI":"10.1109\/DAS.2014.58"},{"key":"2583_CR63","doi-asserted-by":"publisher","unstructured":"Lee AWC, Chung J, Lee M. Gnhk: a dataset for English handwriting in the wild. In: Document analysis and recognition\u2014ICDAR 2021: 16th international conference, Lausanne, Switzerland, September 5\u201310, 2021, Proceedings, Part IV, pp. 399\u2013412 (2021). https:\/\/doi.org\/10.1007\/978-3-030-86337-1_27.","DOI":"10.1007\/978-3-030-86337-1_27"},{"key":"2583_CR64","doi-asserted-by":"publisher","unstructured":"Grosicki E, Carre M, Brodin J-M, Geoffrois E. Rimes evaluation campaign for handwritten mail processing. In: ICFHR 2008: 11th international conference on frontiers in handwriting recognition, pp. 1\u20136 (2008). https:\/\/doi.org\/10.1109\/ICDAR.2009.224.","DOI":"10.1109\/ICDAR.2009.224"},{"key":"2583_CR65","doi-asserted-by":"publisher","unstructured":"S\u00e1nchez JA, Romero V, Toselli AH, Vidal E. ICFHR2016 competition on handwritten text recognition on the read dataset. In: 2016 15th International conference on frontiers in handwriting recognition (ICFHR), pp. 630\u2013635 (2016). https:\/\/doi.org\/10.1109\/ICFHR.2016.0120.","DOI":"10.1109\/ICFHR.2016.0120"},{"key":"2583_CR66","unstructured":"National Institute of Standards and Technology (NIST). Open handwriting recognition and translation evaluation (OpenHaRT) (2010). https:\/\/www.nist.gov\/system\/files\/documents\/itl\/iad\/mig\/OpenHaRT2010_EvalPlan_v2-8.pdf."},{"key":"2583_CR67","doi-asserted-by":"publisher","unstructured":"Pechwitz M, Margner V. Baseline estimation for Arabic handwritten words. In: Proceedings eighth international workshop on frontiers in handwriting recognition, pp. 479\u2013484 (2002). https:\/\/doi.org\/10.1109\/IWFHR.2002.1030956.","DOI":"10.1109\/IWFHR.2002.1030956"},{"key":"2583_CR68","doi-asserted-by":"publisher","unstructured":"Al-Ma\u2019adeed S, Elliman D, Higgins C. A data base for Arabic handwritten text recognition research. In: Proceedings eighth international workshop on frontiers in handwriting recognition, pp. 485\u2013489 (2002). https:\/\/doi.org\/10.1109\/IWFHR.2002.1030957.","DOI":"10.1109\/IWFHR.2002.1030957"},{"key":"2583_CR69","doi-asserted-by":"publisher","unstructured":"Lee D, et\u00a0al. MADCAT phase 1 training set. In: Linguistic Data Consortium (LDC) (2012). https:\/\/doi.org\/10.35111\/9bm5-nz55.","DOI":"10.35111\/9bm5-nz55"},{"key":"2583_CR70","doi-asserted-by":"publisher","unstructured":"Lee D, et\u00a0al. MADCAT phase 2 training set. In: Linguistic Data Consortium (LDC) (2013). https:\/\/doi.org\/10.35111\/044b-ah68.","DOI":"10.35111\/044b-ah68"},{"key":"2583_CR71","doi-asserted-by":"publisher","unstructured":"Lee D, et\u00a0al. MADCAT phase 3 training set. In: Linguistic Data Consortium (LDC) (2013). https:\/\/doi.org\/10.35111\/w1px-d922.","DOI":"10.35111\/w1px-d922"},{"key":"2583_CR72","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-021-11399-6","author":"D Nurseitov","year":"2021","unstructured":"Nurseitov D, et al. Handwritten Kazakh and Russian (HKR) database for text recognition. Multimed Tools Appl. 2021. https:\/\/doi.org\/10.1007\/s11042-021-11399-6.","journal-title":"Multimed Tools Appl"},{"key":"2583_CR73","doi-asserted-by":"publisher","unstructured":"Liu C-L, Yin F, Wang D-H, Wang Q-F. Casia online and offline Chinese handwriting databases. In: 2011 International conference on document analysis and recognition, pp. 37\u201341 (2011). https:\/\/doi.org\/10.1109\/ICDAR.2011.17.","DOI":"10.1109\/ICDAR.2011.17"},{"key":"2583_CR74","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1142\/S0218001404003344","volume":"18","author":"AH Toselli","year":"2004","unstructured":"Toselli AH, et al. Integrated handwriting recognition and interpretation using finite-state models. Int J Pattern Recognit Artif Intell (IJPRAI). 2004;18:519\u201339. https:\/\/doi.org\/10.1142\/S0218001404003344.","journal-title":"Int J Pattern Recognit Artif Intell (IJPRAI)"},{"key":"2583_CR75","unstructured":"Cinnamon AI Labs. Cinnamon Handwritten OCR for Vietnamese Address Challenge Dataset \u2013 Cinnamon AI Marathon (2018). https:\/\/it.tdtu.edu.vn\/thong-tin-cuoc-thi-cinnamon-ai-marathon."},{"key":"2583_CR76","doi-asserted-by":"publisher","unstructured":"Nguyen HT, Nguyen CT, Nakagawa M. ICFHR 2018\u2014competition on Vietnamese online handwritten text recognition using hands-VNOnDB (VOHTR2018). In: 2018 16th International conference on frontiers in handwriting recognition (ICFHR), pp. 494\u2013499 (2018). https:\/\/doi.org\/10.1109\/ICFHR-2018.2018.00092.","DOI":"10.1109\/ICFHR-2018.2018.00092"},{"issue":"5","key":"2583_CR77","doi-asserted-by":"publisher","first-page":"1592","DOI":"10.1016\/j.asoc.2011.11.030","volume":"12","author":"N Das","year":"2012","unstructured":"Das N, et al. A genetic algorithm based region sampling for selection of local features in handwritten digit recognition application. Appl Soft Comput. 2012;12(5):1592\u2013606. https:\/\/doi.org\/10.1016\/j.asoc.2011.11.030.","journal-title":"Appl Soft Comput"},{"key":"2583_CR78","doi-asserted-by":"publisher","unstructured":"Suryani M, Paulus E, Hadi S, Darsa UA, Burie J-C. The handwritten Sundanese palm leaf manuscript dataset from 15th century. In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR), vol. 01, pp. 796\u2013800 (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.135.","DOI":"10.1109\/ICDAR.2017.135"},{"key":"2583_CR79","doi-asserted-by":"publisher","unstructured":"Khattak IU, Siddiqi I, Khalid S, Djeddi C. Recognition of Urdu ligatures\u2014a holistic approach. In: 2015 13th International conference on document analysis and recognition (ICDAR), pp. 71\u201375 (2015). https:\/\/doi.org\/10.1109\/ICDAR.2015.7333728.","DOI":"10.1109\/ICDAR.2015.7333728"},{"key":"2583_CR80","unstructured":"Ahmed S, et al. Ucom offline dataset\u2014an Urdu handwritten dataset generation. Int Arab J Inf Technol. 2017;14:239\u201345. https:\/\/api.semanticscholar.org\/CorpusID:1019515."},{"key":"2583_CR81","unstructured":"Japan Electronics and Information Technology Industries Association. ETL Character Database\u2014National Institute of Advanced Industrial Science and Technology (AIST) (2011). http:\/\/etlcdb.db.aist.go.jp\/."},{"key":"2583_CR82","doi-asserted-by":"publisher","unstructured":"Cho K, van Merri\u00ebnboer B, Bahdanau D, Bougares H, Fethi\u00a0Schwenk, Bengio Y. Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: 2014 Conference on empirical methods in natural language processing (EMNLP), pp. 1724\u20131734 (2014). https:\/\/doi.org\/10.3115\/v1\/D14-1179.","DOI":"10.3115\/v1\/D14-1179"},{"key":"2583_CR83","doi-asserted-by":"publisher","unstructured":"Vaswani A, et\u00a0al. Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems, pp. 6000\u20136010 (2017). https:\/\/doi.org\/10.5555\/3295222.3295349.","DOI":"10.5555\/3295222.3295349"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-02583-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-023-02583-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-023-02583-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,10]],"date-time":"2024-02-10T13:04:56Z","timestamp":1707570296000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-023-02583-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,10]]},"references-count":83,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2024,2]]}},"alternative-id":["2583"],"URL":"https:\/\/doi.org\/10.1007\/s42979-023-02583-6","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,10]]},"assertion":[{"value":"28 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 December 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no conflict of interest. They have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"258"}}