{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:11:35Z","timestamp":1758089495961,"version":"3.44.0"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032046239"},{"type":"electronic","value":"9783032046246"}],"license":[{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04624-6_20","type":"book-chapter","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:34:26Z","timestamp":1758000866000},"page":"340-357","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Evaluating Handwritten Text Recognition in\u00a0Medieval Notarial Manuscripts: A\u00a0New Dataset and\u00a0Comprehensive Analysis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8455-7196","authenticated-orcid":false,"given":"Mariona","family":"Coll Ardanuy","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6161-2452","authenticated-orcid":false,"given":"Iban","family":"Berganzo-Besga","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2099-3567","authenticated-orcid":false,"given":"Ramon","family":"Sarobe","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4577-2381","authenticated-orcid":false,"given":"Coral","family":"Cuadrada","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,17]]},"reference":[{"key":"20_CR1","doi-asserted-by":"publisher","DOI":"10.5334\/johd.124","author":"H Alkemade","year":"2023","unstructured":"Alkemade, H., et al.: Datasheets for digital cultural heritage datasets. J. Open Hum. Data (2023). https:\/\/doi.org\/10.5334\/johd.124","journal-title":"J. Open Hum. Data"},{"key":"20_CR2","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1162\/tacl_a_00041","volume":"6","author":"EM Bender","year":"2018","unstructured":"Bender, E.M., Friedman, B.: Data statements for natural language processing: toward mitigating system bias and enabling better science. Trans. Assoc. Comput. Linguistics 6, 587\u2013604 (2018). https:\/\/doi.org\/10.1162\/tacl_a_00041","journal-title":"Trans. Assoc. Comput. Linguistics"},{"key":"20_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.jas.2022.105654","volume":"148","author":"I Berganzo-Besga","year":"2022","unstructured":"Berganzo-Besga, I., Orengo, H.A., Lumbreras, F., Aliende, P., Ramsey, M.N.: Automated detection and classification of multi-cell phytoliths using deep learning-based algorithms. J. Archaeol. Sci. 148, 105654 (2022). https:\/\/doi.org\/10.1016\/j.jas.2022.105654","journal-title":"J. Archaeol. Sci."},{"key":"20_CR4","unstructured":"Boros, E., Ehrmann, M., Romanello, M., Najem-Meyer, S., Kaplan, F.: Post-correction of historical text transcripts with large language models: an exploratory study. In: Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024), pp. 133\u2013159. Association for Computational Linguistics (2024)"},{"key":"20_CR5","unstructured":"Chagu\u00e9, A., Cl\u00e9rice, T.: \u201cI\u2019m here to fight for ground truth\u201d: HTR-United, a solution towards a common for HTR training data. In: Digital Humanities 2023: Collaboration as Opportunity (2023)"},{"key":"20_CR6","doi-asserted-by":"publisher","unstructured":"Claustre, J., et al.: The e-NDP project: collaborative digital edition of the Chapter registers of Notre-Dame of Paris (1326-1504). Ground-truth for handwriting text recognition (HTR) on late medieval manuscripts. (2023).https:\/\/doi.org\/10.5281\/zenodo.7575693","DOI":"10.5281\/zenodo.7575693"},{"key":"20_CR7","doi-asserted-by":"publisher","unstructured":"Cl\u00e9rice, T., et al.: CATMuS medieval: a multilingual large-scale cross-century dataset in Latin script for handwritten text recognition and beyond. In: Barney Smith, E.H., Liwicki, M., Peng, L. (eds.) ICDAR 2024. LNCS, vol. 14806, pp. 174\u2013194. Springer, Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-70543-4_11","DOI":"10.1007\/978-3-031-70543-4_11"},{"key":"20_CR8","doi-asserted-by":"publisher","unstructured":"Cl\u00e9rice, T., Vlachou-Efstathiou, M., Chagu\u00e9, A.: CREMMA Medii Aevi: Literary manuscript text recognition in Latin. J. Open Hum. Data 9, 4 (2023). https:\/\/doi.org\/10.5334\/johd.97","DOI":"10.5334\/johd.97"},{"key":"20_CR9","doi-asserted-by":"publisher","unstructured":"Cl\u00e9rice, T., Pinche, A., Vlachou-Efstathiou, M.: Generic CREMMA model for Medieval Manuscripts (Latin and Old French), 8-15th century (2023). https:\/\/doi.org\/10.5281\/zenodo.7631619","DOI":"10.5281\/zenodo.7631619"},{"key":"20_CR10","unstructured":"Coll Ardanuy, M., Nanni, F., Beelen, K., Hare, L.: The past is a foreign place: improving toponym linking for historical newspapers. In: Computational Humanities Research Conference (CHR), vol. 3558, pp. 368\u2013390 (2023)"},{"key":"20_CR11","unstructured":"Crosilla, G., Klic, L., Colavizza, G.: Benchmarking large language models for handwritten text recognition. arXiv preprint arXiv:2503.15195 (2025)"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Cuadrada Maj\u00f3, C.: L\u2019Arxiu dels Marquesos de Santa Maria de Barber\u00e0. In: El rescat de les cent donzelles o de Sant Esteve: Manuscrit de la Genealogia del llinatge Pin\u00f3s, 1620. pp. 25\u201355. Publicacions URV (2018)","DOI":"10.17345\/9788484246961"},{"key":"20_CR13","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/978-3-031-58547-0_18","volume-title":"IDA 2024","author":"VMH Dang","year":"2024","unstructured":"Dang, V.M.H., Verma, R.M.: Data quality in NLP: Metrics and a comprehensive taxonomy. In: Miliou, I., Piatkowski, N., Papapetrou, P. (eds.) IDA 2024. LNCS, vol. 14641, pp. 217\u2013229. Springer, Cham (2024). https:\/\/doi.org\/10.1007\/978-3-031-58547-0_18"},{"key":"20_CR14","doi-asserted-by":"publisher","unstructured":"Gebru, T., Morgenstern, J., Vecchione, B., Vaughan, J.W., Wallach, H., III, H.D., Crawford, K.: Datasheets for datasets. Commun. ACM 64(12), 86\u201392 (2021). https:\/\/doi.org\/10.1145\/3458723","DOI":"10.1145\/3458723"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"issue":"2","key":"20_CR16","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1017\/S1351324922000110","volume":"29","author":"A Hamdi","year":"2023","unstructured":"Hamdi, A., Pontes, E.L., Sidere, N., Coustaty, M., Doucet, A.: In-depth analysis of the impact of OCR errors on named entity recognition and linking. Nat. Lang. Eng. 29(2), 425\u2013448 (2023)","journal-title":"Nat. Lang. Eng."},{"key":"20_CR17","unstructured":"Haverals, W.: Cerberus: guardian against character errors (2023). https:\/\/github.com\/WHaverals\/CERberus"},{"key":"20_CR18","unstructured":"Haverals, W., Kestemont, M.: The middle Dutch manuscripts surviving from the Carthusian monastery of Herne (14th century): constructing an open dataset of digital transcriptions. In: Computational Humanities Research Conference (CHR), vol. 3558, pp. 135\u2013152 (2023)"},{"key":"20_CR19","doi-asserted-by":"publisher","unstructured":"Hodel, T., Schoch, D., Schneider, C., Purcell, J.: General models for handwritten text recognition: feasibility and state-of-the Art. German kurrent as an example. J. Open Hum. Data (2021). https:\/\/doi.org\/10.5334\/johd.46","DOI":"10.5334\/johd.46"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Kahle, P., Colutto, S., Hackl, G., M\u00fchlberger, G.: Transkribus: a service platform for transcription, recognition and retrieval of historical documents. In: IAPR International Conference on Document Analysis and Recognition (icdar). vol.\u00a04, pp. 19\u201324. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.307"},{"key":"20_CR21","unstructured":"Kanerva, J., Ledins, C., K\u00e4pyaho, S., Ginter, F.: OCR error post-correction with LLMs in historical documents: no free lunches. arXiv preprint arXiv:2502.01205 (2025)"},{"key":"20_CR22","unstructured":"Kiessling, B.: Kraken: an universal text recognizer for the humanities. In: Digital Humanities 2019: Conference Abstracts (2019)"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Kiessling, B., Tissot, R., Stokes, P., Ezra, D.S.B.: eScriptorium: an open source platform for historical document analysis. In: International Conference on Document Analysis and Recognition Workshops, vol.\u00a02, pp. 19\u201319. IEEE (2019)","DOI":"10.1109\/ICDARW.2019.10032"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Li, M., Lv, T., Chen, J., Cui, L., Lu, Y., Florencio, D., Zhang, C., Li, Z., Wei, F.: TrOCR: transformer-based optical character recognition with pre-trained models. In: Proceedings of the AAAI conference on Artificial Intelligence, vol.\u00a037, pp. 13094\u201313102 (2023)","DOI":"10.1609\/aaai.v37i11.26538"},{"key":"20_CR25","unstructured":"Mar\u00ed i Brull, G.: Introducci\u00f3 al sistema d\u2019abreviatures d\u2019\u00e8poca medieval i moderna. Paratge 12, 37\u201344 (2000)"},{"issue":"5","key":"20_CR26","doi-asserted-by":"publisher","first-page":"1112","DOI":"10.3758\/s13423-014-0585-6","volume":"21","author":"ST Piantadosi","year":"2014","unstructured":"Piantadosi, S.T.: Zipf\u2019s word frequency law in natural language: a critical review and future directions. Psychon. Bull. Rev. 21(5), 1112\u20131130 (2014). https:\/\/doi.org\/10.3758\/s13423-014-0585-6","journal-title":"Psychon. Bull. Rev."},{"key":"20_CR27","doi-asserted-by":"publisher","unstructured":"Pinche, A., et al.: CATMuS Medieval (2024). https:\/\/doi.org\/10.5281\/zenodo.12743230","DOI":"10.5281\/zenodo.12743230"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Pinche, A., Stokes, P.: Historical documents and automatic text recognition: introduction. J. Data Min. Dig. Hum. (2024)","DOI":"10.46298\/jdmdh.13247"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Pletschacher, S., Antonacopoulos, A.: The PAGE (Page Analysis and Ground-Truth Elements) format framework. In: International Conference on Pattern Recognition, pp. 257\u2013260. IEEE Computer Society (2010)","DOI":"10.1109\/ICPR.2010.72"},{"key":"20_CR30","doi-asserted-by":"publisher","unstructured":"Puigcerver, J.: Are multidimensional recurrent layers really necessary for handwritten text recognition? In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a001, pp. 67\u201372 (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.20","DOI":"10.1109\/ICDAR.2017.20"},{"issue":"6","key":"20_CR31","doi-asserted-by":"publisher","first-page":"1658","DOI":"10.1016\/j.patcog.2012.11.024","volume":"46","author":"V Romero","year":"2013","unstructured":"Romero, V., et al.: The ESPOSALLES database: an ancient marriage license corpus for off-line handwriting recognition. Pattern Recogn. 46(6), 1658\u20131669 (2013)","journal-title":"Pattern Recogn."},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Shine, N.K., Bhutani, G., Keerthana, T.S., Rohith, G.: An approach for improving optical character recognition using contrast enhancement technique. In: Journal of Physics: Conference Series, vol.\u00a02466, p. 012009. IOP Publishing (2023)","DOI":"10.1088\/1742-6596\/2466\/1\/012009"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"van Strien, D., Beelen, K., Coll\u00a0Ardanuy, M., Hosseini, K., McGillivray, B., Colavizza, G.: Assessing the impact of OCR quality on downstream NLP tasks. In: Proceedings of the 12th International Conference on Agents and Artificial Intelligence (ICAART). SCITEPRESS-Science and Technology Publications (2020)","DOI":"10.5220\/0009169004840496"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Stutzmann, D., Moufflet, J.F., Hamel, S.: La recherche en plein texte dans les sources manuscrites m\u00e9di\u00e9vales: enjeux et perspectives du projet HIMANIS pour l\u2019\u00e9dition \u00e9lectronique. M\u00e9di\u00e9vales. Langues, Textes, Histoire 73(73), 67\u201396 (2017)","DOI":"10.4000\/medievales.8198"},{"key":"20_CR35","doi-asserted-by":"publisher","unstructured":"Stutzmann, D., Torres\u00a0Aguilar, S., Chaffenet, P.: HOME-Alcar: aligned and annotated cartularies (2021). https:\/\/doi.org\/10.5281\/zenodo.5600884","DOI":"10.5281\/zenodo.5600884"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Suzgun, M., Shieber, S.M., Jurafsky, D.: string2string: a modern python library for string-to-string algorithms. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pp. 278\u2013285 (2024)","DOI":"10.18653\/v1\/2024.acl-demos.26"},{"key":"20_CR37","unstructured":"Thomas, A., Gaizauskas, R., Lu, H.: Leveraging LLMs for post-OCR correction of historical newspapers. In: Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA)@ LREC-COLING-2024. pp. 116\u2013121 (2024)"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Torres\u00a0Aguilar, S.: Handwritten text recognition for historical documents using visual language models and GANs (2024). working paper or preprint","DOI":"10.46298\/jdmdh.10484"},{"key":"20_CR39","doi-asserted-by":"publisher","unstructured":"Torres\u00a0Aguilar, S.: Tridis: HTR model for multilingual medieval and early modern documentary manuscripts (11th-16th) (2024). https:\/\/doi.org\/10.5281\/zenodo.10788591","DOI":"10.5281\/zenodo.10788591"},{"key":"20_CR40","doi-asserted-by":"publisher","unstructured":"Torres\u00a0Aguilar, S.: Tridis v2: HTR model for multilingual medieval and early modern documentary manuscripts (11th-16th) (2024). https:\/\/doi.org\/10.5281\/zenodo.13862096","DOI":"10.5281\/zenodo.13862096"},{"key":"20_CR41","doi-asserted-by":"crossref","unstructured":"Torres\u00a0Aguilar, S., Jolivet, V.: Handwritten text recognition for documentary medieval manuscripts. J. Data Min. Dig. Hum. (2023)","DOI":"10.46298\/jdmdh.10484"},{"key":"20_CR42","unstructured":"Touvron, H., et\u00a0al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"20_CR43","unstructured":"Vidal-Gor\u00e8ne, C., Salah, C., Lucas, N., Decours-Perez, A., Perrier, A.: Enhancing Arabic Maghribi handwritten text recognition with RASAM 2: a comprehensive dataset and benchmarking. In: Computational Humanities Research (CHR), vol.\u00a03834, pp. 200\u2013216 (2024)"},{"issue":"1","key":"20_CR44","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0281041","volume":"18","author":"S Wichmann","year":"2023","unstructured":"Wichmann, S., Holman, E.W.: Cross-linguistic conditions on word length. PLoS ONE 18(1), e0281041 (2023)","journal-title":"PLoS ONE"},{"key":"20_CR45","first-page":"55006","volume":"36","author":"C Zhou","year":"2023","unstructured":"Zhou, C., et al.: LIMA: less is more for alignment. Adv. Neural. Inf. Process. Syst. 36, 55006\u201355021 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04624-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:34:36Z","timestamp":1758000876000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04624-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,17]]},"ISBN":["9783032046239","9783032046246"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04624-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,17]]},"assertion":[{"value":"17 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}