{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:48:50Z","timestamp":1771951730787,"version":"3.50.1"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2022,4,4]],"date-time":"2022-04-04T00:00:00Z","timestamp":1649030400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,4,4]],"date-time":"2022-04-04T00:00:00Z","timestamp":1649030400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["No 770299"],"award-info":[{"award-number":["No 770299"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1007\/s00799-022-00325-2","type":"journal-article","created":{"date-parts":[[2022,4,4]],"date-time":"2022-04-04T05:02:52Z","timestamp":1649048572000},"page":"241-266","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Assessing the impact of OCR noise on multilingual event detection over digitised documents"],"prefix":"10.1007","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6299-9452","authenticated-orcid":false,"given":"Emanuela","family":"Boros","sequence":"first","affiliation":[]},{"given":"Nhu Khoa","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Ga\u00ebl","family":"Lejeune","sequence":"additional","affiliation":[]},{"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,4]]},"reference":[{"key":"325_CR1","unstructured":"Bedi, H., Patil, S., Hingmire, S., Palshikar, G.: Event timeline generation from history textbooks. In: Proceedings of the 4th Workshop on Natural Language Processing Techniques for Educational Applications (NLPTEA 2017), pp. 69\u201377 (2017)"},{"key":"325_CR2","unstructured":"Boros, E.: Neural methods for event extraction. Ph.D. thesis, Universit\u00e9 Paris Sud (2018)"},{"key":"325_CR3","doi-asserted-by":"crossref","unstructured":"Boros, E., Besan\u00e7on, R., Ferret, O., Grau, B.: The importance of character-level information in an event detection model. In: International Conference on Applications of Natural Language to Information Systems, pp. 119\u2013131. Springer (2021)","DOI":"10.1007\/978-3-030-80599-9_11"},{"key":"325_CR4","unstructured":"Boro\u015f, E., Besan\u00e7on, R., Ferret, O., Grau, B.: Int\u00e9r\u00eat des mod\u00e8les de caract\u00e8res pour la d\u00e9tection d\u2019\u00e9v\u00e9nements (the interest of character-level models for event detection). In: Actes de la 28e Conf\u00e9rence sur le Traitement Automatique des Langues Naturelles. Volume 1: conf\u00e9rence principale, pp. 179\u2013188 (2021)"},{"key":"325_CR5","doi-asserted-by":"publisher","unstructured":"Boros, E., Hamdi, A., Linhares\u00a0Pontes, E., Cabrera-Diego, L.A., Moreno, J.G., Sidere, N., Doucet, A.: Alleviating digitization errors in named entity recognition for historical documents. In: Proceedings of the 24th Conference on Computational Natural Language Learning, pp. 431\u2013441. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.conll-1.35. https:\/\/www.aclweb.org\/anthology\/2020.conll-1.35","DOI":"10.18653\/v1\/2020.conll-1.35"},{"key":"325_CR6","unstructured":"Boros, E., Linhares\u00a0Pontes, E., Cabrera-Diego, L.A., Hamdi, A., Moreno, J.G., Sid\u00e8re, N., Doucet, A.: Robust Named Entity Recognition and Linking on Historical Multilingual Documents. In: Cappellato, L., Eickhoff, C., Ferro, N., N\u00e9v\u00e9ol, A. (eds.) CLEF 2020 Working Notes. Working Notes of CLEF 2020\u2014Conference and Labs of the Evaluation Forum. CEUR-WS (2020)"},{"key":"325_CR7","doi-asserted-by":"crossref","unstructured":"Boros, E., Moreno, J., Doucet, A.: Event detection with entity markers. In: European Conference on Information Retrieval, pp. 233\u2013240. Springer (2021)","DOI":"10.1007\/978-3-030-72240-1_20"},{"key":"325_CR8","doi-asserted-by":"crossref","unstructured":"Boro\u015f, E., Romero, V., Maarand, M., Zenklov\u00e1, K., K\u0159e\u010dkov\u00e1, J., Vidal, E., Stutzmann, D., Kermorvant, C.: A comparison of sequential and combined approaches for named entity recognition in a corpus of handwritten medieval charters. In: 2020 17th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 79\u201384. IEEE (2020)","DOI":"10.1109\/ICFHR2020.2020.00025"},{"key":"325_CR9","doi-asserted-by":"crossref","unstructured":"Boschee, E., Natarajan, P., Weischedel, R.: Automatic extraction of events from open source text for predictive forecasting. In: Handbook of Computational Approaches to Counterterrorism, pp. 51\u201367. Springer (2013)","DOI":"10.1007\/978-1-4614-5311-6_3"},{"key":"325_CR10","unstructured":"Boschetti, F., Cimino, A., Dell\u2019Orletta, F., Lebani, G., Passaro, L., Picchi, P., Venturi, G., Montemagni, S., Lenci, A.: Computational analysis of historical documents: an application to Italian war bulletins in world war I and II. In: Workshop on Language Resources and Technologies for Processing and Linking Historical Documents and Archives (LRT4HDA 2014), pp. 70\u201375. ELRA (2014)"},{"key":"325_CR11","doi-asserted-by":"crossref","unstructured":"Bronstein, O., Dagan, I., Li, Q., Ji, H., Frank, A.: Seed-based event trigger labeling: how far can event descriptions get us? In: ACL, vol. 2, pp. 372\u2013376 (2015)","DOI":"10.3115\/v1\/P15-2061"},{"key":"325_CR12","unstructured":"Chen, C., Ng, V.I.: Joint modeling for Chinese event extraction with rich linguistic features. In: In COLING. Citeseer (2012)"},{"key":"325_CR13","doi-asserted-by":"crossref","unstructured":"Chen, Y., Xu, L., Liu, K., Zeng, D., Zhao, J.: Event extraction via dynamic multi-pooling convolutional neural networks. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing, vol.\u00a01, pp. 167\u2013176 (2015)","DOI":"10.3115\/v1\/P15-1017"},{"issue":"5","key":"325_CR14","doi-asserted-by":"publisher","first-page":"S10","DOI":"10.1186\/2041-1480-2-S5-S10","volume":"2","author":"N Collier","year":"2011","unstructured":"Collier, N.: Towards cross-lingual alerting for bursty epidemic events. J. Biomed. Semant. 2(5), S10 (2011)","journal-title":"J. Biomed. Semant."},{"issue":"24","key":"325_CR15","doi-asserted-by":"publisher","first-page":"2940","DOI":"10.1093\/bioinformatics\/btn534","volume":"24","author":"N Collier","year":"2008","unstructured":"Collier, N., Doan, S., Kawazoe, A., Goodwin, R.M., Conway, M., Tateno, Y., Ngo, Q.H., Dien, D., Kawtrakul, A., Takeuchi, K., et al.: Biocaster: detecting public health rumors with a web-based text mining system. Bioinformatics 24(24), 2940\u20132941 (2008)","journal-title":"Bioinformatics"},{"key":"325_CR16","unstructured":"Cybulska, A., Vossen, P.: Event models for historical perspectives: determining relations between high and low level events in text, based on the classification of time, location and participants. In: LREC (2010)"},{"key":"325_CR17","unstructured":"Cybulska, A., Vossen, P.: Historical event extraction from text. In: Proceedings of the 5th ACL-HLT Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities, pp. 39\u201343 (2011)"},{"key":"325_CR18","unstructured":"Doddington, G., Mitchell, A., Przybocki, M., Ramshaw, L., Strassel, S., Weischedel, R.: The automatic content extraction (ace) program-tasks, data, and evaluation. In: Proceedings of LREC, vol.\u00a04, pp. 837\u2013840. Citeseer (2004)"},{"key":"325_CR19","doi-asserted-by":"publisher","unstructured":"Du, X., Cardie, C.: Event extraction by answering (almost) natural questions. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 671\u2013683. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.49. https:\/\/aclanthology.org\/2020.emnlp-main.49","DOI":"10.18653\/v1\/2020.emnlp-main.49"},{"key":"325_CR20","unstructured":"Duan, S., He, R., Zhao, W.: Exploiting document level information to improve event detection via recurrent neural networks. In: Eighth International Joint Conference on Natural Language Processing (IJCNLP 2017), pp. 352\u2013361. Asian Federation of Natural Language Processing (2017)"},{"key":"325_CR21","doi-asserted-by":"crossref","unstructured":"Feng, X., Huang, L., Tang, D., Ji, H., Qin, B., Liu, T.: A language-independent neural network for event detection. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), vol.\u00a02, pp. 66\u201371 (2016)","DOI":"10.18653\/v1\/P16-2011"},{"key":"325_CR22","unstructured":"Filatova, E., Hatzivassiloglou, V.: Event-based extractive summarization (2004)"},{"key":"325_CR23","unstructured":"Giguet, E., Lucas, N.: La d\u00e9tection automatique des citations et des locuteurs dans les textes informatifs. Le discours rapport\u00e9 dans tous ses \u00e9tats: Question de fronti\u00e8res, pp. 410\u2013418 (2004)"},{"key":"325_CR24","doi-asserted-by":"crossref","unstructured":"Grishman, R., Sundheim, B.: Message understanding conference-6: a brief history. In: COLING 1996, pp. 466\u2013471 (1996)","DOI":"10.3115\/992628.992709"},{"key":"325_CR25","doi-asserted-by":"publisher","unstructured":"Hamborg, F., Lachnit, S., Schubotz, M., Hepp, T., Gipp, B.: Giveme5w: main event retrieval from news articles by extraction of the five journalistic w questions. In: Transforming Digital Worlds, pp. 356\u2013366. Springer International Publishing, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-78105-1_39","DOI":"10.1007\/978-3-319-78105-1_39"},{"key":"325_CR26","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Jean-Caurant, A., Sidere, N., Coustaty, M., Doucet, A.: An analysis of the performance of named entity recognition over ocred documents. In: 2019 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 333\u2013334. IEEE, Illinois, USA (2019)","DOI":"10.1109\/JCDL.2019.00057"},{"key":"325_CR27","unstructured":"Hong, Y., Zhang, J., Ma, B., Yao, J., Zhou, G., Zhu, Q.: Using cross-entity inference to improve event extraction. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, vol. 1, pp. 1127\u20131136. Association for Computational Linguistics (2011)"},{"key":"325_CR28","unstructured":"Huang, R., Riloff, E.: Peeling back the layers: detecting event role fillers in secondary contexts. In: ACL 2011, pp. 1137\u20131147 (2011)"},{"key":"325_CR29","doi-asserted-by":"crossref","unstructured":"Huff, A.G., Breit, N., Allen, T., Whiting, K., Kiley, C.: Evaluation and verification of the global rapid identification of threats system for infectious diseases in textual data sources. In: Interdisciplinary Perspectives on Infectious Diseases (2016)","DOI":"10.1155\/2016\/5080746"},{"key":"325_CR30","unstructured":"Ide, N., Woolner, D.: Exploiting semantic web technologies for intelligent access to historical documents. In: LREC. Citeseer (2004)"},{"issue":"4","key":"325_CR31","doi-asserted-by":"publisher","first-page":"62","DOI":"10.3390\/jimaging3040062","volume":"3","author":"N Journet","year":"2017","unstructured":"Journet, N., Visani, M., Mansencal, B., Van-Cuong, K., Billy, A.: Doccreator: a new software for creating synthetic ground-truthed document images. J. Imaging 3(4), 62 (2017)","journal-title":"J. Imaging"},{"key":"325_CR32","doi-asserted-by":"publisher","unstructured":"Lai, V., Nguyen, M.V., Kaufman, H., Nguyen, T.H.: Event extraction from historical texts: A new dataset for black rebellions. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, pp. 2390\u20132400. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.findings-acl.211. https:\/\/aclanthology.org\/2021.findings-acl.211","DOI":"10.18653\/v1\/2021.findings-acl.211"},{"key":"325_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2015.06.005","author":"G Lejeune","year":"2015","unstructured":"Lejeune, G., Brixtel, R., Doucet, A., Lucas, N.: Multilingual event extraction for epidemic detection. Artif. Intell. Med. (2015). https:\/\/doi.org\/10.1016\/j.artmed.2015.06.005","journal-title":"Artif. Intell. Med."},{"issue":"4","key":"325_CR34","first-page":"1249","volume":"22","author":"G Lejeune","year":"2018","unstructured":"Lejeune, G., Zhu, L.: A new proposal for evaluating web page cleaning tools. Computacion y Sistemas 22(4), 1249\u20131258 (2018)","journal-title":"Computacion y Sistemas"},{"key":"325_CR35","unstructured":"Li, Q., Ji, H., Huang, L.: Joint event extraction via structured prediction with global features. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 73\u201382. Association for Computational Linguistics, Sofia, Bulgaria (2013). https:\/\/www.aclweb.org\/anthology\/P13-1008"},{"key":"325_CR36","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/978-3-030-64452-9_19","volume-title":"Digital Libraries at Times of Massive Societal Transition","author":"E Linhares Pontes","year":"2020","unstructured":"Linhares Pontes, E., Cabrera-Diego, L.A., Moreno, J.G., Boros, E., Hamdi, A., Sid\u00e8re, N., Coustaty, M., Doucet, A.: Entity linking for historical documents: challenges and solutions. In: Ishita, E., Pang, N.L.S., Zhou, L. (eds.) Digital Libraries at Times of Massive Societal Transition, pp. 215\u2013231. Springer, Cham (2020)"},{"key":"325_CR37","doi-asserted-by":"publisher","unstructured":"Linhares\u00a0Pontes, E., Hamdi, A., Sidere, N., Doucet, A.: Impact of OCR quality on named entity linking. In: Digital Libraries at the Crossroads of Digital Information for the Future\u201421st International Conference on Asia-Pacific Digital Libraries, ICADL 2019, Kuala Lumpur, Malaysia, November 4\u20137, 2019, Proceedings, pp. 102\u2013115 (2019). https:\/\/doi.org\/10.1007\/978-3-030-34058-2_11","DOI":"10.1007\/978-3-030-34058-2_11"},{"key":"325_CR38","doi-asserted-by":"crossref","unstructured":"Liu, J., Chen, Y., Liu, K., Bi, W., Liu, X.: Event extraction as machine reading comprehension. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1641\u20131651 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.128"},{"key":"325_CR39","doi-asserted-by":"crossref","unstructured":"Liu, M., Li, W., Wu, M., Lu, Q.: Extractive summarization based on event term clustering. In: Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics Companion Volume Proceedings of the Demo and Poster Sessions, pp. 185\u2013188 (2007)","DOI":"10.3115\/1557769.1557823"},{"key":"325_CR40","unstructured":"Lucas, N.: The enunciative structure of news dispatches, a contrastive rhetorical approach. in: Language, Culture, Rhetoric, pp. 154\u2013164 (2004)"},{"key":"325_CR41","unstructured":"Lucas, N.: Mod\u00e9lisation diff\u00e9rentielle du texte, de la linguistique aux algorithmes. Ph.D. thesis, Universit\u00e9 de Caen (2009)"},{"key":"325_CR42","volume-title":"1st International Conference on Learning Representations, ICLR 2013, p","author":"T Mikolov","year":"2013","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. In: Bengio, Y., LeCun, Y. (eds.) 1st International Conference on Learning Representations, ICLR 2013, p. IEEE, Scottsdale, Arizona, USA (2013)"},{"key":"325_CR43","doi-asserted-by":"crossref","unstructured":"Miller, D., Boisen, S., Schwartz, R., Stone, R., Weischedel, R.: Named entity extraction from noisy input: speech and OCR. In: Proceedings of the Sixth Conference on Applied Natural Language Processing, pp. 316\u2013324. Association for Computational Linguistics (2000)","DOI":"10.3115\/974147.974191"},{"key":"325_CR44","doi-asserted-by":"crossref","unstructured":"Muller, B., Sagot, B., Seddah, D.: Enhancing bert for lexical normalization. In: Proceedings of the 5th Workshop on Noisy User-Generated Text (W-NUT 2019), pp. 297\u2013306 (2019)","DOI":"10.18653\/v1\/D19-5539"},{"key":"325_CR45","doi-asserted-by":"crossref","unstructured":"Mutuvi, S., Boros, E., Doucet, A., Lejeune, G., Jatowt, A., Odeo, M.: Multilingual epidemiological text classification: a comparative study. In: COLING, International Conference on Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.coling-main.543"},{"key":"325_CR46","doi-asserted-by":"crossref","unstructured":"Mutuvi, S., Doucet, A., Odeo, M., Jatowt, A.: Evaluating the impact of OCR errors on topic modeling. In: International Conference on Asian Digital Libraries, pp. 3\u201314. Springer, Berlin (2018)","DOI":"10.1007\/978-3-030-04257-8_1"},{"key":"325_CR47","doi-asserted-by":"publisher","unstructured":"Nguyen, T.H., Cho, K., Grishman, R.: Joint event extraction via recurrent neural networks. In: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 300\u2013309. Association for Computational Linguistics, San Diego, California (2016). https:\/\/doi.org\/10.18653\/v1\/N16-1034. https:\/\/www.aclweb.org\/anthology\/N16-1034","DOI":"10.18653\/v1\/N16-1034"},{"key":"325_CR48","doi-asserted-by":"publisher","unstructured":"Nguyen, T.H., Grishman, R.: Event detection and domain adaptation with convolutional neural networks. In: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 365\u2013371. Association for Computational Linguistics, Beijing, China (2015). https:\/\/doi.org\/10.3115\/v1\/P15-2060. https:\/\/www.aclweb.org\/anthology\/P15-2060","DOI":"10.3115\/v1\/P15-2060"},{"key":"325_CR49","doi-asserted-by":"crossref","unstructured":"Nguyen, T.H., Grishman, R.: Modeling skip-grams for event detection with convolutional neural networks. In: Proceedings of EMNLP (2016)","DOI":"10.18653\/v1\/D16-1085"},{"key":"325_CR50","doi-asserted-by":"crossref","unstructured":"Nguyen, T.T.H., Jatowt, A., Coustaty, M., Nguyen, N.V., Doucet, A.: Deep statistical analysis of ocr errors for effective post-ocr processing. In: Proceedings of the 18th Joint Conference on Digital Libraries, pp. 29\u201338 (2019)","DOI":"10.1109\/JCDL.2019.00015"},{"key":"325_CR51","volume-title":"Integrated interdisciplinary workflows for research on historical newspapers: perspectives from humanities scholars, computer scientists, and librarians","author":"S Oberbichler","year":"2021","unstructured":"Oberbichler, S., Boro\u015f, E., Doucet, A., Marjanen, J., Pfanzelter, E., Rautiainen, J., Toivonen, H., Tolonen, M.: Integrated interdisciplinary workflows for research on historical newspapers: perspectives from humanities scholars, computer scientists, and librarians. J. Assoc. Inform. Sci, Technol (2021)"},{"key":"325_CR52","doi-asserted-by":"crossref","unstructured":"Pruthi, D., Dhingra, B., Lipton, Z.C.: Combating adversarial misspellings with robust word recognition. In: 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019), pp. 5582\u20135591. Florence, Italy (2019)","DOI":"10.18653\/v1\/P19-1561"},{"key":"325_CR53","unstructured":"Riloff, E.: Automatically generating extraction patterns from untagged text. In: AAAI\u201996, pp. 1044\u20131049 (1996)"},{"issue":"1","key":"325_CR54","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/0004-3702(95)00123-9","volume":"85","author":"E Riloff","year":"1996","unstructured":"Riloff, E.: An empirical study of automated dictionary construction for information extraction in three domains. Artif. Intell. 85(1), 101\u2013134 (1996)","journal-title":"Artif. Intell."},{"key":"325_CR55","unstructured":"Rodriquez, K.J., Bryant, M., Blanke, T., Luszczynska, M.: Comparison of named entity recognition tools for raw OCR text. In: Jancsary, J., (ed.) 11th Conference on Natural Language Processing, KONVENS 2012, Empirical Methods in Natural Language Processing, September 19\u201321, 2012, Scientific Series of the \u00d6GAI, vol.\u00a05, pp. 410\u2013414. \u00d6GAI, Wien, \u00d6sterreich, Vienna, Austria (2012). http:\/\/www.oegai.at\/konvens2012\/proceedings\/60_rodriquez12w\/"},{"key":"325_CR56","unstructured":"Rovera, M., Nanni, F., Ponzetto, S.P.: Providing advanced access to historical war memoirs through the identification of events, participants and roles (2019)"},{"key":"325_CR57","doi-asserted-by":"crossref","unstructured":"Saur\u00ed, R., Knippen, R., Verhagen, M., Pustejovsky, J.: Evita: A robust event recognizer for QA systems. In: Proceedings of Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing, pp. 700\u2013707. Association for Computational Linguistics, Vancouver, British Columbia, Canada (2005). https:\/\/aclanthology.org\/H05-1088","DOI":"10.3115\/1220575.1220663"},{"key":"325_CR58","volume-title":"Events and Periods as Concepts for Organizing Historical Knowledge","author":"RB Shaw","year":"2010","unstructured":"Shaw, R.B.: Events and Periods as Concepts for Organizing Historical Knowledge. University of California, Berkeley (2010)"},{"key":"325_CR59","doi-asserted-by":"crossref","unstructured":"Smith, R.: An overview of the tesseract ocr engine. In: Ninth International Conference on Document Analysis and Recognition (ICDAR 2007), vol.\u00a02, pp. 629\u2013633. IEEE, IEEE Computer Society, USA (2007)","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"325_CR60","unstructured":"Smith, S.L., Kindermans, P., Le, Q.V.: Don\u2019t decay the learning rate, increase the batch size. CoRR abs\/1711.00489 (2017). http:\/\/arxiv.org\/abs\/1711.00489"},{"key":"325_CR61","unstructured":"Sprugnoli, R.: Event detection and classification for the digital humanities. Ph.D. thesis, University of Trento (2018)"},{"key":"325_CR62","unstructured":"Sun, L., Hashimoto, K., Yin, W., Asai, A., Li, J., Yu, P., Xiong, C.: Adv-bert: bert is not robust on misspellings! generating nature adversarial samples on bert. arXiv preprint arXiv:2003.04985 (2020)"},{"key":"325_CR63","doi-asserted-by":"publisher","first-page":"4341","DOI":"10.1016\/j.tcs.2009.07.015","volume":"410","author":"E Ukkonen","year":"2009","unstructured":"Ukkonen, E.: Maximal and minimal representations of gapped and non-gapped motifs of a string. Theor. Comput. Sci. 410, 4341\u20134349 (2009). https:\/\/doi.org\/10.1016\/j.tcs.2009.07.015","journal-title":"Theor. Comput. Sci."},{"key":"325_CR64","doi-asserted-by":"crossref","unstructured":"van Strien, D., Beelen, K., Ardanuy, M.C., Hosseini, K., McGillivray, B., Colavizza, G.: Assessing the impact of ocr quality on downstream nlp tasks. In: ICAART 2020\u2014Proceedings of the 12th International Conference on Agents and Artificial Intelligence, vol. 1, pp. 484\u2013496 (2020)","DOI":"10.5220\/0009169004840496"},{"key":"325_CR65","volume-title":"Ace 2005 multilingual training corpus","author":"C Walker","year":"2005","unstructured":"Walker, C., Stephanie, S., Julie, M., Kazuaki, M.: Ace 2005 multilingual training corpus. Linguistic Data Consortium, Technical report (2005)"},{"key":"325_CR66","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1007\/978-3-642-41491-6_29","volume-title":"Chinese Computational Linguistics and Natural Language Processing Based on Naturally Annotated Big Data","author":"P Wang","year":"2013","unstructured":"Wang, P., Sun, R., Zhao, H., Yu, K.: A new word language model evaluation metric for character based languages. In: Sun, M., Zhang, M., Lin, D., Wang, H. (eds.) Chinese Computational Linguistics and Natural Language Processing Based on Naturally Annotated Big Data, pp. 315\u2013324. Springer, Berlin (2013)"},{"key":"325_CR67","doi-asserted-by":"crossref","unstructured":"Yangarber, R., Grishman, R., Tapanainen, P., Huttunen, S.: Automatic acquisition of domain knowledge for information extraction. In: 18th International Conference on Computational Linguistics (COLING 2000), pp. 940\u2013946 (2000)","DOI":"10.3115\/992730.992782"}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-022-00325-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00799-022-00325-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-022-00325-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T14:12:34Z","timestamp":1661177554000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00799-022-00325-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,4]]},"references-count":67,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,9]]}},"alternative-id":["325"],"URL":"https:\/\/doi.org\/10.1007\/s00799-022-00325-2","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"value":"1432-5012","type":"print"},{"value":"1432-1300","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,4,4]]},"assertion":[{"value":"27 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 March 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Our code is freely available at .","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Availability of code, data, and material"}}]}}