{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:16:37Z","timestamp":1780355797420,"version":"3.54.1"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030863302","type":"print"},{"value":"9783030863319","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86331-9_48","type":"book-chapter","created":{"date-parts":[[2021,9,4]],"date-time":"2021-09-04T02:05:57Z","timestamp":1630721157000},"page":"748-761","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Data Centric Domain Adaptation for Historical Text with OCR Errors"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4542-2437","authenticated-orcid":false,"given":"Luisa","family":"M\u00e4rz","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7190-2090","authenticated-orcid":false,"given":"Stefan","family":"Schweter","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1292-7052","authenticated-orcid":false,"given":"Nina","family":"Poerner","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0362-0267","authenticated-orcid":false,"given":"Benjamin","family":"Roth","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9514-7934","authenticated-orcid":false,"given":"Hinrich","family":"Sch\u00fctze","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,9,2]]},"reference":[{"key":"48_CR1","unstructured":"Akbik, A., Bergmann, T., Blythe, D., Rasul, K., Schweter, S., Vollgraf, R.: FLAIR: an easy-to-use framework for state-of-the-art NLP. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations), pp. 54\u201359. Association for Computational Linguistics, Minneapolis (June 2019). https:\/\/www.aclweb.org\/anthology\/N19-4010"},{"key":"48_CR2","doi-asserted-by":"crossref","unstructured":"Akbik, A., Bergmann, T., Vollgraf, R.: Pooled contextualized embeddings for named entity recognition. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Long and Short Papers), vol. 1, pp. 724\u2013728. Association for Computational Linguistics, Minneapolis (June 2019)","DOI":"10.18653\/v1\/N19-1078"},{"key":"48_CR3","unstructured":"Akbik, A., Blythe, D., Vollgraf, R.: Contextual string embeddings for sequence labeling. In: 27th International Conference on Computational Linguistics, COLING 2018, pp. 1638\u20131649 (2018)"},{"key":"48_CR4","doi-asserted-by":"crossref","unstructured":"Baevski, A., Edunov, S., Liu, Y., Zettlemoyer, L., Auli, M.: Cloze-driven pretraining of self-attention networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 5359\u20135368. Association for Computational Linguistics, Hong Kong (November 2019). https:\/\/www.aclweb.org\/anthology\/D19-1539","DOI":"10.18653\/v1\/D19-1539"},{"key":"48_CR5","unstructured":"Berg-Kirkpatrick, T., Durrett, G., Klein, D.: Unsupervised transcription of historical documents. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 207\u2013217. Association for Computational Linguistics, Sofia (August 2013). https:\/\/www.aclweb.org\/anthology\/P13-1021"},{"key":"48_CR6","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguist. 5, 135\u2013146 (2017). https:\/\/www.aclweb.org\/anthology\/Q17-1010","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"48_CR7","unstructured":"Boros, E., et al.: Robust named entity recognition and linking on historical multilingual documents. In: Conference and Labs of the Evaluation Forum (CLEF 2020). Working Notes of CLEF 2020 - Conference and Labs of the Evaluation Forum, vol. 2696, pp. 1\u201317. CEUR-WS Working Notes, Thessaloniki (September 2020). https:\/\/hal.archives-ouvertes.fr\/hal-03026969"},{"key":"48_CR8","unstructured":"\u00c7avdar, M.: Distant supervision for French relation extraction (2017)"},{"key":"48_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Long and Short Papers), vol. 1, pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis (June 2019). https:\/\/www.aclweb.org\/anthology\/N19-1423"},{"key":"48_CR10","unstructured":"Ehrmann, M., Colavizza, G., Rochat, Y., Kaplan, F.: Diachronic evaluation of NER systems on old newspapers. In: KONVENS (2016)"},{"key":"48_CR11","doi-asserted-by":"crossref","unstructured":"Ehrmann, M., Romanello, M., Fl\u00fcckiger, A., Clematide, S.: Extended overview of CLEF HIPE 2020: named entity processing on historical newspapers. Zenodo (October 2020)","DOI":"10.1007\/978-3-030-58219-7_21"},{"key":"48_CR12","doi-asserted-by":"crossref","unstructured":"Finkel, J.R., Grenager, T., Manning, C.: Incorporating non-local information into information extraction systems by Gibbs sampling. In: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL 2005), pp. 363\u2013370. Association for Computational Linguistics, Ann Arbor (June 2005). https:\/\/www.aclweb.org\/anthology\/P05-1045","DOI":"10.3115\/1219840.1219885"},{"key":"48_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-030-54956-5_7","volume-title":"Digital Libraries for Open Knowledge","author":"A Hamdi","year":"2020","unstructured":"Hamdi, A., Jean-Caurant, A., Sid\u00e8re, N., Coustaty, M., Doucet, A.: Assessing and minimizing the impact of OCR quality on named entity recognition. In: Hall, M., Mer\u010dun, T., Risse, T., Duchateau, F. (eds.) TPDL 2020. LNCS, vol. 12246, pp. 87\u2013101. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-54956-5_7"},{"key":"48_CR14","doi-asserted-by":"publisher","unstructured":"Jean-Caurant, A., Tamani, N., Courboulay, V., Burie, J.: Lexicographical-based order for post-OCR correction of named entities. In: 14th IAPR International Conference on Document Analysis and Recognition, ICDAR 2017, Kyoto, Japan, November 9\u201315, 2017, pp. 1192\u20131197. IEEE (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.197","DOI":"10.1109\/ICDAR.2017.197"},{"key":"48_CR15","doi-asserted-by":"crossref","unstructured":"Lample, G., Ballesteros, M., Subramanian, S., Kawakami, K., Dyer, C.: Neural architectures for named entity recognition. In: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 260\u2013270. Association for Computational Linguistics, San Diego (June 2016). https:\/\/www.aclweb.org\/anthology\/N16-1030","DOI":"10.18653\/v1\/N16-1030"},{"key":"48_CR16","doi-asserted-by":"publisher","unstructured":"Martinek, J., Lenc, L., Kr\u00e1l, P., Nicolaou, A., Christlein, V.: Hybrid training data for historical text OCR, pp. 565\u2013570 (September 2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00096","DOI":"10.1109\/ICDAR.2019.00096"},{"key":"48_CR17","unstructured":"Neudecker, C.: An open corpus for named entity recognition in historic newspapers. In: Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016), pp. 4348\u20134352. European Language Resources Association (ELRA), Portoro\u017e (May 2016). https:\/\/www.aclweb.org\/anthology\/L16-1689"},{"key":"48_CR18","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.artint.2012.03.006","volume":"194","author":"J Nothman","year":"2013","unstructured":"Nothman, J., Ringland, N., Radford, W., Murphy, T., Curran, J.R.: Learning multilingual named entity recognition from Wikipedia. Artif. Intell. 194, 151\u2013175 (2013)","journal-title":"Artif. Intell."},{"key":"48_CR19","doi-asserted-by":"crossref","unstructured":"Piktus, A., Edizel, N.B., Bojanowski, P., Grave, E., Ferreira, R., Silvestri, F.: Misspelling oblivious word embeddings. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Long and Short Papers), vol. 1, pp. 3226\u20133234. Association for Computational Linguistics, Minneapolis (June 2019). https:\/\/www.aclweb.org\/anthology\/N19-1326","DOI":"10.18653\/v1\/N19-1326"},{"key":"48_CR20","doi-asserted-by":"crossref","unstructured":"Ramponi, A., Plank, B.: Neural unsupervised domain adaptation in NLP\u2013a survey (2020)","DOI":"10.18653\/v1\/2020.coling-main.603"},{"key":"48_CR21","unstructured":"Schick, T., Sch\u00fctze, H.: Rare words: a major problem for contextualized embeddings and how to fix it by attentive mimicking. CoRR abs\/1904.06707 (2019). http:\/\/arxiv.org\/abs\/1904.06707"},{"key":"48_CR22","doi-asserted-by":"crossref","unstructured":"Schweter, S., Baiter, J.: Towards robust named entity recognition for historic German. In: Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019), pp. 96\u2013103. Association for Computational Linguistics, Florence (August 2019). https:\/\/www.aclweb.org\/anthology\/W19-4312","DOI":"10.18653\/v1\/W19-4312"},{"key":"48_CR23","unstructured":"Schweter, S., M\u00e4rz, L.: Triple E - effective ensembling of embeddings and language models for NER of historical German. In: Cappellato, L., Eickhoff, C., Ferro, N., N\u00e9v\u00e9ol, A. (eds.) Working Notes of CLEF 2020 - Conference and Labs of the Evaluation Forum, Thessaloniki, Greece, September 22\u201325, 2020. CEUR Workshop Proceedings, vol. 2696. CEUR-WS.org (2020). http:\/\/ceur-ws.org\/Vol-2696\/paper_173.pdf"},{"key":"48_CR24","doi-asserted-by":"crossref","unstructured":"Tjong Kim Sang, E.F.: Introduction to the CoNLL-2002 shared task: language-independent named entity recognition. In: COLING-02: The 6th Conference on Natural Language Learning 2002 (CoNLL-2002) (2002). https:\/\/www.aclweb.org\/anthology\/W02-2024","DOI":"10.3115\/1118853.1118877"},{"key":"48_CR25","doi-asserted-by":"crossref","unstructured":"Tjong Kim Sang, E.F., De Meulder, F.: Introduction to the CoNLL-2003 shared task: language-independent named entity recognition. In: Proceedings of the Seventh Conference on Natural Language Learning at HLT-NAACL 2003, pp. 142\u2013147 (2003). https:\/\/www.aclweb.org\/anthology\/W03-0419","DOI":"10.3115\/1119176.1119195"},{"key":"48_CR26","doi-asserted-by":"crossref","unstructured":"Vobl, T., Gotscharek, A., Reffle, U., Ringlstetter, C., Schulz, K.U.: Pocoto - an open source system for efficient interactive postcorrection of ocred historical texts. In: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, DATeCH 2014, pp. 57\u201361. ACM, New York (2014). http:\/\/doi.acm.org\/10.1145\/2595188.2595197","DOI":"10.1145\/2595188.2595197"},{"issue":"1","key":"48_CR27","doi-asserted-by":"publisher","first-page":"168","DOI":"10.1145\/321796.321811","volume":"21","author":"RA Wagner","year":"1974","unstructured":"Wagner, R.A., Fischer, M.J.: The string-to-string correction problem. J. ACM 21(1), 168\u2013173 (1974). https:\/\/doi.org\/10.1145\/321796.321811","journal-title":"J. ACM"},{"key":"48_CR28","doi-asserted-by":"crossref","unstructured":"Yeh, A.: More accurate tests for the statistical significance of result differences. In: The 18th International Conference on Computational Linguistics, COLING 2000, vol. 2 (2000). https:\/\/www.aclweb.org\/anthology\/C00-2137","DOI":"10.3115\/992730.992783"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2021"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86331-9_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T22:03:41Z","timestamp":1756937021000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86331-9_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030863302","9783030863319"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86331-9_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"2 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lausanne","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iapr.org\/icdar2021","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"340","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"182","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Additionally, 13 competition reports are included.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}