{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T22:15:24Z","timestamp":1781820924757,"version":"3.54.5"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030549558","type":"print"},{"value":"9783030549565","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-54956-5_7","type":"book-chapter","created":{"date-parts":[[2020,8,16]],"date-time":"2020-08-16T23:02:46Z","timestamp":1597618966000},"page":"87-101","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Assessing and Minimizing the Impact of OCR Quality on Named Entity Recognition"],"prefix":"10.1007","author":[{"given":"Ahmed","family":"Hamdi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Axel","family":"Jean-Caurant","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nicolas","family":"Sid\u00e8re","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,8,17]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Bikel, D.M., Schwartz, R., Weischedel, R.M.: An algorithm that learns what\u2019s in a name. Mach. Learn. 34(1\u20133), 211\u2013231 (1999)","DOI":"10.1023\/A:1007558221122"},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Chiron, G., Doucet, A., Coustaty, M., Visani, M., Moreux, J.P.: Impact of OCR errors on the use of digital libraries: towards a better access to information. In: Proceedings of the 17th ACM\/IEEE Joint Conference on Digital Libraries, pp. 249\u2013252. IEEE Press (2017)","DOI":"10.1109\/JCDL.2017.7991582"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Chiu, J.P., Nichols, E.: Named entity recognition with bidirectional lstm-cnns. arXiv preprint arXiv:1511.08308 (2015)","DOI":"10.1162\/tacl_a_00104"},{"key":"7_CR4","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"Collobert, R., Weston, J., Bottou, L., Karlen, M., Kavukcuoglu, K., Kuksa, P.: Natural language processing (almost) from scratch. J. Mach. Learn. Res. 12, 2493\u20132537 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR5","unstructured":"Croft, W., Harding, S., Taghva, K., Borsack, J.: An evaluation of information retrieval accuracy with simulated OCR output. In: Symposium on Document Analysis and Information Retrieval, pp. 115\u2013126 (1994)"},{"key":"7_CR6","unstructured":"Erik, F., Sang, T.K.: Introduction to the CoNLL-2002 shared task: Language-independent named entity recognition. In: Proceedings of CoNLL-2002, pp. 155\u2013158 (2002)"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Favre, B., B\u00e9chet, F., Noc\u00e9ra, P.: Robust named entity extraction from large spoken archives. In: Proceedings of the conference on Human Language Technology and Empirical Methods in Natural Language Processing, pp. 491\u2013498. Association for Computational Linguistics (2005)","DOI":"10.3115\/1220575.1220637"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Finkel, J.R., Grenager, T., Manning, C.: Incorporating non-local information into information extraction systems by gibbs sampling. In: Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics, pp. 363\u2013370. Association for Computational Linguistics (2005)","DOI":"10.3115\/1219840.1219885"},{"key":"7_CR9","unstructured":"Gali, K., Surana, H., Vaidya, A., Shishtla, P.M., Sharma, D.M.: Aggregating machine learning and rule based heuristics for named entity recognition. In: Proceedings of the IJCNLP-08 Workshop on Named Entity Recognition for South and South East Asian Languages (2008)"},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"Gefen, A.: Les enjeux \u00e9pist\u00e9mologiques des humanit\u00e9s num\u00e9riques. Socio-La nouvelle revue des sciences sociales, pp. 61\u201374 (2014)","DOI":"10.4000\/socio.1296"},{"key":"7_CR11","unstructured":"Goldberg, Y., Levy, O.: word2vec explained: deriving mikolov et al\u2019.s negative-sampling word-embedding method. arXiv preprint arXiv:1402.3722 (2014)"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Grishman, R., Sundheim, B.: Message understanding conference-6: a brief history. In: The 16th International Conference on Computational Linguistics COLING 1996, vol. 1 (1996)","DOI":"10.3115\/992628.992709"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Jean-Caurant, A., Sidere, N., Coustaty, M., Doucet, A.: An analysis of the performance of named entity recognition over OCRED documents. In: 2019 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 333\u2013334. IEEE (2019)","DOI":"10.1109\/JCDL.2019.00057"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Holley, R.: How good can it get? analysing and improving OCR accuracy in largescale historic newspaper digitisation programs. D-Lib Magazine, 15(3\/4) (2009)","DOI":"10.1045\/march2009-holley"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Jing, H., Lopresti, D., Shih, C.: Summarizing noisy documents. In: Proceedings of the Symposium on Document Image Understanding Technology, pp. 111\u2013119 (2003)","DOI":"10.3115\/1119467.1119471"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Journet, N., Visani, M., Mansencal, B., Van-Cuong, K., Billy, A.: Doccreator: a new software for creating synthetic ground-truthed document images. J. Imag. 3(4), 62 (2017)","DOI":"10.3390\/jimaging3040062"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Lample, G., Ballesteros, M., Subramanian, S., Kawakami, K., Dyer, C.: Neural architectures for named entity recognition. arXiv preprint arXiv:1603.01360 (2016)","DOI":"10.18653\/v1\/N16-1030"},{"issue":"3","key":"7_CR18","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/s10032-009-0094-8","volume":"12","author":"D Lopresti","year":"2009","unstructured":"Lopresti, D.: Optical character recognition errors and their effects on natural language processing. Int. J. Document Anal. Recogn. (IJDAR) 12(3), 141\u2013151 (2009)","journal-title":"Int. J. Document Anal. Recogn. (IJDAR)"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Lund, W.B., Kennard, D.J., Ringger, E.K.: Combining multiple thresholding binarization values to improve OCR output. In: Document Recognition and Retrieval XX, vol. 8658, p. 86580R. International Society for Optics and Photonics (2013)","DOI":"10.1117\/12.2006228"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Ma, X., Hovy, E.: End-to-end sequence labeling via bi-directional LSTM-CNNS-CRF. arXiv preprint arXiv:1603.01354 (2016)","DOI":"10.18653\/v1\/P16-1101"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Miller, D., Boisen, S., Schwartz, R., Stone, R., Weischedel, R.: Named entity extraction from noisy input: speech and OCR. In: Proceedings of the Sixth Conference on Applied Natural Language Processing, pp. 316\u2013324. Association for Computational Linguistics (2000)","DOI":"10.3115\/974147.974191"},{"issue":"1","key":"7_CR22","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1075\/li.30.1.03nad","volume":"30","author":"D Nadeau","year":"2007","unstructured":"Nadeau, D., Sekine, S.: A survey of named entity recognition and classification. Lingvisticae Investigationes 30(1), 3\u201326 (2007)","journal-title":"Lingvisticae Investigationes"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Palmer, D.D., Ostendorf, M.: Improving information extraction by modeling errors in speech recognizer output. In: Proceedings of the First International Conference on Human Language Technology Research, pp. 1\u20135. Association for Computational Linguistics (2001)","DOI":"10.3115\/1072133.1072186"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"7_CR25","doi-asserted-by":"crossref","unstructured":"Peters, M.E., Neumann, M., Iyyer, M., Gardner, M., Clark, C., Lee, K., Zettlemoyer, L.: Deep contextualized word representations. arXiv preprint arXiv:1802.05365 (2018)","DOI":"10.18653\/v1\/N18-1202"},{"key":"7_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1007\/978-3-030-34058-2_11","volume-title":"Digital Libraries at the Crossroads of Digital Information for the Future","author":"E Linhares Pontes","year":"2019","unstructured":"Linhares Pontes, E., Hamdi, A., Sidere, N., Doucet, A.: Impact of OCR quality on named entity linking. In: Jatowt, A., Maeda, A., Syn, S.Y. (eds.) ICADL 2019. LNCS, vol. 11853, pp. 102\u2013115. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34058-2_11"},{"key":"7_CR27","doi-asserted-by":"publisher","unstructured":"Ramshaw, L.A., Marcus, M.P.: Text chunking using transformation-based learning. In: Armstrong, S., Church, K., Isabelle, P., Manzi, S., Tzoukermann, E., Yarowsky, D. (eds.) Natural Language Processing Using Very Large Corpora, pp. 157\u2013176. Springer, Dordrecht (1999) https:\/\/doi.org\/10.1007\/978-94-017-2390-9_10","DOI":"10.1007\/978-94-017-2390-9_10"},{"key":"7_CR28","unstructured":"Riedl, M., Pad\u00f3, S.: A named entity recognition shootout for German. In: Proceedings of ACL, pp. 120\u2013125. Melbourne, Australia (2018), http:\/\/aclweb.org\/anthology\/P18-2020.pdf"},{"key":"7_CR29","unstructured":"Ritter, A., Clark, S., Etzioni, O., et al.: Named entity recognition in tweets: an experimental study. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, pp. 1524\u20131534. Association for Computational Linguistics (2011)"},{"key":"7_CR30","unstructured":"Rodriquez, K.J., Bryant, M., Blanke, T., Luszczynska, M.: Comparison of named entity recognition tools for raw OCR text. In: KONVENS, pp. 410\u2013414 (2012)"},{"key":"7_CR31","unstructured":"Ruokolainen, T., Kettunen, K.: \u00c0 la recherche du nom perdu-searching for named entities with stanford ner in a finnish historical newspaper and journal collection. In: 13th IAPR International Workshop on Document Analysis Systems (2018)"},{"key":"7_CR32","doi-asserted-by":"crossref","unstructured":"van Strien, D., Beelen, K., Ardanuy, M.C., Hosseini, K., McGillivray, B., Colavizza, G.: Assessing the impact of OCR quality on downstream NLP tasks (2020)","DOI":"10.5220\/0009169004840496"},{"issue":"3","key":"7_CR33","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1016\/0306-4573(95)00058-5","volume":"32","author":"K Taghva","year":"1996","unstructured":"Taghva, K., Borsack, J., Condit, A.: Effects of ocr errors on ranking and feedback using the vector space model. Inf. Process. Manage. 32(3), 317\u2013327 (1996)","journal-title":"Inf. Process. Manage."},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Tjong Kim Sang, E.F., De Meulder, F.: Introduction to the CoNLL-2003 shared task: language-independent named entity recognition. In: Proceedings of the Seventh Conference on Natural Language Learning at HLT-NAACL 2003-vol. 4, pp. 142\u2013147. Association for Computational Linguistics (2003)","DOI":"10.3115\/1119176.1119195"},{"key":"7_CR35","doi-asserted-by":"crossref","unstructured":"Yalniz, I.Z., Manmatha, R.: A fast alignment scheme for automatic OCR evaluation of books. In: 2011 International Conference on Document Analysis and Recognition (ICDAR), pp. 754\u2013758. IEEE (2011)","DOI":"10.1109\/ICDAR.2011.157"},{"key":"7_CR36","unstructured":"Yaser, A.O.: Effect of degraded input on statistical machine translation. In: 2005 Symposium on Document Image Understanding Technology, p. 103 (2005)"},{"key":"7_CR37","unstructured":"Zuccon, G., Nguyen, A.N., Bergheim, A., Wickman, S., Grayson, N.: The impact of OCR accuracy on automated cancer classification of pathology reports. In: HIC, pp. 250\u2013256 (2012)"}],"container-title":["Lecture Notes in Computer Science","Digital Libraries for Open Knowledge"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-54956-5_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,24]],"date-time":"2021-04-24T02:30:26Z","timestamp":1619231426000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-54956-5_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030549558","9783030549565"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-54956-5_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"17 August 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPDL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Theory and Practice of Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lyon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpdl2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/eric.univ-lyon2.fr\/adbis-tpdl-eda-2020\/tpdl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"53","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}