{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T01:32:00Z","timestamp":1771551120701,"version":"3.50.1"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030644512","type":"print"},{"value":"9783030644529","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-64452-9_3","type":"book-chapter","created":{"date-parts":[[2020,11,27]],"date-time":"2020-11-27T17:03:14Z","timestamp":1606496594000},"page":"33-42","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["When to Use OCR Post-correction for Named Entity Recognition?"],"prefix":"10.1007","author":[{"given":"Vinh-Nam","family":"Huynh","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8964-2135","authenticated-orcid":false,"given":"Ahmed","family":"Hamdi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6160-3356","authenticated-orcid":false,"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,26]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Chiron, G., Doucet, A., Coustaty, M., Visani, M., Moreux, J.P.: Impact of OCR errors on the use of digital libraries: towards a better access to information. In: Proceedings of the 17th ACM\/IEEE Joint Conference on Digital Libraries, pp. 249\u2013252. IEEE Press (2017)","DOI":"10.1109\/JCDL.2017.7991582"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Chiu, J.P., Nichols, E.: Named entity recognition with bidirectional LSTM-CNNs. arXiv preprint arXiv:1511.08308 (2015)","DOI":"10.1162\/tacl_a_00104"},{"key":"3_CR3","unstructured":"Farahmand, A., Sarrafzadeh, H., Shanbehzadeh, J.: Document image noises and removal methods (2013)"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Gefen, A.: Les enjeux \u00e9pist\u00e9mologiques des humanit\u00e9s num\u00e9riques. Socio-La nouvelle revue des sciences sociales (4), 61\u201374 (2014)","DOI":"10.4000\/socio.1296"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Jean-Caurant, A., Sidere, N., Coustaty, M., Doucet, A.: An analysis of the performance of named entity recognition over OCRed documents. In: 2019 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 333\u2013334. IEEE (2019)","DOI":"10.1109\/JCDL.2019.00057"},{"key":"3_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-030-54956-5_7","volume-title":"Digital Libraries for Open Knowledge","author":"A Hamdi","year":"2020","unstructured":"Hamdi, A., Jean-Caurant, A., Sid\u00e8re, N., Coustaty, M., Doucet, A.: Assessing and minimizing the impact of OCR quality on named entity recognition. In: Hall, M., Mer\u010dun, T., Risse, T., Duchateau, F. (eds.) TPDL 2020. LNCS, vol. 12246, pp. 87\u2013101. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-54956-5_7"},{"issue":"4","key":"3_CR7","doi-asserted-by":"publisher","first-page":"62","DOI":"10.3390\/jimaging3040062","volume":"3","author":"N Journet","year":"2017","unstructured":"Journet, N., Visani, M., Mansencal, B., Van-Cuong, K., Billy, A.: DocCreator: a new software for creating synthetic ground-truthed document images. J. Imaging 3(4), 62 (2017)","journal-title":"J. Imaging"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Lample, G., Ballesteros, M., Subramanian, S., Kawakami, K., Dyer, C.: Neural architectures for named entity recognition. arXiv preprint arXiv:1603.01360 (2016)","DOI":"10.18653\/v1\/N16-1030"},{"issue":"3","key":"3_CR9","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/s10032-009-0094-8","volume":"12","author":"D Lopresti","year":"2009","unstructured":"Lopresti, D.: Optical character recognition errors and their effects on natural language processing. Int. J. Doc. Anal. Recognit. (IJDAR) 12(3), 141\u2013151 (2009)","journal-title":"Int. J. Doc. Anal. Recognit. (IJDAR)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Lund, W.B., Kennard, D.J., Ringger, E.K.: Combining multiple thresholding binarization values to improve OCR output. In: Document Recognition and Retrieval XX, vol. 8658, p. 86580R. International Society for Optics and Photonics (2013)","DOI":"10.1117\/12.2006228"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Ma, X., Hovy, E.: End-to-end sequence labeling via Bi-directional LSTM-CNNs-CRF. arXiv preprint arXiv:1603.01354 (2016)","DOI":"10.18653\/v1\/P16-1101"},{"issue":"5","key":"3_CR12","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/s10791-008-9055-y","volume":"11","author":"W Magdy","year":"2008","unstructured":"Magdy, W., Darwish, K.: Effect of OCR error correction on Arabic retrieval. Inf. Retr. 11(5), 405\u2013425 (2008)","journal-title":"Inf. Retr."},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Miller, D., Boisen, S., Schwartz, R., Stone, R., Weischedel, R.: Named entity extraction from noisy input: speech and OCR. In: Proceedings of the Sixth Conference on Applied Natural Language Processing, pp. 316\u2013324. Association for Computational Linguistics (2000)","DOI":"10.3115\/974147.974191"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Nguyen, T.T.H., Jatowt, A., Coustaty, M., Nguyen, N.V., Doucet, A.: Deep statistical analysis of OCR errors for effective post-OCR processing. In: 2019 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 29\u201338. IEEE (2019)","DOI":"10.1109\/JCDL.2019.00015"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"3_CR16","unstructured":"Riedl, M., Pad\u00f3, S.: A named entity recognition shootout for German. In: Proceedings of ACL, Melbourne, Australia, pp. 120\u2013125 (2018). http:\/\/aclweb.org\/anthology\/P18-2020.pdf"},{"key":"3_CR17","unstructured":"Rodriquez, K.J., Bryant, M., Blanke, T., Luszczynska, M.: Comparison of named entity recognition tools for raw OCR text. In: KONVENS, pp. 410\u2013414 (2012)"},{"key":"3_CR18","doi-asserted-by":"publisher","unstructured":"van Strien, D., Beelen, K., Ardanuy, M., Hosseini, K., McGillivray, B., Colavizza, G.: Assessing the impact of OCR quality on downstream NLP tasks. In: Proceedings of the 12th International Conference on Agents and Artificial Intelligence, Valletta, Malta, pp. 484\u2013496. SCITEPRESS - Science and Technology Publications (2020). https:\/\/doi.org\/10.5220\/0009169004840496. http:\/\/www.scitepress.org\/DigitalLibrary\/Link.aspx?doi=10.5220\/0009169004840496","DOI":"10.5220\/0009169004840496"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Tjong Kim Sang, E.F., De Meulder, F.: Introduction to the CoNLL-2003 shared task: language-independent named entity recognition. In: Proceedings of the Seventh Conference on Natural Language Learning at HLT-NAACL 2003-Volume 4, pp. 142\u2013147. Association for Computational Linguistics (2003)","DOI":"10.3115\/1119176.1119195"},{"key":"3_CR20","unstructured":"Zuccon, G., Nguyen, A.N., Bergheim, A., Wickman, S., Grayson, N.: The impact of OCR accuracy on automated cancer classification of pathology reports. In: HIC, pp. 250\u2013256 (2012)"}],"container-title":["Lecture Notes in Computer Science","Digital Libraries at Times of Massive Societal Transition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-64452-9_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,11,27]],"date-time":"2021-11-27T14:02:59Z","timestamp":1638021779000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-64452-9_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030644512","9783030644529"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-64452-9_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"26 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICADL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Asian Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icadl2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icadl.net\/icadl2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"79","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"15","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"13% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4 practitioners and 10 work-in-progress papers are also included. The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}