{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T05:00:53Z","timestamp":1764565253537,"version":"3.46.0"},"publisher-location":"Singapore","reference-count":21,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819548606","type":"print"},{"value":"9789819548613","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T00:00:00Z","timestamp":1764633600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T00:00:00Z","timestamp":1764633600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-4861-3_21","type":"book-chapter","created":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T05:00:12Z","timestamp":1764565212000},"page":"245-254","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Rethinking OCR Evaluation for\u00a0Information Extraction in\u00a0Business Documents"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-2032-5898","authenticated-orcid":false,"given":"Ngoc Nhi","family":"Nguyen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8964-2135","authenticated-orcid":false,"given":"Ahmed","family":"Hamdi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6160-3356","authenticated-orcid":false,"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7235-0665","authenticated-orcid":false,"given":"Adam","family":"Jatowt","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0123-439X","authenticated-orcid":false,"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,2]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Appalaraju, S., Jasani, B., Kota, B.U., Xie, Y., Manmatha, R.: Docformer: end-to-end transformer for document understanding. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 993\u20131003 (2021)","DOI":"10.1109\/ICCV48922.2021.00103"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Chiron, G., Doucet, A., Coustaty, M., Visani, M., Moreux, J.P.: Impact of OCR errors on the use of digital libraries: towards a better access to information. In: 2017 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp.\u00a01\u20134. IEEE (2017)","DOI":"10.1109\/JCDL.2017.7991582"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Ehrmann, M., Romanello, M., Fl\u00fcckiger, A., Clematide, S.: Overview of clef hipe 2020: Named entity recognition and linking on historical newspapers. In: International Conference of the Cross-Language Evaluation Forum for European Languages, pp. 288\u2013310. Springer (2020)","DOI":"10.1007\/978-3-030-58219-7_21"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Giamphy, E., et al.: A quantitative analysis of noise impact on document ranking. In: 2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 4612\u20134618. IEEE (2023)","DOI":"10.1109\/SMC53992.2023.10394665"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Gonz\u00e1lez-Gallardo, C.E., et al.: Yes but.. can ChatGPT identify entities in historical documents? In: 2023 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 184\u2013189. IEEE (2023)","DOI":"10.1109\/JCDL57899.2023.00034"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Gonz\u00e1lez-Gallardo, C.E., Tran, H.T.H., Hamdi, A., Doucet, A.: Leveraging open large language models for historical named entity recognition. In: International Conference on Theory and Practice of Digital Libraries, pp. 379\u2013395. Springer (2024)","DOI":"10.1007\/978-3-031-72437-4_22"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Jean-Caurant, A., Sidere, N., Coustaty, M., Doucet, A.: An analysis of the performance of named entity recognition over OCRed documents. In: 2019 ACM\/IEEE Joint Conference on Digital Libraries (JCDL), pp. 333\u2013334. IEEE (2019)","DOI":"10.1109\/JCDL.2019.00057"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Jean-Caurant, A., Sid\u00e8re, N., Coustaty, M., Doucet, A.: Assessing and minimizing the impact of OCR quality on named entity recognition. In: International Conference on Theory and Practice of Digital Libraries, pp. 87\u2013101. Springer (2020)","DOI":"10.1007\/978-3-030-54956-5_7"},{"issue":"2","key":"21_CR9","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1017\/S1351324922000110","volume":"29","author":"A Hamdi","year":"2023","unstructured":"Hamdi, A., Pontes, E.L., Sidere, N., Coustaty, M., Doucet, A.: In-depth analysis of the impact of OCR errors on named entity recognition and linking. Nat. Lang. Eng. 29(2), 425\u2013448 (2023)","journal-title":"Nat. Lang. Eng."},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Huang, Y., et al.: Layoutlmv3: pre-training for document ai with unified text and image masking. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 1915\u20131924. Association for Computing Machinery (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"21_CR11","unstructured":"Jiang, M., Hu, Y., Worthey, G., Dubnicek, R.C., Underwood, T., Downie, J.S.: Impact of OCR quality on BERT embeddings in the domain classification of book excerpts. Proceedings http:\/\/ceur-ws.org ISSN 1613, 0073 (2021)"},{"key":"21_CR12","unstructured":"Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: table benchmark for image-based table detection and recognition. In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 1918\u20131925 (2020)"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Pontes, L.E., Hamdi, A., Sidere, N., Doucet, A.: Impact of OCR quality on named entity linking. In: International Conference on Asian Digital Libraries, pp. 102\u2013115. Springer (2019)","DOI":"10.1007\/978-3-030-34058-2_11"},{"key":"21_CR14","doi-asserted-by":"publisher","unstructured":"Liu, Y., et al.: Roberta: A robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (1907.11692) (2019). https:\/\/doi.org\/10.48550\/arXiv.1907.11692","DOI":"10.48550\/arXiv.1907.11692"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Lopresti, D.: Measuring the impact of character recognition errors on downstream text analysis. In: Document Recognition and Retrieval XV, vol.\u00a06815, p. 68150G. International Society for Optics and Photonics (2008)","DOI":"10.1117\/12.767131"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Neudecker, C., et al.: A survey of OCR evaluation tools and metrics. In: Proceedings of the 6th International Workshop on Historical Document Imaging and Processing, pp. 13\u201318 (2021)","DOI":"10.1145\/3476887.3476888"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"\u0160imsa, \u0160., et\u00a0al.: Docile benchmark for document information localization and extraction. In: International Conference on Document Analysis and Recognition, pp. 147\u2013166. Springer (2023)","DOI":"10.1007\/978-3-031-41679-8_9"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"\u0160imsa, \u0160., et\u00a0al.: Overview of docile 2023: document information localization and extraction. In: International Conference of the Cross-Language Evaluation Forum for European Languages, pp. 276\u2013293. Springer (2023)","DOI":"10.1007\/978-3-031-42448-9_21"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"Todorov, K., Colavizza, G.: An assessment of the impact of OCR noise on language models. arXiv preprint arXiv:2202.00470 (2022)","DOI":"10.5220\/0010945100003116"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Van\u00a0Strien, D., et al.: Assessing the impact of OCR quality on downstream NLP tasks (2020)","DOI":"10.5220\/0009169004840496"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: Proceedings of the 26th ACM SIGKDD international conference on knowledge discovery and data mining, pp. 1192\u20131200 (2020)","DOI":"10.1145\/3394486.3403172"}],"container-title":["Lecture Notes in Computer Science","Intelligence and Equity: Shaping the Future of Knowledge"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-4861-3_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T05:00:17Z","timestamp":1764565217000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-4861-3_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,2]]},"ISBN":["9789819548606","9789819548613"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-4861-3_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,2]]},"assertion":[{"value":"2 December 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICADL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Asian Digital Libraries","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Metro Manila","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Philippines","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 December 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icadl2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icadl.net\/icadl2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}