{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T18:46:56Z","timestamp":1776278816058,"version":"3.50.1"},"reference-count":59,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,1,26]],"date-time":"2023-01-26T00:00:00Z","timestamp":1674691200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,26]],"date-time":"2023-01-26T00:00:00Z","timestamp":1674691200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","doi-asserted-by":"publisher","award":["Finance Code 001"],"award-info":[{"award-number":["Finance Code 001"]}],"id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004225","name":"Petrobras","doi-asserted-by":"publisher","award":["2017\/00752-3"],"award-info":[{"award-number":["2017\/00752-3"]}],"id":[{"id":"10.13039\/501100004225","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","award":["2017\/00752-3"],"award-info":[{"award-number":["2017\/00752-3"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit 
Libr"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s00799-023-00345-6","type":"journal-article","created":{"date-parts":[[2023,1,26]],"date-time":"2023-01-26T14:03:21Z","timestamp":1674741801000},"page":"45-62","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Evaluating and mitigating the impact of OCR errors on information retrieval"],"prefix":"10.1007","volume":"24","author":[{"given":"Lucas Lima","family":"de Oliveira","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Danny Suarez","family":"Vargas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ant\u00f4nio Marcelo Azevedo","family":"Alexandre","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"F\u00e1bio Corr\u00eaa","family":"Cordeiro","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Diogo da Silva Magalh\u00e3es","family":"Gomes","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Max de Castro","family":"Rodrigues","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Regis Kruel","family":"Romeu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4400-054X","authenticated-orcid":false,"given":"Viviane Pereira","family":"Moreira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,26]]},"reference":[{"key":"345_CR1","doi-asserted-by":"crossref","unstructured":"Bazzo, G.T., Lorentz, G.A., Vargas, D.S., et\u00a0al.: Assessing the impact of OCR errors in information retrieval. In: European Conference on Information Retrieval, pp. 
102\u2013109 (2020)","DOI":"10.1007\/978-3-030-45442-5_13"},{"key":"345_CR2","doi-asserted-by":"crossref","unstructured":"Bender, E.M.: On achieving and evaluating language-independence in nlp. Linguist. Issues Lang. Technol. 6 (2011)","DOI":"10.33011\/lilt.v6i.1239"},{"issue":"2","key":"345_CR3","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/s00799-011-0073-9","volume":"11","author":"A Bia","year":"2010","unstructured":"Bia, A., Mu\u00f1oz, R., G\u00f3mez, J.: DiCoMo: the digitization cost model. Int. J. Digital Lib. 11(2), 141\u2013153 (2010)","journal-title":"Int. J. Digital Lib."},{"key":"345_CR4","doi-asserted-by":"crossref","unstructured":"Boros, E., Nguyen, N.K., Lejeune, G., et\u00a0al.: Assessing the impact of OCR noise on multilingual event detection over digitised documents. Int. J. Digital Lib. pp. 1\u201326 (2022)","DOI":"10.1007\/s00799-022-00325-2"},{"key":"345_CR5","doi-asserted-by":"crossref","unstructured":"Buckley, C., Voorhees, E.M.: Evaluating evaluation measure stability. In: ACM SIGIR Forum, pp. 235\u2013242 (2017)","DOI":"10.1145\/3130348.3130373"},{"key":"345_CR6","doi-asserted-by":"crossref","unstructured":"Carrasco, R.C.: An open-source OCR evaluation tool. In: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, pp. 179\u2013184 (2014)","DOI":"10.1145\/2595188.2595221"},{"key":"345_CR7","doi-asserted-by":"crossref","unstructured":"Castro, J.D.B., Canchumuni, S.W.A., Villalobos, C.E.M., et\u00a0al.: Improvement optical character recognition for structured documents using generative adversarial networks. In: 2021 21st International Conference on Computational Science and Its Applications (ICCSA), pp. 285\u2013292 (2021)","DOI":"10.1109\/ICCSA54496.2021.00046"},{"key":"345_CR8","doi-asserted-by":"crossref","unstructured":"Chiron, G., Doucet, A., Coustaty, M., et\u00a0al.: ICDAR2017 competition on post-OCR text correction. 
In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), pp. 1423\u20131428 (2017)","DOI":"10.1109\/ICDAR.2017.232"},{"key":"345_CR9","unstructured":"Consoli, B., Santos, J., Gomes, D., et\u00a0al.: Embeddings for named entity recognition in geoscience portuguese literature. In: Proceedings of The 12th Language Resources and Evaluation Conference, pp. 4625\u20134630 (2020)"},{"key":"345_CR10","unstructured":"Croft, W.B., Harding, S., Taghva, K., et\u00a0al.: An evaluation of information retrieval accuracy with simulated OCR output. In: Symposium on Document Analysis and Information Retrieval, pp. 115\u2013126 (1994)"},{"issue":"4","key":"345_CR11","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/s10032-020-00359-9","volume":"23","author":"S Drobac","year":"2020","unstructured":"Drobac, S., Lind\u00e9n, K.: Optical character recognition with neural networks and post-correction with finite state methods. Int. J. Document Anal. Recog. (IJDAR) 23(4), 279\u2013295 (2020)","journal-title":"Int. J. Document Anal. Recog. (IJDAR)"},{"issue":"113","key":"345_CR12","first-page":"662","volume":"152","author":"H Dutta","year":"2022","unstructured":"Dutta, H., Gupta, A.: PNRank: Unsupervised ranking of person name entities from noisy OCR text. Decis. Support Syst. 152(113), 662 (2022)","journal-title":"Decis. Support Syst."},{"key":"345_CR13","unstructured":"Ehrmann, M., Hamdi, A., Pontes, E.L., et\u00a0al.: Named entity recognition and classification on historical documents: A survey. arXiv preprint arXiv:2109.11406 (2021)"},{"key":"345_CR14","doi-asserted-by":"crossref","unstructured":"Evershed, J., Fitch, K.: Correcting noisy OCR: Context beats confusion. In: Proceedings of the First International Conference on Digital Access to Textual Cultural Heritage, pp. 
45\u201351 (2014)","DOI":"10.1145\/2595188.2595200"},{"issue":"5","key":"345_CR15","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1016\/j.ipm.2016.03.004","volume":"52","author":"FN Flores","year":"2016","unstructured":"Flores, F.N., Moreira, V.P.: Assessing the impact of stemming accuracy on information retrieval-a multilingual perspective. Inf. Process. Manag. 52(5), 840\u2013854 (2016)","journal-title":"Inf. Process. Manag."},{"key":"345_CR16","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1007\/978-3-031-06555-2_49","volume-title":"Document Analysis Systems","author":"M Francois","year":"2022","unstructured":"Francois, M., Eglin, V., Biou, M.: Text detection and post-OCR correction in engineering documents. In: Uchida, S., Barney, E., Eglin, V. (eds.) Document Analysis Systems, pp. 726\u2013740. Springer International Publishing, Cham (2022)"},{"issue":"5","key":"345_CR17","doi-asserted-by":"publisher","first-page":"873","DOI":"10.1016\/j.ipm.2016.03.006","volume":"52","author":"K Ghosh","year":"2016","unstructured":"Ghosh, K., Chakraborty, A., Parui, S.K., et al.: Improving information retrieval performance on OCRed text in the absence of clean text ground truth. Inf. Process. Manag. 52(5), 873\u2013884 (2016)","journal-title":"Inf. Process. Manag."},{"issue":"103","key":"345_CR18","first-page":"347","volume":"124","author":"D Gomes","year":"2021","unstructured":"Gomes, D., Cordeiro, F., Consoli, B., et al.: Portuguese word embeddings for the oil and gas industry: Development and evaluation. Comput. Ind. 124(103), 347 (2021)","journal-title":"Comput. Ind."},{"key":"345_CR19","unstructured":"Gupte, A., Romanov, A., Mantravadi, S., et\u00a0al.: Lights, camera, action! a framework to improve nlp accuracy over OCR documents (2021)"},{"key":"345_CR20","doi-asserted-by":"crossref","unstructured":"H\u00e4m\u00e4l\u00e4inen, M., Hengchen, S.: From the Paft to the Fiiture: a Fully Automatic NMT and Word Embeddings Method for OCR Post-Correction. 
In: Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pp. 431\u2013436 (2019)","DOI":"10.26615\/978-954-452-056-4_051"},{"key":"345_CR21","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Jean-Caurant, A., Sid\u00e8re, N., et\u00a0al.: Assessing and minimizing the impact of OCR quality on named entity recognition. In: International Conference on Theory and Practice of Digital Libraries, Springer, pp. 87\u2013101 (2020)","DOI":"10.1007\/978-3-030-54956-5_7"},{"key":"345_CR22","doi-asserted-by":"crossref","unstructured":"Hegghammer, T.: OCR with tesseract, amazon textract, and google document ai: a benchmarking experiment. J. Comput. Social Sci. 1\u201322 (2021)","DOI":"10.31235\/osf.io\/6zfvs"},{"key":"345_CR23","doi-asserted-by":"crossref","unstructured":"Hull, D.: Using statistical testing in the evaluation of retrieval experiments. In: Proceedings of the 16th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 329\u2013338 (1993)","DOI":"10.1145\/160688.160758"},{"key":"345_CR24","doi-asserted-by":"crossref","unstructured":"Huynh, V.N., Hamdi, A., Doucet, A.: When to use OCR post-correction for named entity recognition? In: International Conference on Asian Digital Libraries, Springer, pp. 33\u201342 (2020)","DOI":"10.1007\/978-3-030-64452-9_3"},{"key":"345_CR25","unstructured":"Jiang, M., Hu, Y., Worthey, G., et\u00a0al.: Impact of OCR quality on BERT embeddings in the domain classification of book excerpts. Proceedings http:\/\/ceur-ws.org ISSN 1613:0073 (2021)"},{"key":"345_CR26","doi-asserted-by":"crossref","unstructured":"Jing, H., Lopresti, D., Shih, C.: Summarization of noisy documents: A pilot study. In: Proceedings of the HLT-NAACL 03 text summarization workshop, pp. 
25\u201332 (2003)","DOI":"10.3115\/1119467.1119471"},{"key":"345_CR27","doi-asserted-by":"crossref","unstructured":"Johnson, S., Jourlin, P., Jones, K.S., et\u00a0al.: Spoken document retrieval for TREC-7 at cambridge university. In: TREC, p.\u00a01 (1999)","DOI":"10.6028\/NIST.SP.500-242.sdr-cambridge"},{"issue":"2","key":"345_CR28","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1023\/A:1009902609570","volume":"2","author":"PB Kantor","year":"2000","unstructured":"Kantor, P.B., Voorhees, E.M.: The TREC-5 confusion track: Comparing retrieval methods for scanned text. Inf. Retrieval 2(2), 165\u2013176 (2000)","journal-title":"Inf. Retrieval"},{"key":"345_CR29","unstructured":"Kettunen, K., Keskustalo, H., Kumpulainen, S., et\u00a0al.: OCR quality affects perceived usefulness of historical newspaper clippings\u2013a user study (2022). https:\/\/arxiv.org\/abs\/2203.03557"},{"issue":"3","key":"345_CR30","doi-asserted-by":"publisher","first-page":"633","DOI":"10.1016\/j.ipm.2005.06.006","volume":"42","author":"AM Lam-Adesina","year":"2006","unstructured":"Lam-Adesina, A.M., Jones, G.J.: Examining and improving the effectiveness of relevance feedback for retrieval of scanned text documents. Inf. Process. Manag. 42(3), 633\u2013649 (2006)","journal-title":"Inf. Process. Manag."},{"key":"345_CR31","doi-asserted-by":"crossref","unstructured":"Lawley, C.J., Raimondo, S., Chen, T., et\u00a0al.: Geoscience language models and their intrinsic evaluation. Appl. Comput. Geosci., 100084 (2022)","DOI":"10.1016\/j.acags.2022.100084"},{"key":"345_CR32","unstructured":"Lin, X.: Impact of imperfect OCR on part-of-speech tagging. In: Seventh International Conference on Document Analysis and Recognition, Proceedings., pp. 284\u2013288 (2003)"},{"key":"345_CR33","doi-asserted-by":"crossref","unstructured":"Linhares\u00a0Pontes, E., Hamdi, A., Sidere, N., et\u00a0al.: Impact of OCR quality on named entity linking. 
In: International Conference on Asian Digital Libraries, Springer, pp. 102\u2013115 (2019)","DOI":"10.1007\/978-3-030-34058-2_11"},{"key":"345_CR34","doi-asserted-by":"crossref","unstructured":"Linhares\u00a0Pontes, E., Cabrera-Diego, L.A., Moreno, J.G., et\u00a0al.: MELHISSA: a multilingual entity linking architecture for historical press articles. Int. J. Digital Lib. 1\u201328 (2021)","DOI":"10.1007\/s00799-021-00319-6"},{"key":"345_CR35","doi-asserted-by":"crossref","unstructured":"Ma, X., Pradeep, R., Nogueira, R., et\u00a0al.: Document expansion baselines and learned sparse lexical representations for ms marco v1 and v2. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3187\u20133197 (2022)","DOI":"10.1145\/3477495.3531749"},{"issue":"23","key":"345_CR36","doi-asserted-by":"publisher","first-page":"17,209","DOI":"10.1007\/s00521-020-04910-x","volume":"32","author":"J Mart\u00ednek","year":"2020","unstructured":"Mart\u00ednek, J., Lenc, L., Kr\u00e1l, P.: Building an efficient OCR system for historical documents with little training data. Neural Comput. Appl. 32(23), 17,209-17,227 (2020)","journal-title":"Neural Comput. Appl."},{"issue":"6","key":"345_CR37","doi-asserted-by":"publisher","first-page":"874","DOI":"10.1016\/j.ipm.2018.06.001","volume":"54","author":"J Mei","year":"2018","unstructured":"Mei, J., Islam, A., Moh\u2019d, A., et al.: Statistical learning for OCR error correction. Inf. Process. Manag. 54(6), 874\u2013887 (2018)","journal-title":"Inf. Process. Manag."},{"key":"345_CR38","doi-asserted-by":"crossref","unstructured":"Miller, D., Boisen, S., Schwartz, R., et\u00a0al.: Named entity extraction from noisy input: speech and OCR. In: Sixth Applied Natural Language Processing Conference, pp. 
316\u2013324 (2000)","DOI":"10.3115\/974147.974191"},{"issue":"3","key":"345_CR39","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1023\/A:1026564708926","volume":"3","author":"E Mittendorf","year":"2000","unstructured":"Mittendorf, E., Sch\u00e4uble, P.: Information retrieval can cope with many errors. Inf. Retrieval 3(3), 189\u2013216 (2000)","journal-title":"Inf. Retrieval"},{"key":"345_CR40","doi-asserted-by":"crossref","unstructured":"Mutuvi, S., Doucet, A., Odeo, M., et\u00a0al.: Evaluating the impact of OCR errors on topic modeling. In: International Conference on Asian Digital Libraries, pp. 3\u201314 (2018)","DOI":"10.1007\/978-3-030-04257-8_1"},{"key":"345_CR41","doi-asserted-by":"crossref","unstructured":"Nguyen, T., Jatowt, A., Coustaty, M., et\u00a0al.: Deep statistical analysis of OCR errors for effective post-OCR processing. In: Joint Conference on Digital Libraries (JCDL), pp. 29\u201338 (2019)","DOI":"10.1109\/JCDL.2019.00015"},{"issue":"6","key":"345_CR42","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3453476","volume":"54","author":"TTH Nguyen","year":"2021","unstructured":"Nguyen, T.T.H., Jatowt, A., Coustaty, M., et al.: Survey of post-OCR processing approaches. ACM Comput. Surv. (CSUR) 54(6), 1\u201337 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"345_CR43","unstructured":"Nogueira, R., Cho, K.: Passage re-ranking with bert. arXiv preprint arXiv:1901.04085 (2019)"},{"key":"345_CR44","doi-asserted-by":"crossref","unstructured":"Lima\u00a0de Oliveira, L., Romeu, R.K., Moreira, V.P.: REGIS: A test collection for geoscientific documents in portuguese. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2363\u20132368 (2021)","DOI":"10.1145\/3404835.3463256"},{"key":"345_CR45","doi-asserted-by":"crossref","unstructured":"Rigaud, C., Doucet, A., Coustaty, M., et\u00a0al.: ICDAR 2019 competition on post-OCR text correction. 
In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1588\u20131593 (2019)","DOI":"10.1109\/ICDAR.2019.00255"},{"key":"345_CR46","doi-asserted-by":"crossref","unstructured":"Sakai, T.: Statistical reform in information retrieval? In: ACM SIGIR Forum, pp. 3\u201312 (2014)","DOI":"10.1145\/2641383.2641385"},{"key":"345_CR47","doi-asserted-by":"crossref","unstructured":"Santos, D., Rocha, P.: The key to the first CLEF with portuguese: Topics, questions and answers in CHAVE. In: Workshop of the Cross-Language Evaluation Forum for European Languages, pp. 821\u2013832 (2004)","DOI":"10.1007\/11519645_80"},{"issue":"6","key":"345_CR48","first-page":"545","volume":"4","author":"S Singh","year":"2013","unstructured":"Singh, S.: Optical character recognition techniques: a survey. J. Emerg. Trends Comput. Inf. Sci. 4(6), 545\u2013550 (2013)","journal-title":"J. Emerg. Trends Comput. Inf. Sci."},{"key":"345_CR49","doi-asserted-by":"crossref","unstructured":"Smucker, M.D., Allan, J., Carterette, B.: A comparison of statistical significance tests for information retrieval evaluation. In: Proceedings of the Sixteenth ACM Conference on Information and Knowledge Management, pp. 623\u2013632 (2007)","DOI":"10.1145\/1321440.1321528"},{"key":"345_CR50","doi-asserted-by":"crossref","unstructured":"van Strien, D., Beelen, K., Ardanuy, M.C., et\u00a0al.: Assessing the impact of OCR quality on downstream NLP tasks. In: Proceedings of the 12th International Conference on Agents and Artificial Intelligence, ICAART, pp. 484\u2013496 (2020)","DOI":"10.5220\/0009169004840496"},{"issue":"1","key":"345_CR51","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1002\/(SICI)1097-4571(199401)45:1<50::AID-ASI6>3.0.CO;2-B","volume":"45","author":"K Taghva","year":"1994","unstructured":"Taghva, K., Borsack, J., Condit, A., et al.: The effects of noisy data on text retrieval. J. Am. Soc. Inf. Sci. 45(1), 50\u201358 (1994)","journal-title":"J. Am. Soc. Inf. 
Sci."},{"issue":"3","key":"345_CR52","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1016\/0306-4573(95)00058-5","volume":"32","author":"K Taghva","year":"1996","unstructured":"Taghva, K., Borsack, J., Condit, A.: Effects of OCR errors on ranking and feedback using the vector space model. Inf. Process. Manag. 32(3), 317\u2013327 (1996)","journal-title":"Inf. Process. Manag."},{"issue":"1","key":"345_CR53","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/214174.214180","volume":"14","author":"K Taghva","year":"1996","unstructured":"Taghva, K., Borsack, J., Condit, A.: Evaluation of model-based retrieval effectiveness with OCR text. ACM Trans. Inf. Syst. (TOIS) 14(1), 64\u201393 (1996)","journal-title":"ACM Trans. Inf. Syst. (TOIS)"},{"key":"345_CR54","doi-asserted-by":"crossref","unstructured":"Traub, M.C., Samar, T., Van\u00a0Ossenbruggen, J., et\u00a0al.: Impact of crowdsourcing OCR improvements on retrievability bias. In: Proceedings of the 18th ACM\/IEEE on Joint Conference on Digital Libraries, pp. 29\u201336 (2018)","DOI":"10.1145\/3197026.3197046"},{"key":"345_CR55","doi-asserted-by":"crossref","unstructured":"Vargas, D.S., de\u00a0Oliveira, L.L., Moreira, V.P., et\u00a0al.: sOCRates-a post-OCR text correction method. In: Anais do XXXVI Simp\u00f3sio Brasileiro de Bancos de Dados, pp. 61\u201372 (2021)","DOI":"10.5753\/sbbd.2021.17866"},{"key":"345_CR56","doi-asserted-by":"crossref","unstructured":"Wiedenhofer, L., Hein, H.G., Dengel, A.: Post-processing of OCR results for automatic indexing. In: Proceedings of 3rd International Conference on Document Analysis and Recognition, IEEE, pp. 592\u2013596 (1995)","DOI":"10.1109\/ICDAR.1995.601966"},{"key":"345_CR57","doi-asserted-by":"crossref","unstructured":"Zhuang, S., Zuccon, G.: Dealing with typos for BERT-based passage retrieval and ranking. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 
2836\u20132842 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.225"},{"key":"345_CR58","doi-asserted-by":"crossref","unstructured":"Zosa, E., Mutuvi, S., Granroth-Wilding, M., et\u00a0al.: Evaluating the robustness of embedding-based topic models to ocr noise. In: International Conference on Asian Digital Libraries, Springer, pp. 392\u2013400 (2021)","DOI":"10.1007\/978-3-030-91669-5_30"},{"key":"345_CR59","doi-asserted-by":"crossref","unstructured":"Zu, G., Murata, M., Ohyama, W., et\u00a0al.: The impact of OCR accuracy on automatic text classification. In: Advanced Workshop on Content Computing, pp. 403\u2013409 (2004)","DOI":"10.1007\/978-3-540-30483-8_49"}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-023-00345-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00799-023-00345-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-023-00345-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,13]],"date-time":"2024-10-13T02:59:59Z","timestamp":1728788399000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00799-023-00345-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,26]]},"references-count":59,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["345"],"URL":"https:\/\/doi.org\/10.1007\/s00799-023-00345-6","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"value":"1432-5012","type":"print"},{"value":"1432-1300","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,1,26]]},"assertion":[{"value":"5 July 
2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}]}}