{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T03:56:14Z","timestamp":1743134174311,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031065545"},{"type":"electronic","value":"9783031065552"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-06555-2_32","type":"book-chapter","created":{"date-parts":[[2022,5,17]],"date-time":"2022-05-17T09:10:13Z","timestamp":1652778613000},"page":"479-491","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ReadOCR: A Novel Dataset and\u00a0Readability Assessment of\u00a0OCRed Texts"],"prefix":"10.1007","author":[{"given":"Hai Thi Tuyet","family":"Nguyen","sequence":"first","affiliation":[]},{"given":"Adam","family":"Jatowt","sequence":"additional","affiliation":[]},{"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[]},{"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,18]]},"reference":[{"key":"32_CR1","doi-asserted-by":"crossref","unstructured":"Abdulkader, A., Casey, M.R.: Low cost correction of OCR errors using learning in a multi-engine environment. In: 10th International Conference on Document Analysis and Recognition, ICDAR 2009, pp. 576\u2013580. IEEE Computer Society (2009)","DOI":"10.1109\/ICDAR.2009.242"},{"key":"32_CR2","doi-asserted-by":"publisher","unstructured":"Bazzo, G.T., Lorentz, G.A., Suarez Vargas, D., Moreira, V.P.: Assessing the impact of OCR errors in information retrieval. In: Jose, J.M., et al. (eds.) ECIR 2020. LNCS, vol. 12036, pp. 102\u2013109. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-45442-5_13","DOI":"10.1007\/978-3-030-45442-5_13"},{"key":"32_CR3","doi-asserted-by":"crossref","unstructured":"Bird, S.: NLTK: the natural language toolkit. In: Proceedings of the COLING\/ACL 2006 Interactive Presentation Sessions, pp. 69\u201372 (2006)","DOI":"10.3115\/1225403.1225421"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Boros, E., et al.: Alleviating digitization errors in named entity recognition for historical documents. In: Proceedings of the 24th Conference on Computational Natural Language Learning, CoNLL 2020, pp. 431\u2013441. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.conll-1.35"},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Chiron, G., Doucet, A., Coustaty, M., Moreux, J.P.: ICDAR 2017 competition on post-OCR text correction. In: 14th IAPR International Conference on Document Analysis and Recognition, pp. 1423\u20131428. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.232"},{"issue":"5\u20136","key":"32_CR6","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1080\/0163853X.2017.1296264","volume":"54","author":"SA Crossley","year":"2017","unstructured":"Crossley, S.A., Skalicky, S., Dascalu, M., McNamara, D.S., Kyle, K.: Predicting text comprehension, processing, and familiarity in adult readers: new approaches to readability formulas. Discourse Process. 54(5\u20136), 340\u2013359 (2017)","journal-title":"Discourse Process."},{"key":"32_CR7","first-page":"37","volume":"27","author":"E Dale","year":"1948","unstructured":"Dale, E., Chall, J.S.: A formula for predicting readability: instructions. Educ. Res. Bull. 27, 37\u201354 (1948)","journal-title":"Educ. Res. Bull."},{"key":"32_CR8","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019), pp. 4171\u20134186. Association for Computational Linguistics (2019)"},{"key":"32_CR9","doi-asserted-by":"crossref","unstructured":"Kincaid, J.P., Fishburne, R.P., Jr., Rogers, R.L., Chissom, B.S.: Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for Navy enlisted personnel. Tech. rep, Naval Technical Training Command Millington TN Research Branch (1975)","DOI":"10.21236\/ADA006655"},{"issue":"2","key":"32_CR10","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.jcm.2016.02.012","volume":"15","author":"T Koo","year":"2016","unstructured":"Koo, T., Li, M.: A guideline of selecting and reporting intraclass correlation coefficients for reliability research. J. Chiropr. Med. 15(2), 155\u2013163 (2016)","journal-title":"J. Chiropr. Med."},{"issue":"1","key":"32_CR11","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1162\/coli_a_00398","volume":"47","author":"M Martinc","year":"2021","unstructured":"Martinc, M., Pollak, S., Robnik-\u0160ikonja, M.: Supervised and unsupervised neural approaches to text readability. Comput. Linguist. 47(1), 141\u2013179 (2021)","journal-title":"Comput. Linguist."},{"issue":"6","key":"32_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3453476","volume":"54","author":"TTH Nguyen","year":"2021","unstructured":"Nguyen, T.T.H., Jatowt, A., Coustaty, M., Doucet, A.: Survey of post-OCR processing approaches. ACM Comput. Surv. 54(6), 1\u201337 (2021)","journal-title":"ACM Comput. Surv."},{"key":"32_CR13","doi-asserted-by":"crossref","unstructured":"Nguyen, T., Jatowt, A., Coustaty, M., Nguyen, N., Doucet, A.: Deep statistical analysis of OCR errors for effective post-OCR processing. In: 19th ACM\/IEEE Joint Conference on Digital Libraries, pp. 29\u201338 (2019)","DOI":"10.1109\/JCDL.2019.00015"},{"key":"32_CR14","doi-asserted-by":"publisher","unstructured":"Linhares Pontes, E., Hamdi, A., Sidere, N., Doucet, A.: Impact of OCR quality on named entity linking. In: Jatowt, A., Maeda, A., Syn, S.Y. (eds.) ICADL 2019. LNCS, vol. 11853, pp. 102\u2013115. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34058-2_11","DOI":"10.1007\/978-3-030-34058-2_11"},{"key":"32_CR15","doi-asserted-by":"publisher","first-page":"187","DOI":"10.4103\/picr.PICR_123_17","volume":"8","author":"P Ranganathan","year":"2017","unstructured":"Ranganathan, P., Pramesh, C., Aggarwal, R.: Common pitfalls in statistical analysis: measures of agreement. Perspect. Clin. Res. 8, 187 (2017)","journal-title":"Perspect. Clin. Res."},{"issue":"2","key":"32_CR16","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1037\/0033-2909.86.2.420","volume":"86","author":"PE Shrout","year":"1979","unstructured":"Shrout, P.E., Fleiss, J.L.: Intraclass correlations: uses in assessing rater reliability. Psychol. Bull. 86(2), 420 (1979)","journal-title":"Psychol. Bull."},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"van Strien, D., Beelen, K., Ardanuy, M.C., Hosseini, K., McGillivray, B., Colavizza, G.: Assessing the impact of OCR quality on downstream NLP tasks. In: Proceedings of the 12th International Conference on Agents and Artificial Intelligence, ICAART 2020. pp. 484\u2013496. SCITEPRESS (2020)","DOI":"10.5220\/0009169004840496"},{"key":"32_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1007\/978-3-319-24592-8_19","volume-title":"Research and Advanced Technology for Digital Libraries","author":"MC Traub","year":"2015","unstructured":"Traub, M.C., van Ossenbruggen, J., Hardman, L.: Impact analysis of OCR quality on research tasks in digital archives. In: Kapidakis, S., Mazurek, C., Werla, M. (eds.) TPDL 2015. LNCS, vol. 9316, pp. 252\u2013263. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24592-8_19"},{"key":"32_CR19","doi-asserted-by":"crossref","unstructured":"Vajjala, S., Lu\u010di\u0107, I.: OneStopEnglish corpus: a new corpus for automatic readability assessment and text simplification. In: Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, pp. 297\u2013304 (2018)","DOI":"10.18653\/v1\/W18-0535"},{"key":"32_CR20","unstructured":"Vajjala, S., Meurers, D.: On improving the accuracy of readability classification using insights from second language acquisition. In: Proceedings of the Seventh Workshop on Building Educational Applications Using NLP, pp. 163\u2013173 (2012)"},{"key":"32_CR21","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1162\/tacl_a_00139","volume":"3","author":"W Xu","year":"2015","unstructured":"Xu, W., Callison-Burch, C., Napoles, C.: Problems in current text simplification research: new data can help. Trans. Assoc. Comput. Linguist. 3, 283\u2013297 (2015)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"32_CR22","doi-asserted-by":"crossref","unstructured":"Yang, Z., Yang, D., Dyer, C., He, X., Smola, A., Hovy, E.: Hierarchical attention networks for document classification, pp. 1480\u20131489. Association for Computational Linguistics, San Diego (2016)","DOI":"10.18653\/v1\/N16-1174"}],"container-title":["Lecture Notes in Computer Science","Document Analysis Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-06555-2_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T16:08:18Z","timestamp":1710259698000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-06555-2_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031065545","9783031065552"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-06555-2_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"18 May 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Document Analysis Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"La Rochelle","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 May 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"das2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/das2022.univ-lr.fr\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"94","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"52","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"55% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.85","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}