{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T04:15:00Z","timestamp":1746159300394,"version":"3.40.4"},"reference-count":37,"publisher":"SPIE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,12,27]]},"DOI":"10.1117\/12.2042502","type":"proceedings-article","created":{"date-parts":[[2014,2,3]],"date-time":"2014-02-03T17:55:05Z","timestamp":1391450105000},"page":"90210A","source":"Crossref","is-referenced-by-count":3,"title":["How well does multiple OCR error correction generalize?"],"prefix":"10.1117","volume":"9021","author":[{"given":"William B.","family":"Lund","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric K.","family":"Ringger","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel D.","family":"Walker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"189","reference":[{"key":"c1","doi-asserted-by":"crossref","unstructured":"Lund, W. B., Kennard, D. J., and Ringger, E. K., \u201cCombining multiple thresholding binarization values to improve OCR output,\u201d in [Proceedings of Document Recognition and Retrieval XX], (Feb. 2013).","DOI":"10.1117\/12.2006228"},{"key":"c2","doi-asserted-by":"crossref","unstructured":"Lund, W. B. and Ringger, E. K., \u201cImproving optical character recognition through efficient multiple system alignment,\u201d in [Proceedings of the 9th ACM\/IEEE-CS joint conference on Digital libraries], 231\u2013240, ACM, Austin, TX, USA (2009).","DOI":"10.1145\/1555400.1555437"},{"key":"c3","doi-asserted-by":"publisher","DOI":"10.1109\/34.667881"},{"key":"c4","doi-asserted-by":"crossref","unstructured":"Antonacopoulos, A. and Karatzas, D., \u201cSemantics-based content extraction in typewritten historical documents,\u201d in [Proceedings of the 8th International Conference on Document Analysis and Recogniction, 2005], 1, 48\u201353 (Aug. 2005).","DOI":"10.1109\/ICDAR.2005.215"},{"key":"c5","doi-asserted-by":"crossref","unstructured":"Kae, A. and Learned-Miller, E., \u201cLearning on the fly: Font-free approaches to difficult OCR problems,\u201d in [Proceedings of the International Conference on Document Analysis and Recognition (ICDAR) 2009], (2009).","DOI":"10.1109\/ICDAR.2009.260"},{"key":"c6","doi-asserted-by":"crossref","unstructured":"Bertolami, R. and Bunke, H., \u201cEnsemble methods for handwritten text line recognition systems,\u201d in [Proccedings of the 2005 IEEE International Conference on Systems, Man and Cybermetrics], (Oct. 2005).","DOI":"10.1109\/ICSMC.2005.1571497"},{"key":"c7","doi-asserted-by":"crossref","unstructured":"Si, L., Kanungo, T., and Huang, X., \u201cBoosting performance of bio-entity recognition by combining results from multiple systems,\u201d in [Proceedings of the 5th international workshop on Bioinformatics], 76\u201383, ACM, Chicago, Illinois (2005).","DOI":"10.1145\/1134030.1134044"},{"key":"c8","first-page":"986","article-title":"An empirical study on computing consensus translations from multiple machine translation systems","author":"Macherey","year":"2007"},{"key":"c9","doi-asserted-by":"crossref","unstructured":"Charniak, E. and Johnson, M., \u201cCoarse-to-fine n-best parsing and MaxEnt discriminative reranking,\u201d in [Proceedings of the 43rd Annual Meeting of the ACL], 173\u2013180 (June 2005).","DOI":"10.3115\/1219840.1219862"},{"key":"c10","unstructured":"Klein, S. T. and Kopel, M., \u201cA voting system for automatic OCR correction,\u201d in [Proceedings of the SIGIR 2002 Workshop on Information Retrieval and OCR], (Aug. 2002)."},{"key":"c11","doi-asserted-by":"crossref","unstructured":"Cecotti, H. and Belaid, A., \u201cHybrid OCR combination approach complemented by a specialized ICR applied on ancient documents,\u201d in [Proceedings of the 8th International Conference on Document Analysis and Recognition, 2005], 2, 1045\u20131049 (Aug. 2005).","DOI":"10.1109\/ICDAR.2005.130"},{"key":"c12","doi-asserted-by":"crossref","unstructured":"Caruana, R., Niculescu-Mizil, A., Crew, G., and Ksikes, A., \u201cEnsemble selection from libraries of models,\u201d in [Proceedings of the twenty-first international conference on Machine learning], 18 (2004).","DOI":"10.1145\/1015330.1015432"},{"key":"c13","doi-asserted-by":"crossref","unstructured":"Lund, W. B., Walker, D. D., and Ringger, E. K., \u201cProgressive alignment and discriminative error correction for multiple OCR engines,\u201d in [Proceedings of the 11th International Conference on Document Analysis and Recognition (ICDAR 2011)], (Sept. 2011).","DOI":"10.1109\/ICDAR.2011.303"},{"key":"c14","doi-asserted-by":"crossref","unstructured":"Lund, W. B., Kennard, D. J., and Ringger, E. K., \u201cWhy multiple document image binarizations improve OCR,\u201d in [Proceedings of the Workshop on Historical Document Imaging and Processing 2013 (HIP 2013)], (Aug. 2013).","DOI":"10.1145\/2501115.2501126"},{"key":"c15","doi-asserted-by":"publisher","DOI":"10.1109\/34.58871"},{"key":"c16","doi-asserted-by":"crossref","unstructured":"Dietterich, T. G., \u201cEnsemble methods in machine learning,\u201d in [Multiple classifier systems], Springer (2000).","DOI":"10.1007\/3-540-45014-9_1"},{"key":"c17","unstructured":"Cer, D., Manning, C. D., and Jurafsky, D., \u201cPositive diversity tuning for machine translation system combination,\u201d in [Proceedings of the Eighth Workshop on Statistical Machine Translation], 320\u2013328, Association for Computational Linguistics, Sofia, Bulgaria (2013)."},{"key":"c18","doi-asserted-by":"crossref","unstructured":"Gimpel, K., Batra, D., Dyer, C., and Shakhnarovich, G., \u201cA systematic exploration of diversity in machine translation,\u201d in [Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP 2013)], (Oct. 2013).","DOI":"10.18653\/v1\/D13-1111"},{"key":"c19","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.1994.1.337"},{"key":"c20","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.0030123"},{"key":"c21","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.2006.13.1323"},{"key":"c22","doi-asserted-by":"publisher","DOI":"10.1006\/cviu.1996.0502"},{"key":"c23","unstructured":"Thoma, G. and Le, D., \u201cMedical database input using integrated OCR and document analysis and labeling technology,\u201d in [Proceedings 1997 Symposium on Document Image Understanding Technology], 280 (1997)."},{"key":"c24","doi-asserted-by":"crossref","unstructured":"Esakov, J., Lopresti, D. P., and Sandberg, J., \u201cClassification and distribution of optical character recognition errors,\u201d in [Proceedings of IS&T\/SPIE International Symposium on Electronic Imaging], 204\u2013216 (Feb. 1994).","DOI":"10.1117\/12.171108"},{"key":"c25","doi-asserted-by":"crossref","unstructured":"Kolak, O., Byrne, W. J., and Resnik, P., \u201cA generative probabilistic OCR model for NLP applications,\u201d in [Proceedings of HLT-NAACL 2003], 55\u201362 (May 2003).","DOI":"10.3115\/1073445.1073463"},{"key":"c26","doi-asserted-by":"crossref","unstructured":"Lund, W. B. and Ringger, E. K., \u201cError correction with in-domain training across multiple OCR system outputs,\u201d in [Proceedings of the 11th International Conference on Document Analysis and Recognition (ICDAR 2011)], (Sept. 2011).","DOI":"10.1109\/ICDAR.2011.138"},{"key":"c27","doi-asserted-by":"crossref","unstructured":"Yamazoe, T., Etoh, M., Yoshimura, T., and Tsujino, K., \u201cHypothesis preservation approach to scene text recognition with weighted finite-state transducer,\u201d in [2011 International Conference on Document Analysis and Recognition (ICDAR)], 359 \u2013363 (Sept. 2011).","DOI":"10.1109\/ICDAR.2011.80"},{"key":"c28","doi-asserted-by":"crossref","unstructured":"Sarkar, P., Baird, H. S., and Zhang, X., \u201cTraining on severely degraded text-line images,\u201d in [Proceedings of the Seventh International Conference on Document Analysis and Recognition - Volume 1], 38, IEEE Computer Society (2003).","DOI":"10.1109\/ICDAR.2003.1227624"},{"key":"c29","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-84628-726-8"},{"key":"c30","first-page":"2766","article-title":"Daily battle communiques, 1944\u20131945","author":"Jordan","year":"1945"},{"key":"c31","unstructured":"Fraud, P., \u201c19th century Mormon article newspaper index,\u201d L. Tom Perry Special Collections, Brigham Young University (2012)."},{"key":"c32","unstructured":"Lewis, D. D., \u201cReuters-21578,\u201d http:\/\/www.daviddlewis.com\/resources\/testcollections\/reuters21578\/ (2013)."},{"key":"c33","article-title":"A synthetic document image dataset for developing and evaluating historical document processing methods","volume":"8297","author":"Walker","year":"2012"},{"key":"c34","unstructured":"Berry, M. W., Browne, M., and Signer, B., \u201c2001 topic annotated Enron email data set.\u201d http:\/\/www.ldc.upenn.edu\/ (June 2007)."},{"key":"c35","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-004-8682-1"},{"key":"c36","unstructured":"McCallum, A. K., \u201cMALLET: a machine learning for language toolkit..\u201d http:\/\/mallet.cs.umass.edu (2002)."},{"key":"c37","unstructured":"Ajot, J., Fiscus, J., Radde, N., and Laprun, C., \u201cAsclite - Multi-dimensional alignment program..\u201d http:\/\/www.nist.gov\/speech\/tools\/asclite.html (2008)."}],"event":{"name":"IS&T\/SPIE Electronic Imaging","location":"San Francisco, California, USA"},"container-title":["SPIE Proceedings","Document Recognition and Retrieval XXI"],"original-title":[],"deposited":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T19:16:38Z","timestamp":1746126998000},"score":1,"resource":{"primary":{"URL":"http:\/\/proceedings.spiedigitallibrary.org\/proceeding.aspx?doi=10.1117\/12.2042502"}},"subtitle":[],"editor":[{"given":"Bertrand","family":"Co\u00fcasnon","sequence":"first","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]},{"given":"Eric K.","family":"Ringger","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2013,12,27]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1117\/12.2042502","relation":{},"ISSN":["0277-786X"],"issn-type":[{"type":"print","value":"0277-786X"}],"subject":[],"published":{"date-parts":[[2013,12,27]]}}}