{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T13:37:36Z","timestamp":1760708256582,"version":"3.38.0"},"reference-count":22,"publisher":"Springer Science and Business Media LLC","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2011,6]]},"DOI":"10.1007\/s10032-010-0133-5","type":"journal-article","created":{"date-parts":[[2010,11,2]],"date-time":"2010-11-02T14:31:00Z","timestamp":1288708260000},"page":"173-187","source":"Crossref","is-referenced-by-count":22,"title":["Character confusion versus focus word-based correction of spelling and OCR variants in corpora"],"prefix":"10.1007","volume":"14","author":[{"given":"Martin W. C.","family":"Reynaert","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,11,3]]},"reference":[{"issue":"1","key":"133_CR1","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1145\/375360.375365","volume":"33","author":"G. Navarro","year":"2001","unstructured":"Navarro G.: A guided tour to approximate string matching. ACM Comput. Surv. 33(1), 31\u201388 (2001)","journal-title":"ACM Comput. Surv."},{"issue":"4","key":"133_CR2","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1145\/146370.146380","volume":"24","author":"K. Kukich","year":"1992","unstructured":"Kukich K.: Techniques for automatically correcting words in text. ACM Comput. Surv. 24(4), 377\u2013439 (1992)","journal-title":"ACM Comput. Surv."},{"key":"133_CR3","unstructured":"Cucerzan, S., Brill, E.: Spelling correction as an iterative process that exploits the collective knowledge of web users. In: Lin, D., Wu, D. (eds.) Proceedings of EMNLP 2004, pp. 293\u2013300. Association for Computational Linguistics, Barcelona (2004)"},{"issue":"1","key":"133_CR4","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1006\/cviu.1996.0502","volume":"67","author":"D. Lopresti","year":"1997","unstructured":"Lopresti D., Zhou J.: Using consensus sequence voting to correct OCR errors. Comput. Vis. Image Underst. 67(1), 39\u201347 (1997)","journal-title":"Comput. Vis. Image Underst."},{"key":"133_CR5","doi-asserted-by":"crossref","unstructured":"Kernighan, M.D., Church, K.W., Gale, W.A.: A spelling correction program based on a noisy channel model. In: COLING-90, vol. II, pp. 205\u2013211. Helsinki (1990)","DOI":"10.3115\/997939.997975"},{"key":"133_CR6","doi-asserted-by":"crossref","unstructured":"Oflazer, K., G\u00fczey, C.: Spelling correction in agglutinative languages. In: ANLP, pp. 194\u2013195. (1994)","DOI":"10.3115\/974358.974406"},{"key":"133_CR7","unstructured":"Sun, X., Gao, J., Micol, D., Quirk, C.: Learning phrase-based spelling error models from clickthrough data. In: Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics (ACL\u201910) (2010)"},{"key":"133_CR8","first-page":"289","volume-title":"Proc Data Compression Conference","author":"W.J. Teahan","year":"1998","unstructured":"Teahan W.J., Inglis S., Cleary J.G., Holmes G.: Correcting English text using PPM models. In: Storer, J.A., Reif, J.H. (eds) Proc Data Compression Conference, pp. 289\u2013298. IEEE Computer Society Press, Society Press, Los Alamitos, CA (1998)"},{"key":"133_CR9","unstructured":"Kolak, O., Resnik, P.: OCR error correction using a noisy channel model. In: Proceedings of the second international conference on Human Language Technology Research, pp. 257\u2013262. Morgan Kaufmann Publishers Inc., San Francisco, CA, (2002)"},{"key":"133_CR10","doi-asserted-by":"crossref","unstructured":"Brill, E., Moore, R.C.: An improved error model for noisy channel spelling correction. In: Proceedings of the 38th Annual Meeting of the ACL, pp. 286\u2013293. (2000)","DOI":"10.3115\/1075218.1075255"},{"key":"133_CR11","doi-asserted-by":"crossref","unstructured":"Strohmaier, C.M., Ringlstetter, C., Schulz, K.U., Mihov, S.: Lexical postcorrection of OCR-results: the web as a dynamic secondary dictionary? In: International Conference on Document Analysis and Recognition 2:1133 (2003)","DOI":"10.1109\/ICDAR.2003.1227833"},{"issue":"3","key":"133_CR12","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1162\/coli.2006.32.3.295","volume":"32","author":"C. Ringlstetter","year":"2006","unstructured":"Ringlstetter C., Schulz K.U., Mihov S.: Orthographic errors in web pages: toward cleaner web corpora. Comput. Linguist. 32(3), 295\u2013340 (2006)","journal-title":"Comput. Linguist."},{"key":"133_CR13","unstructured":"Levenshtein, V.: Binary codes capable of correcting deletions, insertions, and reversals. In: Cybernetics and Control Theory, vol. 10(8), pp. 707\u2013710 (1965), original in: Doklady Nauk SSSR 163(4):845\u2013848 (1965)"},{"key":"133_CR14","doi-asserted-by":"crossref","unstructured":"Gotscharek, A., Neumann, A., Reffle, U., Ringlstetter, C., Schulz, K.U.: Enabling information retrieval on historical document collections: the role of matching procedures and special lexica. In: AND \u201909: Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data, pp. 69\u201376. ACM, New York, NY (2009)","DOI":"10.1145\/1568296.1568309"},{"key":"133_CR15","doi-asserted-by":"crossref","unstructured":"Reynaert, M.: Text induced spelling correction. In: Proceedings COLING 2004, Geneva (2004)","DOI":"10.3115\/1706238.1706256"},{"key":"133_CR16","doi-asserted-by":"crossref","unstructured":"Reynaert, M.: Text-induced spelling correction. PhD thesis, Tilburg University (2005)","DOI":"10.3115\/1706238.1706256"},{"issue":"3","key":"133_CR17","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1145\/363958.363994","volume":"7","author":"F.J. Damerau","year":"1964","unstructured":"Damerau F.J.: A technique for computer detection and correction of spelling errors. Commun. ACM 7(3), 171\u2013176 (1964)","journal-title":"Commun. ACM"},{"key":"133_CR18","doi-asserted-by":"crossref","unstructured":"Reynaert, M.: Non-interactive OCR post-correction for giga-scale digitization projects. In: Proceedings of CICLing 2008. Lecture Notes in Computer Science vol. 4919\/2008, pp. 617\u2013630. Springer, Berlin (2008)","DOI":"10.1007\/978-3-540-78135-6_53"},{"key":"133_CR19","doi-asserted-by":"crossref","unstructured":"Reynaert, M.: Parallel identification of the spelling variants in corpora. In: Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data 2009 (AND-2009), pp. 77\u201384. Barcelona, Spain (2009)","DOI":"10.1145\/1568296.1568310"},{"key":"133_CR20","doi-asserted-by":"crossref","first-page":"781","DOI":"10.1006\/jmla.1993.1039","volume":"32","author":"U. Frauenfelder","year":"1993","unstructured":"Frauenfelder U., Baayen R., Hellwig F., Schreuder R.: Neighbourhood density and frequency across languages and modalities. J. Mem. Lang. 32, 781\u2013804 (1993)","journal-title":"J. Mem. Lang."},{"key":"133_CR21","volume-title":"The psycho-biology of language: an introduction to dynamic philology, 2nd edn","author":"G.K. Zipf","year":"1935","unstructured":"Zipf G.K.: The psycho-biology of language: an introduction to dynamic philology, 2nd edn. The M.I.T. Press, Cambridge, MA (1935)"},{"key":"133_CR22","volume-title":"Information Retrieval","author":"C.J. Rijsbergen van","year":"1975","unstructured":"van Rijsbergen C.J.: Information Retrieval. Butterworths, London (1975)"}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/www.springerlink.com\/index\/pdf\/10.1007\/s10032-010-0133-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T14:03:44Z","timestamp":1740665024000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10032-010-0133-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,11,3]]},"references-count":22,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2011,6]]}},"alternative-id":["133"],"URL":"https:\/\/doi.org\/10.1007\/s10032-010-0133-5","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"type":"print","value":"1433-2833"},{"type":"electronic","value":"1433-2825"}],"subject":[],"published":{"date-parts":[[2010,11,3]]}}}