{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T00:45:17Z","timestamp":1778201117384,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2009,7,25]],"date-time":"2009-07-25T00:00:00Z","timestamp":1248480000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2009,7,25]]},"DOI":"10.1145\/1577802.1577804","type":"proceedings-article","created":{"date-parts":[[2009,8,4]],"date-time":"2009-08-04T13:38:06Z","timestamp":1249393086000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":89,"title":["Adapting the Tesseract open source OCR engine for multilingual OCR"],"prefix":"10.1145","author":[{"given":"Ray","family":"Smith","sequence":"first","affiliation":[{"name":"Google Inc., Mountain View, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daria","family":"Antonova","sequence":"additional","affiliation":[{"name":"Google Inc., Mountain View, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dar-Shyang","family":"Lee","sequence":"additional","affiliation":[{"name":"Google Inc., Mountain View, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2009,7,25]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"163","volume-title":"Conf. on Pattern Recognition","author":"Nagy G.","year":"1988","unstructured":"Nagy , G. , \" Chinese character recognition : a twenty-five-year perspective\" 9th Int . Conf. on Pattern Recognition , Nov 1988 , pp 163 -- 167 . Nagy, G., \"Chinese character recognition: a twenty-five-year perspective\" 9th Int. Conf. on Pattern Recognition, Nov 1988, pp 163--167."},{"key":"e_1_3_2_1_2_1","first-page":"179","volume-title":"Proc. ICIP-94","volume":"1","author":"Xia F.","year":"1994","unstructured":"Xia , F. \" Knowledge -based sub-pattern segmentation : decompositions of Chinese characters\" Image Processing 1994 . Proc. ICIP-94 , IEEE Int. Conf. vol. 1 , 13--16 Nov 1994 , pp 179 -- 182 . Xia, F. \"Knowledge-based sub-pattern segmentation: decompositions of Chinese characters\" Image Processing 1994. Proc. ICIP-94, IEEE Int. Conf. vol. 1, 13--16 Nov 1994, pp 179--182."},{"key":"e_1_3_2_1_3_1","first-page":"337","volume-title":"Proc. 5th Int. Conf. on Document Analysis and Recognition","author":"Zhidong Lu","year":"1999","unstructured":"Zhidong Lu , Schwartz, R. Natarajan , P. Bazzi , I. Makhoul , J. \" Advances in the BBN BYBLOS OCR system\" Proc. 5th Int. Conf. on Document Analysis and Recognition , 1999 , pp 337 -- 340 . Zhidong Lu, Schwartz, R. Natarajan, P. Bazzi, I. Makhoul, J. \"Advances in the BBN BYBLOS OCR system\" Proc. 5th Int. Conf. on Document Analysis and Recognition, 1999, pp 337--340."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1117\/12.335808"},{"key":"e_1_3_2_1_5_1","first-page":"800","volume-title":"6th Int. Conf on Document Analysis and Recognition","author":"Bansal V.","year":"2001","unstructured":"Bansal , V. ; Sinha , R. M. K , \"A complete OCR for printed Hindi text in Devanagari script\" Proc . 6th Int. Conf on Document Analysis and Recognition , 2001 , pp 800 -- 804 . Bansal, V.; Sinha, R. M. K, \"A complete OCR for printed Hindi text in Devanagari script\" Proc. 6th Int. Conf on Document Analysis and Recognition, 2001, pp 800--804."},{"key":"e_1_3_2_1_6_1","first-page":"122","volume-title":"Workshop on document Image Analysis for Libraries","author":"Govindaraju V.","year":"2004","unstructured":"Govindaraju , V. , et. al. \" Tools for enabling digital access to multi-lingual Indic documents\" Proc 1st Int . Workshop on document Image Analysis for Libraries , 2004 , pp 122 -- 133 . Govindaraju, V., et. al. \"Tools for enabling digital access to multi-lingual Indic documents\" Proc 1st Int. Workshop on document Image Analysis for Libraries, 2004, pp 122--133."},{"key":"e_1_3_2_1_7_1","unstructured":"Official Google Blog: http:\/\/googleblog.blogspot.com\/2008\/07\/hitting-40-languages.html.  Official Google Blog: http:\/\/googleblog.blogspot.com\/2008\/07\/hitting-40-languages.html."},{"key":"e_1_3_2_1_8_1","first-page":"629","volume-title":"Conf. on Document Analysis and Recognition","author":"Smith R.","year":"2007","unstructured":"Smith , R. , \" An Overview of the Tesseract OCR Engine\" Proc 9th Int . Conf. on Document Analysis and Recognition , 2007 , pp 629 -- 633 . Smith, R., \"An Overview of the Tesseract OCR Engine\" Proc 9th Int. Conf. on Document Analysis and Recognition, 2007, pp 629--633."},{"key":"e_1_3_2_1_9_1","unstructured":"Tesseract Open-Source OCR: http:\/\/code.google.com\/p\/tesseract-ocr.  Tesseract Open-Source OCR: http:\/\/code.google.com\/p\/tesseract-ocr."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.257"},{"key":"e_1_3_2_1_11_1","first-page":"1145","volume-title":"3rd Int. Conf. on Document Analysis and Recognition","author":"Smith R.","year":"1995","unstructured":"Smith , R. , \"A simple and efficient skew detection algorithm via text row accumulation\" Proc . 3rd Int. Conf. on Document Analysis and Recognition , 1995 , pp 1145 -- 1148 . Smith, R., \"A simple and efficient skew detection algorithm via text row accumulation\" Proc. 3rd Int. Conf. on Document Analysis and Recognition, 1995, pp 1145--1148."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1577802.1577809"},{"key":"e_1_3_2_1_13_1","first-page":"518","volume-title":"Proc. 25th Int. Conf. on Very Large Data Bases","author":"Gionis A.","year":"1999","unstructured":"Gionis , A. , Indyk , P. , Motwani , R. , \" Similarity Search in High Dimensions via Hashing\" Proc. 25th Int. Conf. on Very Large Data Bases , 1999 , pp 518 -- 529 . Gionis, A., Indyk, P., Motwani, R., \"Similarity Search in High Dimensions via Hashing\" Proc. 25th Int. Conf. on Very Large Data Bases, 1999, pp 518--529."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-008-0096-z"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022648800760"}],"event":{"name":"MOCR '09: International Workshop on Multilingual OCR","location":"Barcelona Spain","acronym":"MOCR '09"},"container-title":["Proceedings of the International Workshop on Multilingual OCR"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1577802.1577804","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1577802.1577804","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T20:22:35Z","timestamp":1750278155000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1577802.1577804"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,7,25]]},"references-count":15,"alternative-id":["10.1145\/1577802.1577804","10.1145\/1577802"],"URL":"https:\/\/doi.org\/10.1145\/1577802.1577804","relation":{},"subject":[],"published":{"date-parts":[[2009,7,25]]},"assertion":[{"value":"2009-07-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}