{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:31:35Z","timestamp":1750307495251,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2010,10,26]],"date-time":"2010-10-26T00:00:00Z","timestamp":1288051200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2010,10,26]]},"DOI":"10.1145\/1871840.1871843","type":"proceedings-article","created":{"date-parts":[[2010,10,28]],"date-time":"2010-10-28T14:43:19Z","timestamp":1288276999000},"page":"3-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Document"],"prefix":"10.1145","author":[{"given":"Herv\u00e9","family":"D\u00e9jean","sequence":"first","affiliation":[{"name":"Xerox Research Centre Europe, Meylan, France"}]},{"given":"Jean-Luc","family":"Meunier","sequence":"additional","affiliation":[{"name":"Xerox Research Centre Europe, Meylan, France"}]}],"member":"320","published-online":{"date-parts":[[2010,10,26]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/11669487_25"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings 2003 Symposium on Document Image Understanding Technology","author":"Beitzel Steven M.","year":"2003","unstructured":"Steven M. Beitzel , Eric C. Jensen , David A. Grossman , A Survey of Retrieval Strategies for OCR Text Collections , Proceedings 2003 Symposium on Document Image Understanding Technology , 2003 Steven M. Beitzel, Eric C. Jensen, David A. Grossman, A Survey of Retrieval Strategies for OCR Text Collections, Proceedings 2003 Symposium on Document Image Understanding Technology, 2003"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1096601.1096605"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1284420.1284456"},{"key":"e_1_3_2_1_5_1","first-page":"68150K","volume-title":"Kathrin. Proceedings of the SPIE","volume":"6815","author":"D\u00e9jean Herv\u00e9","year":"2008","unstructured":"Herv\u00e9 D\u00e9jean , Jean-Luc Meunier , Versatile page number analysis, Document Recognition and Retrieval XV. Edited by Yanikoglu, Berrin A.; Berkner , Kathrin. Proceedings of the SPIE , Volume 6815 , pp. 68150K - 68150K -9 ( 2008 ). Herv\u00e9 D\u00e9jean, Jean-Luc Meunier, Versatile page number analysis, Document Recognition and Retrieval XV. Edited by Yanikoglu, Berrin A.; Berkner, Kathrin. Proceedings of the SPIE, Volume 6815, pp. 68150K-68150K-9 (2008)."},{"key":"e_1_3_2_1_6_1","volume-title":"to DRR","author":"D\u00e9jean Herv\u00e9","year":"2011","unstructured":"Herv\u00e9 D\u00e9jean , Unsupervised page Template inference , submitted to DRR 2011 . Herv\u00e9 D\u00e9jean, Unsupervised page Template inference, submitted to DRR 2011."},{"key":"e_1_3_2_1_7_1","volume-title":"conference proceedings","author":"IMPACT, OCR","year":"2009","unstructured":"IMPACT, OCR in Mass digitisation , conference proceedings , April 2009 , IMPACT, OCR in Mass digitisation, conference proceedings, April 2009,"},{"key":"e_1_3_2_1_8_1","first-page":"120","volume-title":"Overview of the INEX 2009 Book Track, INEX Workshop pre-proceedings","author":"Kazai Gabriella","year":"2009","unstructured":"Gabriella Kazai , Antoine Doucet , Marijn Koolen , and Monica Landoni , Overview of the INEX 2009 Book Track, INEX Workshop pre-proceedings , pp. 120 -- 129 , 2009 . Gabriella Kazai, Antoine Doucet, Marijn Koolen, and Monica Landoni, Overview of the INEX 2009 Book Track, INEX Workshop pre-proceedings, pp. 120--129, 2009."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.133"},{"key":"e_1_3_2_1_10_1","volume-title":"US patent application 20040139384","author":"Xiaofan","year":"2004","unstructured":"Xiaofan lin , Removal of extraneous text from electronic documents , US patent application 20040139384 , 2004 Xiaofan lin, Removal of extraneous text from electronic documents, US patent application 20040139384, 2004"},{"key":"e_1_3_2_1_11_1","volume-title":"Yan Xiong: Detection and analysis of table of contents based on content association. IJDAR 8(2--3): 132--143","author":"Lin Xiaofan","year":"2006","unstructured":"Xiaofan Lin , Yan Xiong: Detection and analysis of table of contents based on content association. IJDAR 8(2--3): 132--143 , 2006 . Xiaofan Lin, Yan Xiong: Detection and analysis of table of contents based on content association. IJDAR 8(2--3): 132--143, 2006."},{"volume-title":"Quality Assurance in High","author":"Lin Xiaofan","key":"e_1_3_2_1_12_1","unstructured":"Xiaofan Lin , Quality Assurance in High Volume Document Digitization: A survey, Hewlett-Packard Laboratories report, 2006 Xiaofan Lin, Quality Assurance in High Volume Document Digitization: A survey, Hewlett-Packard Laboratories report, 2006"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390749.1390753"},{"volume-title":"D-Lib Magazine July\/August 2009","author":"Tanner Simon","key":"e_1_3_2_1_15_1","unstructured":"Simon Tanner , Trevor Mu\u00f1oz , Pich Hemy Ros , D-Lib Magazine July\/August 2009 Volume 15 Number 7\/8 Measuring Mass Text Digitization Quality and Usefulness Lessons Learned from Assessing the OCR Accuracy of the British Library's 19th Century Online Newspaper Archive Simon Tanner, Trevor Mu\u00f1oz, Pich Hemy Ros, D-Lib Magazine July\/August 2009 Volume 15 Number 7\/8 Measuring Mass Text Digitization Quality and Usefulness Lessons Learned from Assessing the OCR Accuracy of the British Library's 19th Century Online Newspaper Archive"},{"key":"e_1_3_2_1_16_1","volume-title":"May","author":"Tesseract","year":"2008","unstructured":"Tesseract , open source OCR engine , May 2008 . http:\/\/code.google.com\/p\/tesseract-ocr\/ Tesseract, open source OCR engine, May 2008. http:\/\/code.google.com\/p\/tesseract-ocr\/"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2003.1196325"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/1304596.1304903"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815330.1815356"}],"event":{"name":"CIKM '10: International Conference on Information and Knowledge Management","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Toronto ON Canada","acronym":"CIKM '10"},"container-title":["Proceedings of the fourth workshop on Analytics for noisy unstructured text data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1871840.1871843","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1871840.1871843","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:17:38Z","timestamp":1750249058000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1871840.1871843"}},"subtitle":["a useful level for facing noisy data"],"short-title":[],"issued":{"date-parts":[[2010,10,26]]},"references-count":18,"alternative-id":["10.1145\/1871840.1871843","10.1145\/1871840"],"URL":"https:\/\/doi.org\/10.1145\/1871840.1871843","relation":{},"subject":[],"published":{"date-parts":[[2010,10,26]]},"assertion":[{"value":"2010-10-26","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}