{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T14:16:03Z","timestamp":1771337763626,"version":"3.50.1"},"reference-count":12,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2007,9]]},"DOI":"10.1109\/icdar.2007.4377029","type":"proceedings-article","created":{"date-parts":[[2007,11,7]],"date-time":"2007-11-07T19:17:03Z","timestamp":1194463023000},"page":"819-823","source":"Crossref","is-referenced-by-count":34,"title":["Google Book Search: Document Understanding on a Massive Scale"],"prefix":"10.1109","author":[{"given":"L.","family":"Vincent","sequence":"first","affiliation":[]}],"member":"263","reference":[{"key":"ref4","article-title":"The hOCR microformat for OCR workflow and results","author":"breuel","year":"2007","journal-title":"Ninth Int Conf on Document Analysis and Recognition ICDAR 2007"},{"key":"ref3","first-page":"302","article-title":"Ground truth for layout analysis performance evaluation","volume":"3872","author":"antonacopoulos","year":"2006","journal-title":"Document Analysis Systems VII Proceedings of the International Association for Pattern Recognition (IAPR) Workshop on Document Analysis Systems (DAS2006)"},{"key":"ref10","article-title":"An overview of the Tesseract OCR engine","author":"smith","year":"2007","journal-title":"IC-DAR '2007 International Conference on Document Analysis and Recognition"},{"key":"ref6","year":"1998","journal-title":"Google Google search technology"},{"key":"ref11","author":"smith","year":"2006","journal-title":"Tesseract OCR home page"},{"key":"ref5","author":"breuel","year":"2007","journal-title":"OCRopus home page"},{"key":"ref12","author":"vincent","year":"2006","journal-title":"Announcing Tesseract OCR"},{"key":"ref8","year":"2006","journal-title":"Google Google open source projects"},{"key":"ref7","year":"2004","journal-title":"Google About Google Book Search"},{"key":"ref2","article-title":"ICDAR2007 page segmentation competition","author":"antonacopoulos","year":"2007","journal-title":"Ninth Int Conf on Document Analysis and Recognition ICDAR 2007"},{"key":"ref9","author":"raman","year":"2007","journal-title":"Google and open source OCR"},{"key":"ref1","author":"anderson","year":"2006","journal-title":"The Long Tail"}],"event":{"name":"Ninth International Conference on Document Analysis and Recognition (ICDAR 2007) Vol 2","location":"Curitiba, Parana, Brazil","start":{"date-parts":[[2007,9,23]]},"end":{"date-parts":[[2007,9,26]]}},"container-title":["Ninth International Conference on Document Analysis and Recognition (ICDAR 2007) Vol 2"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4376968\/4376969\/04377029.pdf?arnumber=4377029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,16]],"date-time":"2017-03-16T16:25:19Z","timestamp":1489681519000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4377029\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,9]]},"references-count":12,"URL":"https:\/\/doi.org\/10.1109\/icdar.2007.4377029","relation":{},"ISSN":["1520-5363"],"issn-type":[{"value":"1520-5363","type":"print"}],"subject":[],"published":{"date-parts":[[2007,9]]}}}