{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T11:01:35Z","timestamp":1761217295834,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":10,"publisher":"ACM","license":[{"start":{"date-parts":[[2009,9,16]],"date-time":"2009-09-16T00:00:00Z","timestamp":1253059200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2009,9,16]]},"DOI":"10.1145\/1600193.1600241","type":"proceedings-article","created":{"date-parts":[[2009,9,16]],"date-time":"2009-09-16T17:33:15Z","timestamp":1253122395000},"page":"218-221","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Web document text and images extraction using DOM analysis and natural language processing"],"prefix":"10.1145","author":[{"given":"Parag Mulendra","family":"Joshi","sequence":"first","affiliation":[{"name":"Hewlett-Packard Laboratories, Palo Alto, CA, USA"}]},{"given":"Sam","family":"Liu","sequence":"additional","affiliation":[{"name":"Hewlett-Packard Laboratories, Palo Alto, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2009,9,16]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/276304.276330"},{"volume-title":"Microsoft Research","year":"2003","author":"Cai D.","key":"e_1_3_2_1_2_1"},{"volume-title":"Proceedings of the 40th Anniversary Meeting of the Association for Computational Linguistics","year":"2002","author":"Cunningham H.","key":"e_1_3_2_1_3_1"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775182"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/565117.565137"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775134"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/846219.847340"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/956863.956961"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526840"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956785"}],"event":{"name":"DocEng '09: ACM Symposium on Document Engineering","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","ACM Association for Computing Machinery","SIGDOC ACM Special Interest Group for Design of Communications"],"location":"Munich Germany","acronym":"DocEng '09"},"container-title":["Proceedings of the 9th ACM symposium on Document engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1600193.1600241","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1600193.1600241","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:23:16Z","timestamp":1750249396000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1600193.1600241"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,9,16]]},"references-count":10,"alternative-id":["10.1145\/1600193.1600241","10.1145\/1600193"],"URL":"https:\/\/doi.org\/10.1145\/1600193.1600241","relation":{},"subject":[],"published":{"date-parts":[[2009,9,16]]},"assertion":[{"value":"2009-09-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}