{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T07:03:41Z","timestamp":1778655821185,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2008,8,24]],"date-time":"2008-08-24T00:00:00Z","timestamp":1219536000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2008,8,24]]},"DOI":"10.1145\/1401890.1401927","type":"proceedings-article","created":{"date-parts":[[2008,8,27]],"date-time":"2008-08-27T11:56:41Z","timestamp":1219838201000},"page":"274-282","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Entity categorization over large document collections"],"prefix":"10.1145","author":[{"given":"Venkatesh","family":"Ganti","sequence":"first","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]},{"given":"Arnd C.","family":"K\u00f6nig","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]},{"given":"Rares","family":"Vernica","sequence":"additional","affiliation":[{"name":"University of California, Irvine, Irvine, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2008,8,24]]},"reference":[{"issue":"4","key":"e_1_3_2_1_1_1","first-page":"3","volume":"28","author":"Agichtein E.","year":"2005","unstructured":"E. Agichtein . Scaling Information Extraction to Large Document Collections. IEEE Data Eng. Bull. , 28 ( 4 ): 3 -- 10 , 2005 . E. Agichtein. Scaling Information Extraction to Large Document Collections. IEEE Data Eng. Bull., 28(4):3--10, 2005.","journal-title":"Large Document Collections. IEEE Data Eng. Bull."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2003.1260786"},{"key":"e_1_3_2_1_3_1","volume-title":"ACM SIGKDD","author":"Agichtein E.","year":"2006","unstructured":"E. Agichtein and S. Sarawagi . Scalable Information Extraction and integration . In ACM SIGKDD , 2006 . E. Agichtein and S. Sarawagi. Scalable Information Extraction and integration. In ACM SIGKDD, 2006."},{"key":"e_1_3_2_1_4_1","volume-title":"Introduction to Information Extraction Technology. IJCAI-99 Tutorial","author":"Appelt D. E.","year":"1999","unstructured":"D. E. Appelt and D. Israel . Introduction to Information Extraction Technology. IJCAI-99 Tutorial , 1999 . D. E. Appelt and D. Israel. Introduction to Information Extraction Technology. IJCAI-99 Tutorial, 1999."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1281192.1281202"},{"key":"e_1_3_2_1_6_1","first-page":"2670","volume-title":"IJCAI","author":"Banko M.","year":"2007","unstructured":"M. Banko , M. J. Cafarella , S. Soderland , M. Broadhead , and O. Etzioni . Open Information Extraction from the Web . In IJCAI , pages 2670 -- 2676 , 2007 . M. Banko, M. J. Cafarella, S. Soderland, M. Broadhead, and O. Etzioni. Open Information Extraction from the Web. In IJCAI, pages 2670--2676, 2007."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/362686.362692"},{"key":"e_1_3_2_1_8_1","volume-title":"Relational Web Search. In WWW Conference","author":"Cafarella M.","year":"2006","unstructured":"M. Cafarella , M. Banko , and O. Etzioni . Relational Web Search. In WWW Conference , 2006 . M. Cafarella, M. Banko, and O. Etzioni. Relational Web Search. In WWW Conference, 2006."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1060745.1060811"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2006.55"},{"key":"e_1_3_2_1_11_1","volume-title":"SIGKDD","author":"Cohen W.","year":"2004","unstructured":"W. Cohen and A. McCallum . Information Extraction and Integration: an Overview . In SIGKDD , 2004 . W. Cohen and A. McCallum. Information Extraction and Integration: an Overview. In SIGKDD, 2004."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jalgor.2003.12.001"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1061318.1061325"},{"key":"e_1_3_2_1_14_1","volume-title":"IJCAI","author":"Downey D.","year":"2005","unstructured":"D. Downey , O. Etzioni , and S. Soderland . A Probabilistic Model of Redundancy in Information Extraction . In IJCAI , 2005 . D. Downey, O. Etzioni, and S. Soderland. A Probabilistic Model of Redundancy in Information Extraction. In IJCAI, 2005."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/11875604_84"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/152610.152611"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1142473.1142504"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150474"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119206"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150482"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/571024"},{"key":"e_1_3_2_1_22_1","first-page":"185","volume-title":"Advances in Kernel Methods: Support Vector Machine Learning","author":"Platt J.","year":"1999","unstructured":"J. Platt . Fast Training of SVM's Using Sequential Minimal Optimization . In Advances in Kernel Methods: Support Vector Machine Learning , pages 185 -- 209 . MIT Press , 1999 . J. Platt. Fast Training of SVM's Using Sequential Minimal Optimization. In Advances in Kernel Methods: Support Vector Machine Learning, pages 185--209. MIT Press, 1999."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1031171.1031280"},{"key":"e_1_3_2_1_24_1","volume-title":"The State of Record Linkage and Current Research Problems. Technical report","author":"Winkler W.","year":"1999","unstructured":"W. Winkler . The State of Record Linkage and Current Research Problems. Technical report , U.S. Bureau of the Census, 1999 . W. Winkler. The State of Record Linkage and Current Research Problems. Technical report, U.S. Bureau of the Census, 1999."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073163"}],"event":{"name":"KDD08: The 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","location":"Las Vegas Nevada USA","acronym":"KDD08","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","ACM Association for Computing Machinery"]},"container-title":["Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1401890.1401927","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1401890.1401927","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T13:56:40Z","timestamp":1750255000000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1401890.1401927"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,8,24]]},"references-count":25,"alternative-id":["10.1145\/1401890.1401927","10.1145\/1401890"],"URL":"https:\/\/doi.org\/10.1145\/1401890.1401927","relation":{},"subject":[],"published":{"date-parts":[[2008,8,24]]},"assertion":[{"value":"2008-08-24","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}