{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T04:08:24Z","timestamp":1759205304961,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2004,6,13]],"date-time":"2004-06-13T00:00:00Z","timestamp":1087084800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2004,6,13]]},"DOI":"10.1145\/1007568.1007584","type":"proceedings-article","created":{"date-parts":[[2004,7,20]],"date-time":"2004-07-20T15:55:38Z","timestamp":1090338938000},"page":"119-130","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":82,"title":["Using the structure of Web sites for automatic segmentation of tables"],"prefix":"10.1145","author":[{"given":"Kristina","family":"Lerman","sequence":"first","affiliation":[{"name":"USC Information Sciences Institute, Marina del Rey, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lise","family":"Getoor","sequence":"additional","affiliation":[{"name":"University of Maryland, College Park, MD"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Steven","family":"Minton","sequence":"additional","affiliation":[{"name":"Fetch Technologies, Manhattan Beach, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Craig","family":"Knoblock","sequence":"additional","affiliation":[{"name":"USC Information Sciences Institute, Marina del Rey, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2004,6,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872799"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the Sixth International Workshop on Web and Databases (WebDB03)","author":"Arlotta L.","year":"2003","unstructured":"L. Arlotta , V. Crescenzi , G. Mecca , and P. Marialdo . Automatic annotation of data extracted from large web sites . In Proceedings of the Sixth International Workshop on Web and Databases (WebDB03) , 2003 . L. Arlotta, V. Crescenzi, G. Mecca, and P. Marialdo. Automatic annotation of data extracted from large web sites. In Proceedings of the Sixth International Workshop on Web and Databases (WebDB03), 2003."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/375663.375682"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/371920.372182"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3115\/990820.990845"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/511446.511477"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the International Workshop on Data Semantics in Web Information Systems (DASWIS-2001)","author":"Crescenzi V.","year":"2001","unstructured":"V. Crescenzi , G. Mecca , and P. Merialdo . Automatic web information extraction in the ROADRUNNER system . In Proceedings of the International Workshop on Data Semantics in Web Information Systems (DASWIS-2001) , 2001 . V. Crescenzi, G. Mecca, and P. Merialdo. Automatic web information extraction in the ROADRUNNER system. In Proceedings of the International Workshop on Data Semantics in Web Information Systems (DASWIS-2001), 2001."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 27th Conference on Very Large Databases (VLDB)","author":"Crescenzi V.","year":"2001","unstructured":"V. Crescenzi , G. Mecca , and P. Merialdo . RoadRunner: Towards automatic data extraction from large web sites . In Proceedings of the 27th Conference on Very Large Databases (VLDB) , Rome, Italy , 2001 . V. Crescenzi, G. Mecca, and P. Merialdo. RoadRunner: Towards automatic data extraction from large web sites. In Proceedings of the 27th Conference on Very Large Databases (VLDB), Rome, Italy, 2001."},{"volume-title":"Thesis proposal","author":"Gazen C.","key":"e_1_3_2_1_9_1","unstructured":"C. Gazen . Thesis proposal , Carnegie Mellon University . C. Gazen. Thesis proposal, Carnegie Mellon University."},{"key":"e_1_3_2_1_10_1","first-page":"472","volume-title":"Proc. Conf. Advances in Neural Information Processing Systems, NIPS","volume":"8","author":"Ghahramani Z.","year":"1995","unstructured":"Z. Ghahramani and M. I. Jordan . Factorial hidden Markov models. In D. S. Touretzky, M. C. Mozer, and M. E. Hasselmo, editors , Proc. Conf. Advances in Neural Information Processing Systems, NIPS , volume 8 , pages 472 -- 478 . MIT Press , 1995 . Z. Ghahramani and M. I. Jordan. Factorial hidden Markov models. In D. S. Touretzky, M. C. Mozer, and M. E. Hasselmo, editors, Proc. Conf. Advances in Neural Information Processing Systems, NIPS, volume 8, pages 472--478. MIT Press, 1995."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 1st International Workshop on Web Document Analysis","author":"Hurst M.","year":"2001","unstructured":"M. Hurst . Layout and language: Challenges for table understanding on the web. In In Web Document Analysis , Proceedings of the 1st International Workshop on Web Document Analysis , 2001 . M. Hurst. Layout and language: Challenges for table understanding on the web. In In Web Document Analysis, Proceedings of the 1st International Workshop on Web Document Analysis, 2001."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/646270.684824"},{"key":"e_1_3_2_1_14_1","volume-title":"Intelligent Information Agents R&D in Europe: An AgentLink perspective","author":"Kushmerick N.","year":"2002","unstructured":"N. Kushmerick and B. Thoma . Intelligent Information Agents R&D in Europe: An AgentLink perspective , chapter Adaptive information extraction: Core technologies for information agents. Springer , 2002 . N. Kushmerick and B. Thoma. Intelligent Information Agents R&D in Europe: An AgentLink perspective, chapter Adaptive information extraction: Core technologies for information agents. Springer, 2002."},{"key":"e_1_3_2_1_15_1","first-page":"282","volume-title":"Proc. 18th International Conf. on Machine Learning","author":"Lafferty J.","year":"2001","unstructured":"J. Lafferty , A. McCallum , and F. Pereira . Conditional random fields: Probabilistic models for segmenting and labeling sequence data . In Proc. 18th International Conf. on Machine Learning , pages 282 -- 289 . Morgan Kaufmann, San Francisco, CA , 2001 . J. Lafferty, A. McCallum, and F. Pereira. Conditional random fields: Probabilistic models for segmenting and labeling sequence data. In Proc. 18th International Conf. on Machine Learning, pages 282--289. Morgan Kaufmann, San Francisco, CA, 2001."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 15th National Conference on Artificial Intelligence (AAAI-2000)","author":"Lerman K.","year":"2000","unstructured":"K. Lerman and S. Minton . Learning the Common Structure of Data . In Proceedings of the 15th National Conference on Artificial Intelligence (AAAI-2000) , Menlo Park , 2000 . AAAI Press. K. Lerman and S. Minton. Learning the Common Structure of Data. In Proceedings of the 15th National Conference on Artificial Intelligence (AAAI-2000), Menlo Park, 2000. AAAI Press."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the workshop on Advances in Text Extraction and Mining (IJCAI-2001)","author":"Lerman K.","year":"2001","unstructured":"K. Lerman , C. A. Knoblock , and S. Minton . Automatic data extraction from lists and tables in web sources . In Proceedings of the workshop on Advances in Text Extraction and Mining (IJCAI-2001) , Menlo Park , 2001 . AAAI Press. K. Lerman, C. A. Knoblock, and S. Minton. Automatic data extraction from lists and tables in web sources. In Proceedings of the workshop on Advances in Text Extraction and Mining (IJCAI-2001), Menlo Park, 2001. AAAI Press."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/1622420.1622425"},{"key":"e_1_3_2_1_19_1","volume-title":"Populating the Semantic Web. Submitted to the workshop on Advances in Text Extraction and Mining (ATEM-2004)","author":"Lerman K.","year":"2004","unstructured":"K. Lerman , C. Gazen , S. Minton , and C. A. Knoblock ,. Populating the Semantic Web. Submitted to the workshop on Advances in Text Extraction and Mining (ATEM-2004) , 2004 . K. Lerman, C. Gazen, S. Minton, and C. A. Knoblock,. Populating the Semantic Web. Submitted to the workshop on Advances in Text Extraction and Mining (ATEM-2004), 2004."},{"key":"e_1_3_2_1_21_1","first-page":"435","volume-title":"Proceedings of the 19th International Conference on Machine Learning (ICML 2002","author":"Muslea I.","year":"2002","unstructured":"I. Muslea , S. Minton , and C. Knoblock . Active + semi-supervised learning = robust multi-view learning . In Proceedings of the 19th International Conference on Machine Learning (ICML 2002 ), pages 435 -- 442 . Morgan Kaufmann, San Francisco, CA , 2002 . I. Muslea, S. Minton, and C. Knoblock. Active + semi-supervised learning = robust multi-view learning. In Proceedings of the 19th International Conference on Machine Learning (ICML 2002), pages 435--442. Morgan Kaufmann, San Francisco, CA, 2002."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3115\/1034678.1034746"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/860435.860479"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/263690.263816"},{"key":"e_1_3_2_1_25_1","unstructured":"L. R. Rabiner. A tutorial on hidden markov models and selected applications in speech recognition. In Readings in Speech Recognition.   L. R. Rabiner. A tutorial on hidden markov models and selected applications in speech recognition. In Readings in Speech Recognition."},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the Twenty-seventh International Conference on Very Large Databases","author":"Raghavan S.","year":"2001","unstructured":"S. Raghavan and H. Garcia-Molina . Crawling the hidden web . In Proceedings of the Twenty-seventh International Conference on Very Large Databases , 2001 . S. Raghavan and H. Garcia-Molina. Crawling the hidden web. In Proceedings of the Twenty-seventh International Conference on Very Large Databases, 2001."},{"key":"e_1_3_2_1_27_1","unstructured":"J. P. Walser. Wsat(oip) package.  J. P. Walser. Wsat(oip) package."},{"key":"e_1_3_2_1_28_1","series-title":"LNCS","doi-asserted-by":"crossref","DOI":"10.1007\/3-540-48369-1","volume-title":"Integer Optimization by Local Search: A Domain Independent Approach","author":"Walser J. P.","year":"1999","unstructured":"J. P. Walser . Integer Optimization by Local Search: A Domain Independent Approach , volume 1637 of LNCS . Springer , New York , 1999 . J. P. Walser. Integer Optimization by Local Search: A Domain Independent Approach, volume 1637 of LNCS. Springer, New York, 1999."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/647798.736657"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/511446.511478"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/647798.736825"},{"key":"e_1_3_2_1_32_1","volume-title":"in Proceedings of the International Workshop on Web Document Analysis (WDA 2001","author":"Yoshida M.","year":"2001","unstructured":"M. Yoshida , K. Torisawa , and J. Tsujii . A method to integrate tables of the world wide web . In in Proceedings of the International Workshop on Web Document Analysis (WDA 2001 ), Seattle, U.S. , September 2001 . M. Yoshida, K. Torisawa, and J. Tsujii. A method to integrate tables of the world wide web. In in Proceedings of the International Workshop on Web Document Analysis (WDA 2001), Seattle, U.S., September 2001."}],"event":{"name":"SIGMOD\/PODS04: International Conference on Management of Data and Symposium on Principles Database and Systems","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Paris France","acronym":"SIGMOD\/PODS04"},"container-title":["Proceedings of the 2004 ACM SIGMOD international conference on Management of data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1007568.1007584","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1007568.1007584","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:23:56Z","timestamp":1750267436000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1007568.1007584"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2004,6,13]]},"references-count":30,"alternative-id":["10.1145\/1007568.1007584","10.1145\/1007568"],"URL":"https:\/\/doi.org\/10.1145\/1007568.1007584","relation":{},"subject":[],"published":{"date-parts":[[2004,6,13]]},"assertion":[{"value":"2004-06-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}