{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T15:29:55Z","timestamp":1759073395491},"reference-count":33,"publisher":"Elsevier BV","issue":"8","license":[{"start":{"date-parts":[[2001,12,1]],"date-time":"2001-12-01T00:00:00Z","timestamp":1007164800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Information Systems"],"published-print":{"date-parts":[[2001,12]]},"DOI":"10.1016\/s0306-4379(01)00040-0","type":"journal-article","created":{"date-parts":[[2002,10,14]],"date-time":"2002-10-14T17:55:05Z","timestamp":1034618105000},"page":"563-583","source":"Crossref","is-referenced-by-count":7,"title":["An XML-enabled data extraction toolkit for web sources"],"prefix":"10.1016","volume":"26","author":[{"given":"Ling","family":"Liu","sequence":"first","affiliation":[]},{"given":"Calton","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Han","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0306-4379(01)00040-0_BIB1","unstructured":"C.A. Knoblock, S. Minton, J.L. Ambite, N. Ashish, P.J. Modi, I. Muslea, A. Philpot, S. Tejada, Modeling web sources for information integration, Proceedings of AAAI Conference, 1998."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB2","doi-asserted-by":"crossref","unstructured":"L. Liu, C. Pu, W. Tang, Continual queries for internet-scale event-driven information delivery, IEEE Knowledge Data Eng. (Special Issue on Web Technology) (1999) 610\u2013628.","DOI":"10.1109\/69.790816"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB3","doi-asserted-by":"crossref","unstructured":"L. Liu, C. Pu, W. Tang, J. Biggs, D. Buttler, W. Han, P. Benninghoff, F. Zu, CQ: a personalized update monitoring toolkit. Proceedings of ACM SIGMOD Conference, 1998.","DOI":"10.1145\/276304.276376"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB4","unstructured":"N. Kushmerick, D. Weil, R. Doorenbos, Wrapper induction for information extraction, Proceedings of International Joint Conference on Artificial Intelligence (IJCAI), 1997."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB5","unstructured":"H. Garcia-Molina, et al., The TSIMMIS approach to mediation: data models and languages (extended abstract), NGITS, 1995."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB6","doi-asserted-by":"crossref","unstructured":"J. Hammer, M. Brennig, H. Garcia-Molina, S. Nesterov, V. Vassalos, R. Yerneni, Template-based wrappers in the tsimmis system, Proceedings of ACM SIGMOD Conference, 1997.","DOI":"10.1145\/253260.253395"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB7","doi-asserted-by":"crossref","unstructured":"P. Atzeni, G. Mecca, Cut and paste, Proceedings of 16th ACM SIGMOD Symposium on Principles of Database Systems, 1997.","DOI":"10.1145\/263661.263678"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB8","unstructured":"J. Hammer, H. Garcia-Molina, J. Cho, R. Aranha, A. Crespo, Extracting semi-structured data from the web, Proceedings of Workshop on Management of Semi-structured Data, 1997, p. 18\u201325."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB9","doi-asserted-by":"crossref","unstructured":"B. Adelberg, Nodose\u2014a tool for semi-automatically extracting structured and semi-structured data from text documents, ACM SIGMOD, 1998.","DOI":"10.1145\/276304.276330"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB10","unstructured":"A. Sahuguet, F. Azavant, WysiWyg Web Wrapper Factory (W4F), Proceedings of WWW Conference, 1999."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB11","doi-asserted-by":"crossref","unstructured":"N. Ashish, C.A. Knoblock, Semi-automatic wrapper generation for internet information sources, Proceedings of Coopis Conference, 1997.","DOI":"10.1109\/COOPIS.1997.613813"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB12","doi-asserted-by":"crossref","unstructured":"R. Doorenbos, O. Etzioni, D. Weld, A scalable comparison-shopping agent for the world wide web, Proceedings of Autonomous Agents, 1997, pp. 39\u201348.","DOI":"10.1145\/267658.267666"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB13","unstructured":"N. Kushmerick, Wrapper induction for information extraction, Ph.D. Dissertation, Department of Computer Science, University of Washington, TR UW-CSE-97-11-04, 1997."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB14","unstructured":"S. Soderland, Learning to extract text-based information from the world wide web, Proceedings of Knowledge Discovery and Data Mining, 1997."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB15","unstructured":"D. Raggett, Clean Up Your Web Pahes with HTML TIDY, http:\/\/www.w3.org\/People\/Raggett\/tidy\/, 1999."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB16","unstructured":"W3C, Reformulating HTML in XML, http:\/\/www.w3.org\/TR\/WD-html-in-xml\/, 1999."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB17","doi-asserted-by":"crossref","unstructured":"G. Huck, P. Fankhauser, K. Aberer, E.J. Neuhold, Jedi: exchanging and synthesizing information from the web, Coopis, 1998.","DOI":"10.1109\/COOPIS.1998.706182"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB18","doi-asserted-by":"crossref","unstructured":"S. Abiteboul, D. Quass, J. McHugh, J. Widom, J. Weiner, The lorel query language for semi-structured data, J. Digital Library (1998).","DOI":"10.1007\/s007990050005"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB19","doi-asserted-by":"crossref","unstructured":"P. Buneman, S. Davidson, G.H.D. Suciu, A query language and optimization techniques for unstructured data, Proceedings of ACM SIGMOD Conference, 1996.","DOI":"10.1145\/233269.233368"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB20","unstructured":"D. Konopnicki, O. Shemueli, W3qs: a query system for the world wide web, Proceedings of the Very Large Databases Conference, 1995."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB21","doi-asserted-by":"crossref","unstructured":"A.O. Mendelzon, G. Mihaila, T. Milo, Querying the world wide web, International Conference on Parallel and Distributed Information Systems (PDIS), 1996.","DOI":"10.1109\/PDIS.1996.568671"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB22","unstructured":"T. Kistlera, H. Marais, WebL: a programming language for the web. http:\/\/www.research.digital.com\/ SRC\/WebL\/index.html, 1998."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB23","unstructured":"C. Allen, WIDL: application integration with XML, World Wide Web J. 2(4) (1997)."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB24","doi-asserted-by":"crossref","unstructured":"G. Arocena, A. Mendelzon, WebOQL: restructuring documents, databases, and webs, Proceedings of ICDE\u201998, February, 1998.","DOI":"10.1109\/ICDE.1998.655754"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB25","unstructured":"A. Deutsch, M. Fernandez, D. Florescu, A. Levy, D. Suciu, XML-QL: a query language for XML, http:\/\/www.w3c.org\/TR\/1998\/NOTE-xml-ql-19980819 (1998)."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB26","doi-asserted-by":"crossref","unstructured":"J. Gruser, L. Raschid, M. Vidal, L. Bright, A wrapper generation toolkit to specify and construct wrappers for web accessible data sources. ftp:\/\/ftp.umiacs.umd.edu\/pub\/louiqa\/ BAA9709\/PUB98\/1CoopIS98.ps, 1998.","DOI":"10.1109\/COOPIS.1998.706180"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB27","unstructured":"WWWC, XML Pointer Language, http:\/\/www.w3.org\/TR\/1998\/WD-xptr-19980303, 1998."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB28","doi-asserted-by":"crossref","unstructured":"L. Liu, W. Han, D. Buttler, C. Pu, W. Tang, XWrap: an XML-enabled wrapper construction system for web information sources, Proceedings of the International Conference on Data Engineering, San Diego, March 2000, pp. 611\u2013621.","DOI":"10.1109\/ICDE.2000.839475"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB29","doi-asserted-by":"crossref","unstructured":"D. Buttler, L. Liu, C. Pu, A fully automated object extract system for the Web, Proceedings of the International Conference on Distributed Computing Systems, Phoenix, Arizona, April 2001, pp. 611\u2013621.","DOI":"10.1109\/ICDSC.2001.918966"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB30","unstructured":"P.A. Bernstein, Microsoft repository, VLDB\u201997 Tutorial and ACM SIGMOD\u201996 Tutorial, 1997."},{"key":"10.1016\/S0306-4379(01)00040-0_BIB31","doi-asserted-by":"crossref","unstructured":"P.A. Bernstein, T. Bergstraesser, J. Carlson, S. Pal, P. Sanders, D. Shutt, Microsoft repository version 2 and the open information model, Information Systems 24 (2) (1999).","DOI":"10.1016\/S0306-4379(99)00006-X"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB32","doi-asserted-by":"crossref","unstructured":"T. Bergstraesser, P.A. Bernstein, S. Pal, D. Shutt, Versions and workspaces in microsoft repositories, ACM SIGMOD, 1999.","DOI":"10.1145\/304182.304248"},{"key":"10.1016\/S0306-4379(01)00040-0_BIB33","doi-asserted-by":"crossref","unstructured":"L. Liu, C. Pu, W. Han, XWrap: an XML-enabled wrapper construction system for web information sources, Proceedings of the International Conference on Data Engineering, San Diego, March 2000, pp. 611\u2013621.","DOI":"10.1109\/ICDE.2000.839475"}],"container-title":["Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306437901000400?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306437901000400?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,1,28]],"date-time":"2020-01-28T16:28:59Z","timestamp":1580228939000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0306437901000400"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001,12]]},"references-count":33,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2001,12]]}},"alternative-id":["S0306437901000400"],"URL":"https:\/\/doi.org\/10.1016\/s0306-4379(01)00040-0","relation":{},"ISSN":["0306-4379"],"issn-type":[{"value":"0306-4379","type":"print"}],"subject":[],"published":{"date-parts":[[2001,12]]}}}