{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T13:29:21Z","timestamp":1725542961680},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540370338"},{"type":"electronic","value":"9783540370352"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11811220_50","type":"book-chapter","created":{"date-parts":[[2006,7,24]],"date-time":"2006-07-24T12:40:48Z","timestamp":1153744848000},"page":"586-598","source":"Crossref","is-referenced-by-count":0,"title":["Information Extraction from Semi-structured Web Documents"],"prefix":"10.1007","author":[{"given":"Bo-Hyun","family":"Yun","sequence":"first","affiliation":[]},{"given":"Chang-Ho","family":"Seo","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"50_CR1","doi-asserted-by":"crossref","unstructured":"Adelberg, B.: NoDoSE- A tool for Semi-Automatically Extracting Structured and Semistructured Data from Text Documents. In: ACM SIGMOD (1998)","DOI":"10.1145\/276304.276330"},{"key":"50_CR2","doi-asserted-by":"crossref","unstructured":"Arasu, A., Garcia-Molina, H.: Extracting structured data from web pages. In: ACM SIGMOD (2003)","DOI":"10.1145\/872757.872799"},{"key":"50_CR3","series-title":"Lecture Notes in Computer Science","volume-title":"Declarative Information Extraction, Web Crawling, and Recursive Wrapping with Lixto","author":"R. Baumgartner","year":"2001","unstructured":"Baumgartner, R., Flesca, S., Gottlob, G.: Declarative Information Extraction, Web Crawling, and Recursive Wrapping with Lixto. LNCS. Springer, Heidelberg (2001)"},{"key":"50_CR4","doi-asserted-by":"crossref","unstructured":"Blum, A., Mitchell, T.: Combining Labeled and Unlabeled Data with Co-Training. In: Proceedings of the 1998 Conference on Computational Learning Theory (1998)","DOI":"10.1145\/279943.279962"},{"key":"50_CR5","doi-asserted-by":"crossref","unstructured":"Buttler, D., Liu, L., Pu, C.: A Fully Automated Object Extraction System for the World Wide Web. In: Proceedings of the 2001 International Conference on Distrubuted Computing Systems (May 2001)","DOI":"10.1109\/ICDSC.2001.918966"},{"key":"50_CR6","unstructured":"Califf, M.E.: Relational Learning Techniques for Natural Language Information Extraction, PhD thesis, University of Texas at Austin (August 1998)"},{"key":"50_CR7","unstructured":"Ciravegna, F.: Learning to Tag for Information Extraction from Text. In: Workshop Machine Learning for Information Extraction, European Conference on Artifical Intelligence ECCAI, August, Berlin, Germany (2000)"},{"key":"50_CR8","doi-asserted-by":"crossref","unstructured":"Cohen, W., Hurst, M., Jensen, L.S.: A flexible learning system for wrapping tables and lists in html documents. In: The Eleventh International World Wide Web Conference WWW 2002 (2002)","DOI":"10.1145\/511446.511477"},{"key":"50_CR9","doi-asserted-by":"crossref","unstructured":"Crescenzi, V., Mecca, G., Merialdo, P.: RoadRunner: Towards Automatic Data Extraction from Large Web Sites. In: Proceedings of 27th International Conference on Very Large Data Bases (2001)","DOI":"10.1145\/564691.564778"},{"key":"50_CR10","unstructured":"Eikvil, L.: Information Extraction from World Wide Web: A Survey, Report No. 945 (July 1999) ISBN 82-539-0429-0"},{"key":"50_CR11","doi-asserted-by":"crossref","unstructured":"Embley, D.W., Campbell, D.M., Jiang, Y.S., Ng, Y.-K., Smith, R.D., Liddle, S.W., Quass, D.W.: A Conceptual-Modeling Approach to Extracting Data from the Web. In: International Conference on Conceptual Modeling \/ the Entity Relationship Approach (1998)","DOI":"10.1007\/978-3-540-49524-6_7"},{"key":"50_CR12","unstructured":"Freitag, D.: Machine Learning for Information Extraction in Informal Domains, PhD thesis, Computer Science Department, Carnegie Mellon University, Pittsburgh, PA (November 1998)"},{"key":"50_CR13","unstructured":"Freitag, D., Kushmerick, N.: Boosted Wrapper Induction. In: Proceedings of the Seventh National Conference on Artificial, pp. 577\u2013583 (2000)"},{"key":"50_CR14","doi-asserted-by":"crossref","unstructured":"Gruser, J.R., Raschid, L., Vidal, M.E., Bright, L.: Wrapper Generation for Web Accessible Data Sources. In: Proceedings of the 3rd IFCIS International Conference on Cooperative Information Systems, New York (August 1998)","DOI":"10.1109\/COOPIS.1998.706180"},{"key":"50_CR15","unstructured":"Hsu, C.N., Chang, C.C.: Finite-State Transducers for Semi-Structured Text Mining. In: Workshop on Text Mining IJCAI 1999 (1999)"},{"key":"50_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/3-540-40030-3_16","volume-title":"Learning Language in Logic","author":"M. Junker","year":"2000","unstructured":"Junker, M., Sintek, M., Rinck, M.: Learning for text categorization and information extraction with ILP. In: Cussens, J., D\u017eeroski, S. (eds.) LLL 1999. LNCS, vol.\u00a01925, p. 247. Springer, Heidelberg (2000)"},{"issue":"2","key":"50_CR17","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/5254.757626","volume":"14","author":"N. Kushmerick","year":"1999","unstructured":"Kushmerick, N.: Gleaning the Web. IEEE Intelligent Systems\u00a014(2), 20\u201322 (1999)","journal-title":"IEEE Intelligent Systems"},{"key":"50_CR18","volume-title":"Adaptive Information Extraction: A Core Technology for Information Agents","author":"N. Kushmerick","year":"2002","unstructured":"Kushmerick, N., Thomas, B.: Intelligent Information Agents R&D in Europe: An AgentLink perspective. In: Adaptive Information Extraction: A Core Technology for Information Agents. Springer, Heidelberg (2002)"},{"key":"50_CR19","doi-asserted-by":"crossref","unstructured":"Liu, L., Pu, C., Han, W.: XWRAP: An XML-enabled Wrapper Construction System for Web Information Sources. In: Proceedings of the 16th International Conference on Data Engineering (2000)","DOI":"10.1109\/ICDE.2000.839475"},{"issue":"1","key":"50_CR20","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1145\/643477.643480","volume":"3","author":"P. Merialdo","year":"2003","unstructured":"Merialdo, P., Atzeni, P., Mecca, G.: Design and development of data-intensive web sites: The araneus approach. ACM Transaction on Internet Technology TOIT\u00a03(1), 49\u201392 (2003)","journal-title":"ACM Transaction on Internet Technology TOIT"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11811220_50.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T20:13:56Z","timestamp":1605644036000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11811220_50"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540370338","9783540370352"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/11811220_50","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}