{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T23:51:30Z","timestamp":1725666690942},"publisher-location":"Berlin, Heidelberg","reference-count":22,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642286001"},{"type":"electronic","value":"9783642286018"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-28601-8_16","type":"book-chapter","created":{"date-parts":[[2012,3,6]],"date-time":"2012-03-06T09:59:57Z","timestamp":1331027997000},"page":"181-193","source":"Crossref","is-referenced-by-count":3,"title":["Information Extraction from Webpages Based on DOM Distances"],"prefix":"10.1007","author":[{"given":"Carlos","family":"Castillo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"H\u00e9ctor","family":"Valero","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jos\u00e9 Guadalupe","family":"Ramos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Josep","family":"Silva","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Dalvi, B., Cohen, W.W., Callan, J.: Websets: Extracting sets of entities from the web using unsupervised information extraction. Technical report, Carnegie Mellon School of computer Science (2011)","DOI":"10.1145\/2124295.2124327"},{"key":"16_CR2","unstructured":"Kushmerick, N., Weld, D.S., Doorenbos, R.: Wrapper induction for information extraction. In: Proceedings of the Fifteenth International Joint Conference on Artificial Intelligence (IJCAI 1997) (1997)"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Cohen, W.W., Hurst, M., Jensen, L.S.: A flexible learning system for wrapping tables and lists in html documents. In: Proceedings of the international World Wide Web conference (WWW 2002), pp. 232\u2013241 (2002)","DOI":"10.1145\/511475.511477"},{"issue":"5","key":"16_CR4","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/MIS.2002.1039832","volume":"17","author":"P.Y. Lee","year":"2002","unstructured":"Lee, P.Y., Hui, S.C., Fong, A.C.M.: Neural networks for web content filtering. IEEE Intelligent Systems\u00a017(5), 48\u201357 (2002)","journal-title":"IEEE Intelligent Systems"},{"key":"16_CR5","unstructured":"Anti-Porn Parental Controls Software. Porn Filtering (March 2010), \n                    \n                      http:\/\/www.tueagles.com\/anti-porn\/"},{"key":"16_CR6","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1093\/ietisy\/e90-d.5.859","volume":"E90","author":"B.-Y. Kang","year":"2007","unstructured":"Kang, B.-Y., Kim, H.-G.: Web page filtering for domain ontology with the context of concept. IEICE - Trans. Inf. Syst.\u00a0E90, D859\u2013D862 (2007)","journal-title":"IEICE - Trans. Inf. Syst."},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Henzinger, M.: The Past, Present and Future of Web Information Retrieval. In: Proceedings of the 23th ACM Symposium on Principles of Database Systems (2004)","DOI":"10.1145\/1055558.1055566"},{"key":"16_CR8","unstructured":"W3C Consortium. Resource Description Framework (RDF), \n                    \n                      www.w3.org\/RDF"},{"key":"16_CR9","unstructured":"W3C Consortium. Web Ontology Language (OWL), \n                    \n                      www.w3.org\/2004\/OWL"},{"key":"16_CR10","unstructured":"Microformats.org. The Official Microformats Site (2009), \n                    \n                      http:\/\/microformats.org"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Khare, R., \u00c7elik, T.: Microformats: a Pragmatic Path to the Semantic Web. In: Proceedings of the 15h International Conference on World Wide Web, pp. 865\u2013866 (2006)","DOI":"10.1145\/1135777.1135917"},{"issue":"1","key":"16_CR12","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1109\/MIC.2006.13","volume":"10","author":"R. Khare","year":"2006","unstructured":"Khare, R.: Microformats: The Next (Small) Thing on the Semantic Web? IEEE Internet Computing\u00a010(1), 68\u201375 (2006)","journal-title":"IEEE Internet Computing"},{"issue":"2","key":"16_CR13","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/s11280-004-4873-3","volume":"8","author":"S. Gupta","year":"2005","unstructured":"Gupta, S., et al.: Automating Content Extraction of HTML Documents. World Wide Archive\u00a08(2), 179\u2013224 (2005)","journal-title":"World Wide Archive"},{"key":"16_CR14","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1093\/ietisy\/e91-d.2.251","volume":"E91-D","author":"P. Li","year":"2008","unstructured":"Li, P., Liu, M., Lin, Y., Lai, Y.: Accelerating Web Content Filtering by the Early Decision Algorithm. IEICE Transactions on Information and Systems\u00a0E91-D, 251\u2013257 (2008)","journal-title":"IEICE Transactions on Information and Systems"},{"key":"16_CR15","unstructured":"W3C Consortium, Document Object Model (DOM), \n                    \n                      www.w3.org\/DOM"},{"key":"16_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1007\/978-3-540-30216-2_13","volume-title":"Algorithms and Models for the Web-Graph","author":"R. Baeza-Yates","year":"2004","unstructured":"Baeza-Yates, R., Castillo, C.: Crawling the Infinite Web: Five Levels Are Enough. In: Leonardi, S. (ed.) WAW 2004. LNCS, vol.\u00a03243, pp. 156\u2013167. Springer, Heidelberg (2004)"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Micarelli, A., Gasparetti, F.: Adaptative Focused Crawling. In: The Adaptative Web, pp. 231\u2013262 (2007)","DOI":"10.1007\/978-3-540-72079-9_7"},{"key":"16_CR18","volume-title":"Designing Web Usability: The Practice of Simplicity","author":"J. Nielsen","year":"2010","unstructured":"Nielsen, J.: Designing Web Usability: The Practice of Simplicity. New Riders Publishing, Indianapolis (2010) ISBN 1-56205-810-X"},{"key":"16_CR19","series-title":"The Information Retrieval Series","volume-title":"Visualization for Information Retrieval","author":"J. Zhang","year":"2007","unstructured":"Zhang, J.: Visualization for Information Retrieval. The Information Retrieval Series. Springer, Heidelberg (2007) ISBN 3-54075-1475"},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Hearst, M.A.: TileBars: Visualization of Term Distribution Information. In: Proceedings of the ACM SIGCHI Conference on Human Factors in Computing Systems, Denver, CO, pp. 59\u201366 (May 1995)","DOI":"10.1145\/223904.223912"},{"key":"16_CR21","unstructured":"Gottron, T.: Evaluating Content Extraction on HTML Documents. In: Proceedings of the 2nd International Conference on Internet Technologies and Applications, pp. 123\u2013132 (2007)"},{"key":"16_CR22","unstructured":"Apache Foundation. The Apache crawler Nutch (2010), \n                    \n                      http:\/\/nutch.apache.org"}],"container-title":["Lecture Notes in Computer Science","Computational Linguistics and Intelligent Text Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-28601-8_16.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T11:02:28Z","timestamp":1620126148000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-28601-8_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642286001","9783642286018"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-28601-8_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}