{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T19:59:16Z","timestamp":1759694356515},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2009,11,18]],"date-time":"2009-11-18T00:00:00Z","timestamp":1258502400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2010,11]]},"DOI":"10.1007\/s10115-009-0266-3","type":"journal-article","created":{"date-parts":[[2009,11,17]],"date-time":"2009-11-17T11:26:08Z","timestamp":1258457168000},"page":"303-326","source":"Crossref","is-referenced-by-count":11,"title":["xCrawl: a high-recall crawling method for Web mining"],"prefix":"10.1007","volume":"25","author":[{"given":"Kostyantyn","family":"Shchekotykhin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dietmar","family":"Jannach","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gerhard","family":"Friedrich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2009,11,18]]},"reference":[{"key":"266_CR1","first-page":"96","volume-title":"Proceedings of the 10th international world wide web conference","author":"CC Aggarwal","year":"2001","unstructured":"Aggarwal CC, Al-Garawi F, Yu PS (2001) Intelligent crawling on the World Wide Web with arbitrary predicates. In: Shen VY, Saito N, Lyu RM, Zurko ME (eds) Proceedings of the 10th international world wide web conference. ACM, New York, pp 96\u2013105"},{"key":"266_CR2","first-page":"113","volume-title":"Proceedings of the 19th IEEE international conference on data engineering","author":"E Agichtein","year":"2003","unstructured":"Agichtein E, Gravano L (2003) Querying text databases for efficient information extraction. In: Dayal U, Ramamritham K, Vijayaraman TM (eds) Proceedings of the 19th IEEE international conference on data engineering. IEEE Computer Society, Los Alamitos, pp 113\u2013124"},{"key":"266_CR3","first-page":"125","volume-title":"Proceedings of the fourth international conference on web information systems engineering","author":"A Bergholz","year":"2003","unstructured":"Bergholz A, Chidlovskii B (2003) Crawling for domain-specific Hidden Web resources. In: Catarci T, Mercella M, Mylopoulos J, Orlowska ME (eds) Proceedings of the fourth international conference on web information systems engineering. IEEE Computer Society, Los Alamitos, pp 125\u2013133"},{"key":"266_CR4","volume-title":"Mining the Web: discovering knowledge from hypertext data","author":"S Chakrabarti","year":"2003","unstructured":"Chakrabarti S (2003) Mining the Web: discovering knowledge from hypertext data. Morgan Kaufmann, San Francisco"},{"key":"266_CR5","doi-asserted-by":"crossref","first-page":"1623","DOI":"10.1016\/S1389-1286(99)00052-3","volume":"31","author":"S Chakrabarti","year":"1999","unstructured":"Chakrabarti S, van den Berg M, Dom B (1999) Focused crawling: a new approach to topic-specific Web resource discovery. Comput Netw 31: 1623\u20131640","journal-title":"Comput Netw"},{"key":"266_CR6","first-page":"148","volume-title":"Proceedings of the 11th International World Wide Web Conference","author":"S Chakrabarti","year":"2002","unstructured":"Chakrabarti S, Punera K, Subramanyam M (2002) Accelerated focused crawling through online relevance feedback. In: Lassner D, De Roure D, Iyengar A (eds) Proceedings of the 11th International World Wide Web Conference. ACM, New York, pp 148\u2013159"},{"key":"266_CR7","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1016\/S0169-7552(98)00108-1","volume":"30","author":"J Cho","year":"1998","unstructured":"Cho J, Garcia-Molina H, Page L (1998) Efficient crawling through URL ordering. Comput Netw ISDN Syst 30: 161\u2013172","journal-title":"Comput Netw ISDN Syst"},{"key":"266_CR8","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1016\/S0004-3702(00)00004-7","volume":"118","author":"M Craven","year":"2000","unstructured":"Craven M, DiPasquo D, Freitag D et\u00a0al (2000) Learning to construct knowledge bases from the World Wide Web. Artif Intell 118: 69\u2013113","journal-title":"Artif Intell"},{"key":"266_CR9","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1145\/1242572.1242630","volume-title":"Proceedings of the 16th international conference on world wide web","author":"A Dasgupta","year":"2007","unstructured":"Dasgupta A, Ghosh A, Kumar R et\u00a0al (2007) The discoverability of the Web. In: Williamson CL, Zurko ME, Patel-Schneider PF et\u00a0al (eds) Proceedings of the 16th international conference on world wide web. ACM, New York, pp 421\u2013430"},{"key":"266_CR10","first-page":"527","volume-title":"Proceedings of 26th international conference on very large data bases","author":"M Diligenti","year":"2000","unstructured":"Diligenti M, Coetzee F, Lawrence S et\u00a0al (2000) Focused crawling using context graphs. In: Abbadi AE, Brodie ML, Chakravarthy S et\u00a0al (eds) Proceedings of 26th international conference on very large data bases. Morgan Kaufmann, San Francisco, pp 527\u2013534"},{"key":"266_CR11","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1145\/775152.775178","volume-title":"Proceedings of the 12th international conference on world wide web","author":"S Dill","year":"2003","unstructured":"Dill S, Eiron N, Gibson D et\u00a0al (2003) SemTag and seeker: bootstrapping the semantic Web via automated semantic annotation. In: Hencsey G, White B, Chen Y et\u00a0al (eds) Proceedings of the 12th international conference on world wide web. ACM, New York, pp 178\u2013186"},{"key":"266_CR12","first-page":"396","volume-title":"Proceedings of the thirtieth international conference on very large data bases","author":"M Ester","year":"2004","unstructured":"Ester M, Kriegel HP, Schubert M (2004) Accurate and efficient crawling for relevant websites. In: Nascimento MA, \u00d6zsu MT, Kossmann D et\u00a0al (eds) Proceedings of the thirtieth international conference on very large data bases. Morgan Kaufmann, San Francisco, pp 396\u2013407"},{"key":"266_CR13","doi-asserted-by":"crossref","first-page":"11","DOI":"10.2753\/JEC1086-4415110201","volume":"11","author":"A Felfernig","year":"2007","unstructured":"Felfernig A, Friedrich G, Jannach D et\u00a0al (2007) An integrated environment for the development of knowledge-based recommender applications. Int J Electron Commer 11: 11\u201334","journal-title":"Int J Electron Commer"},{"key":"266_CR14","volume-title":"Proceedings of the 16th international conference on world wide web","author":"W Gatterbauer","year":"2007","unstructured":"Gatterbauer W, Bohunsky P, Herzog M et\u00a0al (2007) Towards domain-independent information extraction from web tables. In: Williamson CL, Zurko ME, Patel-Schneider PF et\u00a0al (eds) Proceedings of the 16th international conference on world wide web. ACM, New York"},{"key":"266_CR15","doi-asserted-by":"crossref","first-page":"784","DOI":"10.1109\/TKDE.2003.1208999","volume":"15","author":"TH Haveliwala","year":"2003","unstructured":"Haveliwala TH (2003) Topic-Sensitive PageRank: a context-sensitive ranking algorithm for Web search. IEEE Trans Knowl Data Eng 15: 784\u2013796","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"266_CR16","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1145\/1142473.1142504","volume-title":"Proceedings of the 2006 ACM SIGMOD international conference on management of data","author":"PG Ipeirotis","year":"2006","unstructured":"Ipeirotis PG, Agichtein E, Jain P et\u00a0al (2006) To search or to crawl?: towards a query optimizer for text-centric tasks. In: Chaudhuri S, Hristidis V, Polyzotis N (eds) Proceedings of the 2006 ACM SIGMOD international conference on management of data. ACM, New York, pp 265\u2013276"},{"key":"266_CR17","doi-asserted-by":"crossref","unstructured":"Jannach D, Shchekotykhin K, Friedrich G (2009) Automated ontology instantiation from tabular web sources\u2014the AllRight system, Web semantics: science, services and agents on the world wide web (in press)","DOI":"10.1016\/j.websem.2009.04.002"},{"key":"266_CR18","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1145\/324133.324140","volume":"46","author":"J Kleinberg","year":"1999","unstructured":"Kleinberg J (1999) Authoritative sources in a hyperlinked environment. J ACM 46: 604\u2013632","journal-title":"J ACM"},{"key":"266_CR19","doi-asserted-by":"crossref","unstructured":"Kleinberg J, Kumar R, Raghavan P et\u00a0al (1999) The Web as a graph: measurements, models, and methods. In: Asano T, Imai H, Lee DT et\u00a0al (eds) Proceedings of the 5th annual international conference on computing and combinatorics. Lecture notes in computer science, vol 1627. Springer, Berlin, pp 1\u201317","DOI":"10.1007\/3-540-48686-0_1"},{"key":"266_CR20","first-page":"272","volume-title":"Proceedings of 9th international conference on information and knowledge management","author":"A Kruger","year":"2000","unstructured":"Kruger A, Giles CL, Coetzee F et\u00a0al (2000) DEADLINER: building a new Niche search engine. In: Agah A, Callan J, Rundensteiner E et\u00a0al (eds) Proceedings of 9th international conference on information and knowledge management. ACM, New York, pp 272\u2013281"},{"key":"266_CR21","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1145\/383952.383995","volume-title":"Proceedings of the 24th annual international ACM SIGIR conference on research and development in information retrieval","author":"F Menczer","year":"2001","unstructured":"Menczer F, Pant G, Srinivasan P et\u00a0al (2001) Evaluating topic-driven web crawlers. In: Kraft DH, Croft WB, Harper DJ et\u00a0al (eds) Proceedings of the 24th annual international ACM SIGIR conference on research and development in information retrieval. ACM, New York, pp 241\u2013249"},{"key":"266_CR22","first-page":"122","volume-title":"Proceedings of the 8th international conference on web engineering","author":"A Mesbah","year":"2008","unstructured":"Mesbah A, Bozdag E, van Deursen A (2008) Crawling AJAX by inferring user interface state changes. In: Schwabe D, Curbera F, Dantzig P (eds) Proceedings of the 8th international conference on web engineering. IEEE Computer Society, Los Alamitos, pp 122\u2013134"},{"key":"266_CR23","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1007\/s10115-007-0107-1","volume":"16","author":"T Peng","year":"2008","unstructured":"Peng T, Zuo W, He F (2008) SVM based adaptive learning method for text classification from positive and unlabeled documents. Knowl Inf Syst 16: 281\u2013301","journal-title":"Knowl Inf Syst"},{"key":"266_CR24","first-page":"335","volume-title":"Proceedings of the 16th international conference on machine learning","author":"J Rennie","year":"1999","unstructured":"Rennie J, McCallum A (1999) Using reinforcement learning to spider the Web efficiently. In: Bratko I, Dzeroski S (eds) Proceedings of the 16th international conference on machine learning. Morgan Kaufmann, San Francisco, pp 335\u2013343"},{"key":"266_CR25","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1108\/eb026866","volume":"46","author":"SE Robertson","year":"1990","unstructured":"Robertson SE (1990) On term selection for query expansion. J Documentation 46: 359\u2013364","journal-title":"J Documentation"},{"key":"266_CR26","first-page":"3","volume":"3","author":"U Schonfeld","year":"2009","unstructured":"Schonfeld U, Bar-Yossef Z, Keidar I (2009) Do not crawl in the DUST: different URLs with similar text. ACM Trans Web 3: 3\u201331","journal-title":"ACM Trans Web"},{"key":"266_CR27","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1145\/1298406.1298438","volume-title":"Proccedings of the 4th international conference on knowledge capture","author":"K Shchekotykhin","year":"2007","unstructured":"Shchekotykhin K, Jannach D, Friedrich G (2007) Clustering Web documents with tables for information extraction. In: Sleeman D, Barker K (eds) Proccedings of the 4th international conference on knowledge capture. ACM, New York, pp 169\u2013170"},{"key":"266_CR28","first-page":"463","volume-title":"Proceedings of the 6th international semantic web conference and 2nd Asian semantic web conference","author":"K Shchekotykhin","year":"2007","unstructured":"Shchekotykhin K, Jannach D, Friedrich G et\u00a0al (2007) AllRight: automatic ontology instantiation from tabular web documents. In: Aberer K, Choi K, Noy N et\u00a0al (eds) Proceedings of the 6th international semantic web conference and 2nd Asian semantic web conference. Springer, Berlin, pp 463\u2013476"},{"key":"266_CR29","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1007\/s10115-007-0094-2","volume":"14","author":"H Tong","year":"2008","unstructured":"Tong H, Faloutsos C, Pan JY (2008) Random walk with restart: fast solutions and applications. Knowl Inf Syst 14: 327\u2013346","journal-title":"Knowl Inf Syst"},{"key":"266_CR30","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1007\/s10115-008-0152-4","volume":"19","author":"P Wang","year":"2009","unstructured":"Wang P, Hu J, Zeng HJ et\u00a0al (2009) Using Wikipedia knowledge to improve text classification. Knowl Inf Syst 19: 265\u2013281","journal-title":"Knowl Inf Syst"},{"key":"266_CR31","volume-title":"Data mining: practical machine learning tools and techniques with Java implementations","author":"I Witten","year":"2000","unstructured":"Witten I, Frank E (2000) Data mining: practical machine learning tools and techniques with Java implementations. Morgan Kaufmann, San Francisco"},{"key":"266_CR32","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1109\/TKDE.2004.1264823","volume":"16","author":"H Yu","year":"2004","unstructured":"Yu H, Han J, Chang KCC (2004) PEBL: web page classification without negative examples. IEEE Trans Knowl Data Eng 16: 70\u201381","journal-title":"IEEE Trans Knowl Data Eng"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-009-0266-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-009-0266-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-009-0266-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,14]],"date-time":"2021-10-14T06:49:08Z","timestamp":1634194148000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-009-0266-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,11,18]]},"references-count":32,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2010,11]]}},"alternative-id":["266"],"URL":"https:\/\/doi.org\/10.1007\/s10115-009-0266-3","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,11,18]]}}}