{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:10:08Z","timestamp":1750810208638,"version":"3.41.0"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,8,20]],"date-time":"2017-08-20T00:00:00Z","timestamp":1503187200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100004895","name":"European Social Fund","doi-asserted-by":"publisher","award":["Agreement UDA-POKL.04.01.01-00-051\/10-00"],"award-info":[{"award-number":["Agreement UDA-POKL.04.01.01-00-051\/10-00"]}],"id":[{"id":"10.13039\/501100004895","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2018,3]]},"DOI":"10.1007\/s10115-017-1097-2","type":"journal-article","created":{"date-parts":[[2017,8,20]],"date-time":"2017-08-20T10:51:37Z","timestamp":1503226297000},"page":"711-776","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["The BigGrams: the semi-supervised information extraction system from HTML: an improvement in the wrapper induction"],"prefix":"10.1007","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4951-2264","authenticated-orcid":false,"given":"Marcin Micha\u0142","family":"Miro\u0144czuk","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,8,20]]},"reference":[{"key":"1097_CR1","doi-asserted-by":"publisher","unstructured":"Agichtein E, Gravano L (2000) Snowball: extracting relations from large plain-text collections. In: Proceedings of the Fifth ACM conference on digital libraries, DL\u201900. ACM, New York, pp 85\u201394. doi: 10.1145\/336597.336644","DOI":"10.1145\/336597.336644"},{"key":"1097_CR2","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1016\/j.eswa.2016.11.034","volume":"71","author":"R Ali","year":"2017","unstructured":"Ali R, Lee S, Chung TC (2017) Accurate multi-criteria decision making methodology for recommending machine learning algorithm. Expert Syst Appl 71:257\u2013278","journal-title":"Expert Syst Appl"},{"key":"1097_CR3","unstructured":"Banko M, Cafarella MJ, Soderland S, Broadhead M, Etzioni O (2007) Open information extraction from the web. In: Proceedings of the 20th international joint conference on artifical intelligence, IJCAI\u201907. Morgan Kaufmann Publishers Inc., San Francisco, pp 2670\u20132676. http:\/\/dl.acm.org\/citation.cfm?id=1625275.1625705"},{"key":"1097_CR4","unstructured":"Blohm S (2014) Large-scale pattern-based information extraction from the world wide web. Karlsruher Institut f\u00fcr Technologie. http:\/\/www.ebook.de\/de\/product\/18345051\/sebastian_blohm_large_scale_pattern_based_information_extraction_from_the_world_wide_web.html , http:\/\/d-nb.info\/1000088529"},{"key":"1097_CR5","doi-asserted-by":"crossref","unstructured":"Brin S (1999) Extracting patterns and relations from the world wide web. In: Selected papers from the international workshop on the world wide web and databases, WebDB \u201998. Springer-Verlag, pp 172\u2013183. http:\/\/dl.acm.org\/citation.cfm?id=646543.696220","DOI":"10.1007\/10704656_11"},{"key":"1097_CR6","unstructured":"Brin S (November 1999) Extracting patterns and relations from the world wide web. Technical Report 1999-65, Stanford InfoLab. http:\/\/ilpubs.stanford.edu:8090\/421\/ , previous number = SIDL-WP-1999-0119"},{"key":"1097_CR7","doi-asserted-by":"crossref","unstructured":"Bronzi M, Crescenzi V, Merialdo P, Papotti P (2013) Extraction and integration of partially overlapping web sources. PVLDB 6(10):805\u2013816. http:\/\/www.vldb.org\/pvldb\/vol6\/p805-bronzi.pdf","DOI":"10.14778\/2536206.2536209"},{"key":"1097_CR8","unstructured":"Bunescu R, Pasca M (2006) Using encyclopedic knowledge for named entity disambiguation. In: Proceesings of the 11th conference of the European chapter of the association for computational linguistics (EACL-06). Trento, pp 9\u201316. http:\/\/www.cs.utexas.edu\/users\/ai-lab\/?bunescu:eacl06"},{"key":"1097_CR9","doi-asserted-by":"crossref","unstructured":"Carlson A, Betteridge J, Hruschka Jr, ER, Mitchell TM (2009) Coupling semi-supervised learning of categories and relations. In: Proceedings of the NAACL HLT 2009 workskop on semi-supervised learning for natural language processing","DOI":"10.3115\/1621829.1621830"},{"key":"1097_CR10","doi-asserted-by":"crossref","unstructured":"Carlson A, Betteridge J, Kisiel B, Settles B, Hruschka Jr ER, Mitchell TM (2010) Toward an architecture for never-ending language learning. In: Proceedings of the Twenty-Fourth conference on artificial intelligence (AAAI 2010)","DOI":"10.1609\/aaai.v24i1.7519"},{"issue":"10","key":"1097_CR11","doi-asserted-by":"crossref","first-page":"1411","DOI":"10.1109\/TKDE.2006.152","volume":"18","author":"CH Chang","year":"2006","unstructured":"Chang CH, Kayed M, Girgis M, Shaalan K (2006) A survey of web information extraction systems. IEEE Trans Knowl Data Eng 18(10):1411\u20131428","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1097_CR12","doi-asserted-by":"crossref","unstructured":"Chang C, Lui S (2001) IEPAD: information extraction based on pattern discovery. In: Shen VY, Saito N, Lyu MR, Zurko ME (eds) Proceedings of the Tenth International World Wide Web Conference, WWW 10. ACM, Hong Kong, pp 681\u2013688, May 1\u20135","DOI":"10.1145\/371920.372182"},{"key":"1097_CR13","doi-asserted-by":"crossref","unstructured":"Chiticariu L, Li Y, Reiss FR (2013) Rule-based information extraction is dead! long live rule-based information extraction systems! In: Proceedings of the 2013 conference on Empirical Methods in Natural Language Processing, EMNLP 2013, 18\u201321 October 2013, Grand Hyatt Seattle, Seattle, Washington, USA, A meeting of SIGDAT, a Special Interest Group of the ACL. ACL, pp 827\u2013832. http:\/\/aclweb.org\/anthology\/D\/D13\/D13-1079.pdf","DOI":"10.18653\/v1\/D13-1079"},{"key":"1097_CR14","volume-title":"Ontology learning and population from text: algorithms, evaluation and applications","author":"P Cimiano","year":"2006","unstructured":"Cimiano P (2006) Ontology learning and population from text: algorithms, evaluation and applications. Springer-Verlag, New York Inc, Secaucus"},{"issue":"5","key":"1097_CR15","doi-asserted-by":"crossref","first-page":"731","DOI":"10.1145\/1017460.1017462","volume":"51","author":"V Crescenzi","year":"2004","unstructured":"Crescenzi V, Mecca G (2004) Automatic information extraction from large websites. J ACM 51(5):731\u2013779","journal-title":"J ACM"},{"key":"1097_CR16","unstructured":"Cucerzan S (2007) Large-scale named entity disambiguation based on wikipedia data. In: Proceedings of the 2007 joint conference on EMNLP and CNLL. pp 708\u2013716"},{"key":"1097_CR17","doi-asserted-by":"publisher","unstructured":"Czerski D, Ciesielski K, Drami\u0144ski M, K\u0142opotek M, \u0141ozi\u0144ski P, Wierzcho\u0144 S (2016) What NEKST?\u2014semantic search engine for polish internet. Springer International Publishing, Cham, pp 335\u2013347. doi: 10.1007\/978-3-319-30165-5_16","DOI":"10.1007\/978-3-319-30165-5_16"},{"key":"1097_CR18","doi-asserted-by":"crossref","unstructured":"Dalvi BB, Callan J, Cohen WW (2010) Entity list completion using set expansion techniques. In: Voorhees EM, Buckland LP (eds.) TREC. National Institute of Standards and Technology (NIST). http:\/\/dblp.uni-trier.de\/db\/conf\/trec\/trec2010.html","DOI":"10.6028\/NIST.SP.500-294.entity-CMU_LIRA"},{"key":"1097_CR19","doi-asserted-by":"publisher","unstructured":"Dalvi BB, Cohen WW, Callan J (2012) Websets: extracting sets of entities from the web using unsupervised information extraction. In: Proceedings of the Fifth ACM International Conference on Web Search and Data Mining, WSDM \u201912, ACM, New York, pp 243\u2013252. doi: 10.1145\/2124295.2124327","DOI":"10.1145\/2124295.2124327"},{"key":"1097_CR20","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1016\/j.datak.2012.10.002","volume":"83","author":"J Knijff de","year":"2013","unstructured":"de Knijff J, Frasincar F, Hogenboom F (2013) Domain taxonomy learning from text: the subsumption method versus hierarchical clustering. Data Knowl Eng 83:54\u201369","journal-title":"Data Knowl Eng"},{"key":"1097_CR21","unstructured":"Downey DC (2008) Redundancy in web-scale information extraction: probabilistic model and experimental results. University of Washington. http:\/\/books.google.pl\/books?id=THnZtgAACAAJ"},{"issue":"12","key":"1097_CR22","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1145\/1409360.1409378","volume":"51","author":"O Etzioni","year":"2008","unstructured":"Etzioni O, Banko M, Soderland S, Weld DS (2008) Open information extraction from the web. Commun ACM 51(12):68\u201374. doi: 10.1145\/1409360.1409378","journal-title":"Commun ACM"},{"issue":"1","key":"1097_CR23","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/j.artint.2005.03.001","volume":"165","author":"O Etzioni","year":"2005","unstructured":"Etzioni O, Cafarella M, Downey D, Popescu AM, Shaked T, Soderland S, Weld DS, Yates A (2005) Unsupervised named-entity extraction from the web: an experimental study. Artif Intell 165(1):91\u2013134. doi: 10.1016\/j.artint.2005.03.001","journal-title":"Artif Intell"},{"key":"1097_CR24","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1016\/j.knosys.2014.07.007","volume":"70","author":"E Ferrara","year":"2014","unstructured":"Ferrara E, Meo PD, Fiumara G, Baumgartner R (2014) Web data extraction, applications and techniques: a survey. Knowl Based Syst 70:301\u2013323","journal-title":"Knowl Based Syst"},{"issue":"1","key":"1097_CR25","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1145\/1882471.1882479","volume":"12","author":"G Forman","year":"2010","unstructured":"Forman G, Scholz M (2010) Apples-to-apples in cross-validation studies: pitfalls in classifier performance measurement. ACM SIGKDD Explor Newsl 12(1):49\u201357","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"1097_CR26","doi-asserted-by":"crossref","unstructured":"Furche T, Gottlob G, Grasso G, Guo X, Orsi G, Schallhart C, Wang C (2014) DIADEM: thousands of websites to a single database. PVLDB 7(14):1845\u20131856. http:\/\/www.vldb.org\/pvldb\/vol7\/p1845-furche.pdf","DOI":"10.14778\/2733085.2733091"},{"key":"1097_CR27","unstructured":"Haav H (2004) A semi-automatic method to ontology design by using FCA. University of Ostrava, Department of Computer Science"},{"key":"1097_CR28","doi-asserted-by":"crossref","unstructured":"Hao Q, Cai R, Pang Y, Zhang L (2011) From one tree to a forest: a unified solution for structured web data extraction. In: Proceedings of the 34th international ACM SIGIR conference on research and development in information retrieval (SIGIR 2011). Association for Computing Machinery, Inc., pp 775\u2013784","DOI":"10.1145\/2009916.2010020"},{"key":"1097_CR29","unstructured":"Harrell Jr F, Dupont C (2013) Hmisc: Harrell miscellaneous. R Package"},{"issue":"23","key":"1097_CR30","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1080\/00437956.1954.11659520","volume":"10","author":"Z Harris","year":"1954","unstructured":"Harris Z (1954) Distributional structure. Word 10(23):146\u2013162","journal-title":"Word"},{"key":"1097_CR31","doi-asserted-by":"crossref","unstructured":"He Y, Xin D (2011) SEISA: set expansion by iterative similarity aggregation. In: Srinivasan S, Ramamritham K, Kumar A, Ravindra MP, Bertino E, Kumar R (eds) Proceedings of the 20th International Conference on World Wide Web, WWW 2011, Hyderabad, India, March 28\u2013April 1, 2011. ACM, pp 427\u2013436","DOI":"10.1145\/1963405.1963467"},{"key":"1097_CR32","doi-asserted-by":"crossref","unstructured":"Hemnani A, Bressan S (2002) Extracting information from semi-structured web documents. In: Proceedings of the workshops on advances in Object-Oriented Information Systems OOIS\u201902. Springer-Verlag, London, pp 166\u2013175. http:\/\/dl.acm.org\/citation.cfm?id=645790.667826","DOI":"10.1007\/3-540-46105-1_20"},{"key":"1097_CR33","volume-title":"Nonparametric statistical methods","author":"M Hollander","year":"2013","unstructured":"Hollander M, Wolfe DA, Chicken E (2013) Nonparametric statistical methods. Wiley, Hoboken"},{"key":"1097_CR34","doi-asserted-by":"crossref","unstructured":"Hsu C, Dung M (1998) Generating finite-state transducers for semi-structured data extraction from the web. Inf Syst 23(9):521\u2013538. http:\/\/dl.acm.org\/citation.cfm?id=306766.306775","DOI":"10.1016\/S0306-4379(98)00027-1"},{"key":"1097_CR35","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1016\/j.is.2016.05.003","volume":"62","author":"P Jim\u00e9nez","year":"2016","unstructured":"Jim\u00e9nez P, Corchuelo R (2016) On learning web information extraction rules with TANGO. Inf Syst 62:74\u2013103","journal-title":"Inf Syst"},{"issue":"1","key":"1097_CR36","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1007\/s10115-016-0921-4","volume":"49","author":"P Jim\u00e9nez","year":"2016","unstructured":"Jim\u00e9nez P, Corchuelo R (2016) Roller: a novel approach to web information extraction. Knowl Inf Syst 49(1):197\u2013241","journal-title":"Knowl Inf Syst"},{"issue":"2","key":"1097_CR37","doi-asserted-by":"crossref","first-page":"524","DOI":"10.1109\/TKDE.2015.2475759","volume":"28","author":"Y Kang","year":"2016","unstructured":"Kang Y, Haghighi PD, Burstein F (2016) Taxofinder: a graph-based approach for taxonomy learning. IEEE Trans Knowl Data Eng 28(2):524\u2013536","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1097_CR38","first-page":"294","volume-title":"Foundations of real-world understanding","author":"J Karlgren","year":"2001","unstructured":"Karlgren J, Sahlgren M (2001) From words to understanding. In: Uesaka Y, Kanerva P, Asoh H (eds) Foundations of real-world understanding. CSLI Publications, Stanford, pp 294\u2013308"},{"issue":"2","key":"1097_CR39","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1109\/TKDE.2009.82","volume":"22","author":"M Kayed","year":"2010","unstructured":"Kayed M, Chang C (2010) Fivatech: page-level web data extraction from template pages. IEEE Trans Knowl Data Eng 22(2):249\u2013263","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"1","key":"1097_CR40","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1142\/S0219622012500095","volume":"11","author":"G Kou","year":"2012","unstructured":"Kou G, Lu Y, Peng Y, Shi Y (2012) Evaluation of classification algorithms using MCDM and rank correlation. Int J Inf Technol Decis Mak 11(1):197\u2013225","journal-title":"Int J Inf Technol Decis Mak"},{"key":"1097_CR41","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1016\/j.asoc.2015.08.012","volume":"37","author":"RA Krohling","year":"2015","unstructured":"Krohling RA, Lourenzutti R, Campos M (2015) Ranking and comparing evolutionary algorithms with hellinger-topsis. Appl Soft Comput 37:217\u2013226","journal-title":"Appl Soft Comput"},{"key":"1097_CR42","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1016\/j.procs.2015.07.054","volume":"55","author":"RA Krohling","year":"2015","unstructured":"Krohling RA, Pacheco AG (2015) A-topsis-an approach based on topsis for ranking evolutionary algorithms. Procedia Comput Sci 55:308\u2013317","journal-title":"Procedia Comput Sci"},{"key":"1097_CR43","doi-asserted-by":"crossref","unstructured":"Liu B, Zhai Y (2005) NET\u2014a system for extracting web data from flat and nested data records. In: Ngu AHH, Kitsuregawa M, Neuhold EJ, Chung J, Sheng QZ (eds.) Web Information Systems Engineering\u2014WISE 2005, 6th International Conference on Web Information Systems Engineering, New York, November 20\u201322 2005, Proceedings of the Lecture Notes in Computer Science, vol 3806. Springer, pp 487\u2013495","DOI":"10.1007\/11581062_39"},{"key":"1097_CR44","doi-asserted-by":"crossref","unstructured":"Maedche A, Staab S (2000) Mining ontologies from text. In: Procedia of Knowledge Engineering and Knowledge Management (EKAW 2000). LNAI 1937, Springer","DOI":"10.1007\/3-540-39967-4_14"},{"key":"1097_CR45","doi-asserted-by":"crossref","unstructured":"Maimon O, Rokach L (2005) Introduction to knowledge discovery in databases. In: Maimon O, Rokach L (eds.) The data mining and knowledge discovery handbook. Springer, pp 1\u201317. http:\/\/dblp.uni-trier.de\/db\/books\/collections\/datamining2005.html","DOI":"10.1007\/0-387-25465-X_1"},{"key":"1097_CR46","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to information retrieval","author":"CD Manning","year":"2008","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008) Introduction to information retrieval. Cambridge University Press, New York"},{"key":"1097_CR47","doi-asserted-by":"crossref","unstructured":"Mironczuk M, Czerski D, Sydow M, Klopotek MA (2013) Language-independent information extraction based on formal concept analysis. In: Informatics and applications (ICIA), 2013 second international conference on, pp 323\u2013329","DOI":"10.1109\/ICoIA.2013.6650277"},{"key":"1097_CR48","unstructured":"Moens M (2006) Information extraction: algorithms and prospects in a retrieval context (the information retrieval series). Springer International Series on Information Retrieval, Springer, Secaucus. http:\/\/books.google.pl\/books?id=t5oMg54hBxwC"},{"key":"1097_CR49","unstructured":"Navigli R, Velardi P, Faralli S (2011) A graph-based algorithm for inducing lexical taxonomies from scratch. In: Walsh T (ed) IJCAI 2011, Proceedings of the 22nd international joint conference on artificial intelligence, Barcelona, Catalonia, July 16\u201322, 2011. IJCAI\/AAAI, pp 1872\u20131877"},{"key":"1097_CR50","doi-asserted-by":"publisher","unstructured":"Park BK, Han H, Song IY (2005) PIES: a web information extraction system using ontology and tag patterns. Springer, Berlin, pp 688\u2013693. doi: 10.1007\/11563952_65","DOI":"10.1007\/11563952_65"},{"key":"1097_CR51","doi-asserted-by":"crossref","unstructured":"Pasupat P, Liang P (2014) Zero-shot entity extraction from web pages. In: Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics, ACL 2014, June 22\u201327, 2014, Baltimore, MD, USA, Volume 1: Long Papers. pp 391\u2013401. http:\/\/aclweb.org\/anthology\/P\/P14\/P14-1037.pdf","DOI":"10.3115\/v1\/P14-1037"},{"key":"1097_CR52","doi-asserted-by":"crossref","unstructured":"Pawlak Z (1981) Information systems theoretical foundations. Inf Syst 6(3):205\u2013218. http:\/\/www.sciencedirect.com\/science\/article\/pii\/0306437981900235","DOI":"10.1016\/0306-4379(81)90023-5"},{"key":"1097_CR53","doi-asserted-by":"crossref","unstructured":"Piskorski J, Yangarber R (2013) Information extraction: Past, present and future. In: Poibeau T, Saggion H, Piskorski J, Yangarber R (eds) Multi-source, multilingual information extraction and summarization. Springer, Berlin, pp 23\u201349. Theory and Applications of Natural Language Processing","DOI":"10.1007\/978-3-642-28569-1_2"},{"key":"1097_CR54","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1002\/aris.1440400120","volume":"40","author":"U Priss","year":"1996","unstructured":"Priss U (1996) Formal concept analysis in information science. Annu Rev Inf Sci Technol 40:521\u2013543","journal-title":"Annu Rev Inf Sci Technol"},{"key":"1097_CR55","doi-asserted-by":"crossref","unstructured":"Qiu D, Barbosa L, Dong XL, Shen Y, Srivastava D (2015) DEXTER: large-scale discovery and extraction of product specifications on the web. PVLDB 8(13):2194\u20132205. http:\/\/www.vldb.org\/pvldb\/vol8\/p2194-qiu.pdf","DOI":"10.14778\/2831360.2831372"},{"key":"1097_CR56","unstructured":"Riloff E, Jones R (1999) Learning dictionaries for information extraction by multi-level bootstrapping. In: Proceedings of the Sixteenth National Conference on Artificial Intelligence and the Eleventh Innovative Applications of Artificial Intelligence Conference Innovative Applications of Artificial Intelligence. AAAI\u201999\/IAAI \u201999. American Association for Artificial Intelligence, Menlo Park, pp 474\u2013479. http:\/\/dl.acm.org\/citation.cfm?id=315149.315364"},{"issue":"4","key":"1097_CR57","doi-asserted-by":"crossref","first-page":"467","DOI":"10.1007\/s10462-015-9433-y","volume":"44","author":"G Santafe","year":"2015","unstructured":"Santafe G, Inza I, Lozano JA (2015) Dealing with the evaluation of supervised classification algorithms. Artif Intell Rev 44(4):467\u2013508","journal-title":"Artif Intell Rev"},{"issue":"3","key":"1097_CR58","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1561\/1900000003","volume":"1","author":"S Sarawagi","year":"2008","unstructured":"Sarawagi S (2008) Information extraction. Found Trends Databases 1(3):261\u2013377. doi: 10.1561\/1900000003","journal-title":"Found Trends Databases"},{"key":"1097_CR59","volume-title":"Inference over the web","author":"S Schoenmackers","year":"2011","unstructured":"Schoenmackers S (2011) Inference over the web. University of Washington, Seattle"},{"key":"1097_CR60","doi-asserted-by":"crossref","unstructured":"Schulz A, L\u00e4ssig J, Gaedke M (2016) Practical web data extraction: are we there yet?\u2014a short survey. In: 2016 IEEE\/WIC\/ACM International Conference on Web Intelligence, WI 2016, Omaha, NE, USA, October 13\u201316 2016. IEEE Computer Society, pp 562\u2013567","DOI":"10.1109\/WI.2016.0096"},{"issue":"9","key":"1097_CR61","doi-asserted-by":"crossref","first-page":"1960","DOI":"10.1109\/TKDE.2012.135","volume":"25","author":"HA Sleiman","year":"2013","unstructured":"Sleiman HA, Corchuelo R (2013) A survey on region extractors from web documents. IEEE Trans Knowl Data Eng 25(9):1960\u20131981","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1097_CR62","doi-asserted-by":"crossref","unstructured":"Sleiman HA, Corchuelo R (2012) An unsupervised technique to extract information from semi-structured web pages. In: Wang XS, Cruz IF, Delis A, Huang G (eds) Web Information Systems Engineering\u2014WISE 2012\u201413th International Conference, Paphos, Cyprus, November 28\u201330, 2012. Proceedings of the Lecture Notes in Computer Science, vol 7651. Springer, pp 631\u2013637","DOI":"10.1007\/978-3-642-35063-4_46"},{"key":"1097_CR63","doi-asserted-by":"crossref","unstructured":"Sleiman HA, Corchuelo R (2013) Tex: an efficient and effective unsupervised web information extractor. Knowl Based Syst 39:109\u2013123. http:\/\/dblp.uni-trier.de\/db\/journals\/kbs\/kbs39.html","DOI":"10.1016\/j.knosys.2012.10.009"},{"key":"1097_CR64","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1016\/j.neucom.2013.05.057","volume":"135","author":"HA Sleiman","year":"2014","unstructured":"Sleiman HA, Corchuelo R (2014) A class of neural-network-based transducers for web information extraction. Neurocomputing 135:61\u201368","journal-title":"Neurocomputing"},{"issue":"6","key":"1097_CR65","doi-asserted-by":"crossref","first-page":"1544","DOI":"10.1109\/TKDE.2013.161","volume":"26","author":"HA Sleiman","year":"2014","unstructured":"Sleiman HA, Corchuelo R (2014) Trinity: on using trinary trees for unsupervised web data extraction. IEEE Trans Knowl Data Eng 26(6):1544\u20131556","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"3\u20134","key":"1097_CR66","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1007\/s00521-013-1516-6","volume":"25","author":"S Small","year":"2014","unstructured":"Small S, Medsker L (2014) Review of information extraction technologies and applications. Neural Comput Appl 25(3\u20134):533\u2013548","journal-title":"Neural Comput Appl"},{"issue":"4","key":"1097_CR67","doi-asserted-by":"crossref","first-page":"427","DOI":"10.1016\/j.ipm.2009.03.002","volume":"45","author":"M Sokolova","year":"2009","unstructured":"Sokolova M, Lapalme G (2009) A systematic analysis of performance measures for classification tasks. Inf Process Manag 45(4):427\u2013437","journal-title":"Inf Process Manag"},{"key":"1097_CR68","doi-asserted-by":"crossref","unstructured":"Tandon N, de Melo G, Weikum G (2011) Deriving a web-scale common sense fact database. In: Proceedings of the twenty-fifth AAAI conference on artificial intelligence, AAAI\u201911. AAAI Press, San Francisco, CA, pp 152\u2013157","DOI":"10.1609\/aaai.v25i1.7841"},{"issue":"7","key":"1097_CR69","doi-asserted-by":"crossref","first-page":"683","DOI":"10.1016\/j.datak.2009.02.010","volume":"68","author":"C Tao","year":"2009","unstructured":"Tao C, Embley DW (2009) Automatic hidden-web table interpretation, conceptualization, and semantic annotation. Data Knowl Eng 68(7):683\u2013703","journal-title":"Data Knowl Eng"},{"key":"1097_CR70","unstructured":"Team RC (2017) R: a language and environment for statistical computing. In: R foundation for statistical computing. Vienna, Austria"},{"key":"1097_CR71","doi-asserted-by":"crossref","unstructured":"Umamageswari B, Kalpana R (2017) Web harvesting: web data extraction techniques for deep web pages. In: Kumar A (ed) Web usage mining techniques and applications across industries, pp 351\u2013378","DOI":"10.4018\/978-1-5225-0613-3.ch014"},{"issue":"5","key":"1097_CR72","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1134\/S0361768816050078","volume":"42","author":"MI Varlamov","year":"2016","unstructured":"Varlamov MI, Turdakov DY (2016) A survey of methods for the extraction of information from web resources. Program Comput Softw 42(5):279\u2013291","journal-title":"Program Comput Softw"},{"key":"1097_CR73","doi-asserted-by":"publisher","unstructured":"Wang RC, Cohen W (2007) Language-independent set expansion of named entities using the web. In: Proceedings of the 2007 Seventh IEEE International Conference on Data Mining, ICDM \u201907. IEEE Computer Society, Washington, pp 342\u2013350. doi: 10.1109\/ICDM.2007.104","DOI":"10.1109\/ICDM.2007.104"},{"key":"1097_CR74","unstructured":"Wang RC, Cohen WW (2009) Character-level analysis of semi-structured documents for set expansion. In: Proceedings of the 2009 conference on Empirical Methods in Natural Language Processing: Volume 3, EMNLP\u201909, Association for Computational Linguistics, Stroudsburg, pp 1503\u20131512. http:\/\/dl.acm.org\/citation.cfm?id=1699648.1699697"},{"issue":"2","key":"1097_CR75","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1145\/1964897.1964904","volume":"12","author":"T Weninger","year":"2011","unstructured":"Weninger T, Fumarola F, Barber R, Han J, Malerba D (2011) Unexpected results in automatic list extraction on the web. SIGKDD Explor Newsl 12(2):26\u201330. doi: 10.1145\/1964897.1964904","journal-title":"SIGKDD Explor Newsl"},{"issue":"3","key":"1097_CR76","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1145\/2516633.2516638","volume":"7","author":"T Weninger","year":"2013","unstructured":"Weninger T, Johnston TJ, Han J (2013) The parallel path framework for entity discovery on the web. TWEB 7(3):161\u20131629","journal-title":"TWEB"},{"key":"1097_CR77","first-page":"429","volume-title":"StatSoft \u201993","author":"KE Wolff","year":"1994","unstructured":"Wolff KE (1994) A first course in formal concept analysis. In: Faulbaum F (ed) StatSoft \u201993. Gustav Fischer Verlag, Jena, pp 429\u2013438"},{"key":"1097_CR78","doi-asserted-by":"crossref","unstructured":"Yates A, Banko M, Broadhead M, Cafarella MJ, Etzioni O, Soderland S (2007) Textrunner: open information extraction on the web. In: HLT-NAACL (Demonstrations), pp 25\u201326. http:\/\/acl.ldc.upenn.edu\/N\/N07\/N07-4013.pdf","DOI":"10.3115\/1614164.1614177"},{"key":"1097_CR79","doi-asserted-by":"crossref","first-page":"654","DOI":"10.1016\/j.asoc.2015.12.021","volume":"40","author":"SY Yuen","year":"2016","unstructured":"Yuen SY, Chow CK, Zhang X, Lou Y (2016) Which algorithm should I choose: an evolutionary algorithm portfolio approach. Appl Soft Comput 40:654\u2013673","journal-title":"Appl Soft Comput"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-017-1097-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-017-1097-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-017-1097-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T23:38:57Z","timestamp":1750808337000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-017-1097-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,8,20]]},"references-count":79,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2018,3]]}},"alternative-id":["1097"],"URL":"https:\/\/doi.org\/10.1007\/s10115-017-1097-2","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"type":"print","value":"0219-1377"},{"type":"electronic","value":"0219-3116"}],"subject":[],"published":{"date-parts":[[2017,8,20]]}}}