{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T01:37:26Z","timestamp":1743125846565,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030147983"},{"type":"electronic","value":"9783030147990"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-14799-0_7","type":"book-chapter","created":{"date-parts":[[2019,4,1]],"date-time":"2019-04-01T23:07:34Z","timestamp":1554160054000},"page":"77-89","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Robust Web Data Extraction Based on Unsupervised Visual Validation"],"prefix":"10.1007","author":[{"given":"Benoit","family":"Potvin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roger","family":"Villemaire","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,3,7]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Apostolova, E., Pourashraf, P., Sack, J.: Digital leafleting: extracting structured data from multimedia online flyers. In: Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 283\u2013292 (2015)","DOI":"10.3115\/v1\/N15-1032"},{"issue":"5","key":"7_CR2","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1038\/scientificamerican0501-34","volume":"284","author":"T Berners-Lee","year":"2001","unstructured":"Berners-Lee, T., Hendler, J., Lassila, O., et al.: The semantic web. Sci. Am. 284(5), 28\u201337 (2001)","journal-title":"Sci. Am."},{"issue":"1","key":"7_CR3","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45(1), 5\u201332 (2001)","journal-title":"Mach. Learn."},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Burget, R., Rudolfova, I.: Web page element classification based on visual features. In: First Asian Conference on Intelligent Information and Database Systems, ACIIDS 2009, pp. 67\u201372. IEEE (2009)","DOI":"10.1109\/ACIIDS.2009.71"},{"issue":"10","key":"7_CR5","doi-asserted-by":"publisher","first-page":"1411","DOI":"10.1109\/TKDE.2006.152","volume":"18","author":"CH Chang","year":"2006","unstructured":"Chang, C.H., Kayed, M., Girgis, M.R., Shaalan, K.F.: A survey of web information extraction systems. IEEE Trans. Knowl. Data Eng. 18(10), 1411\u20131428 (2006)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"7_CR6","series-title":"Smart Innovation, Systems and Technologies","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/978-3-642-19618-8_3","volume-title":"Combinations of Intelligent Methods and Applications","author":"E Ferrara","year":"2011","unstructured":"Ferrara, E., Baumgartner, R.: Automatic wrapper adaptation by tree edit distance matching. In: Hatzilygeroudis, I., Prentzas, J. (eds.) Combinations of Intelligent Methods and Applications. SIST, vol. 8, pp. 41\u201354. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-19618-8_3"},{"key":"7_CR7","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/j.knosys.2014.07.007","volume":"70","author":"E Ferrara","year":"2014","unstructured":"Ferrara, E., De Meo, P., Fiumara, G., Baumgartner, R.: Web data extraction, applications and techniques: a survey. Knowl.-Based Syst. 70, 301\u2013323 (2014)","journal-title":"Knowl.-Based Syst."},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Gatterbauer, W., Bohunsky, P., Herzog, M., Kr\u00fcpl, B., Pollak, B.: Towards domain-independent information extraction from web tables. In: Proceedings of the 16th International Conference on World Wide Web, pp. 71\u201380. ACM (2007)","DOI":"10.1145\/1242572.1242583"},{"key":"7_CR9","series-title":"IFIP Advances in Information and Communication Technology","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-319-44944-9_14","volume-title":"Artificial Intelligence Applications and Innovations","author":"T Gogar","year":"2016","unstructured":"Gogar, T., Hubacek, O., Sedivy, J.: Deep neural networks for web page information extraction. In: Iliadis, L., Maglogiannis, I. (eds.) AIAI 2016. IAICT, vol. 475, pp. 154\u2013163. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-44944-9_14"},{"key":"7_CR10","unstructured":"Grassi, M., Morbidoni, C., Nucci, M., Fonda, S., Ledda, G.: Pundit: semantically structured annotations for web contents and digital libraries. In: SDA, pp. 49\u201360 (2012)"},{"issue":"3","key":"7_CR11","first-page":"268","volume":"8","author":"H Han","year":"2009","unstructured":"Han, H., Noro, T., Tokuda, T.: An automatic web news article contents extraction system based on RSS feeds. J. Web Eng. 8(3), 268 (2009)","journal-title":"J. Web Eng."},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Kang, J., Choi, J.: Detecting informative web page blocks for efficient information extraction using visual block segmentation. In: International Symposium on Information Technology Convergence, ISITC 2007, pp. 306\u2013310. IEEE (2007)","DOI":"10.1109\/ISITC.2007.6"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Kohlsch\u00fctter, C., Fankhauser, P., Nejdl, W.: Boilerplate detection using shallow text features. In: Proceedings of the Third ACM International Conference on Web Search and Data Mining, pp. 441\u2013450. ACM (2010)","DOI":"10.1145\/1718487.1718542"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Kr\u00fcpl-Sypien, B., Fayzrakhmanov, R.R., Holzinger, W., Panzenb\u00f6ck, M., Baumgartner, R.: A versatile model for web page representation, information extraction and content re-packaging. In: Proceedings of the 11th ACM Symposium on Document Engineering, pp. 129\u2013138. ACM (2011)","DOI":"10.1145\/2034691.2034721"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Liu, F.T., Ting, K.M., Zhou, Z.H.: Isolation forest. In: Eighth IEEE International Conference on Data Mining, ICDM 2008, pp. 413\u2013422. IEEE (2008)","DOI":"10.1109\/ICDM.2008.17"},{"key":"7_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-39940-9","volume-title":"Encyclopedia of Database Systems","author":"L Liu","year":"2009","unstructured":"Liu, L., \u00d6zsu, M.T.: Encyclopedia of Database Systems, vol. 6. Springer, New York (2009)"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Parameswaran, A., Dalvi, N., Garcia-Molina, H., Rastogi, R.: Optimal schemesfor robust web extraction. Proc. VLDB Conf. 4(11)(2011)","DOI":"10.14778\/3402707.3402735"},{"issue":"Oct","key":"7_CR18","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., et al.: Scikit-learn: machine learning in python. J. Mach. Learn. Res. 12(Oct), 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR19","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1007\/978-3-319-96133-0_10","volume-title":"Machine Learning and Data Mining in Pattern Recognition","author":"B Potvin","year":"2018","unstructured":"Potvin, B., Villemaire, R.: When different is wrong: visual unsupervised validation for web information extraction. In: Perner, P. (ed.) MLDM 2018. LNCS (LNAI), vol. 10935, pp. 132\u2013146. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-96133-0_10"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Tang, J., Hong, M., Zhang, D.L., Li, J.: Information extraction: methodologies and applications. In: Emerging Technologies of Text Mining: Techniques and Applications, pp. 1\u201333. IGI Global (2008)","DOI":"10.4018\/978-1-59904-373-9.ch001"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: Can we learn a template-independent wrapper for news article extraction from a single training site? In: Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 1345\u20131354. ACM (2009)","DOI":"10.1145\/1557019.1557163"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Wang, R.C., Cohen, W.W.: Language-independent set expansion of named entities using the web. In: ICDM, pp. 342\u2013350. IEEE (2007)","DOI":"10.1109\/ICDM.2007.104"},{"issue":"2","key":"7_CR23","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/2897350.2897353","volume":"17","author":"T Weninger","year":"2016","unstructured":"Weninger, T., Palacios, R., Crescenzi, V., Gottron, T., Merialdo, P.: Web content extraction: a metaanalysis of its past and thoughts on its future. ACM SIGKDD Explor. Newsl. 17(2), 17\u201323 (2016)","journal-title":"ACM SIGKDD Explor. Newsl."},{"key":"7_CR24","volume-title":"Data Mining: Practical Machinelearning Tools and Techniques","author":"IH Witten","year":"2016","unstructured":"Witten, I.H., Frank, E., Hall, M.A., Pal, C.J.: Data Mining: Practical Machinelearning Tools and Techniques. Morgan Kaufmann, Burlington (2016)"}],"container-title":["Lecture Notes in Computer Science","Intelligent Information and Database Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-14799-0_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T13:34:13Z","timestamp":1710250453000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-14799-0_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030147983","9783030147990"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-14799-0_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"7 March 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACIIDS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Intelligent Information and Database Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Yogyakarta","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Indonesia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 April 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aciids2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aciids.pwr.edu.pl\/2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}