{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:36:45Z","timestamp":1742913405238,"version":"3.40.3"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319961323"},{"type":"electronic","value":"9783319961330"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-96133-0_10","type":"book-chapter","created":{"date-parts":[[2018,7,7]],"date-time":"2018-07-07T11:54:57Z","timestamp":1530964497000},"page":"132-146","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["When Different Is Wrong: Visual Unsupervised Validation for Web Information Extraction"],"prefix":"10.1007","author":[{"given":"Benoit","family":"Potvin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roger","family":"Villemaire","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,7,8]]},"reference":[{"key":"10_CR1","unstructured":"Agyemang, M.: Web content outlier mining: motivation, framework, and algorithms. University of Calgary (2006)"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Agyemang, M., Barker, K., Alhajj, R.: Framework for mining web content outliers. In: Proceedings of the 2004 ACM Symposium on Applied Computing, pp. 590\u2013594. ACM (2004)","DOI":"10.1145\/967900.968022"},{"issue":"5","key":"10_CR3","doi-asserted-by":"crossref","first-page":"473","DOI":"10.3233\/IDA-2005-9505","volume":"9","author":"M Agyemang","year":"2005","unstructured":"Agyemang, M., Barker, K., Alhajj, R.: Web outlier mining: discovering outliers from web datasets. Intell. Data Anal. 9(5), 473\u2013486 (2005)","journal-title":"Intell. Data Anal."},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Apostolova, E., Tomuro, N.: Combining visual and textual features for information extraction from online flyers. In: EMNLP, pp. 1924\u20131929 (2014)","DOI":"10.3115\/v1\/D14-1206"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Burget, R., Rudolfova, I.: Web page element classification based on visual features. In: 2009 First Asian Conference on Intelligent Information and Database Systems, ACIIDS 2009, pp. 67\u201372. IEEE (2009)","DOI":"10.1109\/ACIIDS.2009.71"},{"issue":"3","key":"10_CR6","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1145\/1541880.1541882","volume":"41","author":"V Chandola","year":"2009","unstructured":"Chandola, V., Banerjee, A., Kumar, V.: Anomaly detection: a survey. ACM Comput. Surv. (CSUR) 41(3), 15 (2009)","journal-title":"ACM Comput. Surv. (CSUR)"},{"issue":"10","key":"10_CR7","doi-asserted-by":"publisher","first-page":"1411","DOI":"10.1109\/TKDE.2006.152","volume":"18","author":"CH Chang","year":"2006","unstructured":"Chang, C.H., Kayed, M., Girgis, M.R., Shaalan, K.F.: A survey of web information extraction systems. IEEE Trans. Knowl. Data Eng. 18(10), 1411\u20131428 (2006)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/978-3-642-32281-5_41","volume-title":"Web-Age Information Management","author":"V Chenthamarakshan","year":"2012","unstructured":"Chenthamarakshan, V., Varadarajan, R., Deshpande, P.M., Krishnapuram, R., Stolze, K.: WYSIWYE: an algebra for expressing spatial and textual rules for information extraction. In: Gao, H., Lim, L., Wang, W., Li, C., Chen, L. (eds.) WAIM 2012. LNCS, vol. 7418, pp. 419\u2013433. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-32281-5_41"},{"issue":"1","key":"10_CR9","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/j.jvlc.2009.06.001","volume":"21","author":"G Della Penna","year":"2010","unstructured":"Della Penna, G., Magazzeni, D., Orefice, S.: Visual extraction of information from web pages. J. Vis. Lang. Comput. 21(1), 23\u201332 (2010)","journal-title":"J. Vis. Lang. Comput."},{"issue":"3","key":"10_CR10","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1007\/s10115-011-0394-4","volume":"30","author":"G Della Penna","year":"2012","unstructured":"Della Penna, G., Magazzeni, D., Orefice, S.: A spatial relation-based framework to perform visual information extraction. Knowl. Inf. Syst. 30(3), 667 (2012)","journal-title":"Knowl. Inf. Syst."},{"key":"10_CR11","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/j.knosys.2014.07.007","volume":"70","author":"E Ferrara","year":"2014","unstructured":"Ferrara, E., De Meo, P., Fiumara, G., Baumgartner, R.: Web data extraction, applications and techniques: a survey. Knowl.-Based Syst. 70, 301\u2013323 (2014)","journal-title":"Knowl.-Based Syst."},{"key":"10_CR12","unstructured":"Gatterbauer, W., Bohunsky, P.: Table extraction using spatial reasoning on the CSS2 visual box model. In: Proceedings of the 21st National Conference on Artificial Intelligence (2006)"},{"key":"10_CR13","series-title":"IFIP Advances in Information and Communication Technology","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-319-44944-9_14","volume-title":"Artificial Intelligence Applications and Innovations","author":"T Gogar","year":"2016","unstructured":"Gogar, T., Hubacek, O., Sedivy, J.: Deep neural networks for web page information extraction. In: Iliadis, L., Maglogiannis, I. (eds.) AIAI 2016. IAICT, vol. 475, pp. 154\u2013163. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-44944-9_14"},{"issue":"4","key":"10_CR14","doi-asserted-by":"publisher","first-page":"e0152173","DOI":"10.1371\/journal.pone.0152173","volume":"11","author":"M Goldstein","year":"2016","unstructured":"Goldstein, M., Uchida, S.: A comparative evaluation of unsupervised anomaly detection algorithms for multivariate data. PLoS ONE 11(4), e0152173 (2016)","journal-title":"PLoS ONE"},{"key":"10_CR15","volume-title":"Anomaly Detection in Large Datasets","author":"MB Goldstein","year":"2014","unstructured":"Goldstein, M.B.: Anomaly Detection in Large Datasets. Verlag Dr. Hut, Munich (2014)"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Huosong, X., Zhaoyan, F., Liuyan, P.: Chinese web text outlier mining based on domain knowledge. In: 2010 Second WRI Global Congress on Intelligent Systems (GCIS), vol. 2, pp. 73\u201377. IEEE (2010)","DOI":"10.1109\/GCIS.2010.66"},{"issue":"22","key":"10_CR17","first-page":"12156","volume":"12","author":"MRR Khan","year":"2017","unstructured":"Khan, M.R.R., Ahmed, M.I., Riyad, M.A.: A novel analytical approach for identifying outliers from web documents. Int. J. Appl. Eng. Res. 12(22), 12156\u201312161 (2017)","journal-title":"Int. J. Appl. Eng. Res."},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Kohlsch\u00fctter, C., Fankhauser, P., Nejdl, W.: Boilerplate detection using shallow text features. In: Proceedings of the Third ACM International Conference on Web Search and Data Mining, pp. 441\u2013450. ACM (2010)","DOI":"10.1145\/1718487.1718542"},{"key":"10_CR19","volume-title":"Evaluating Web Content Extraction Algorithms","author":"T Kovacic","year":"2012","unstructured":"Kovacic, T.: Evaluating Web Content Extraction Algorithms. University of Ljubljana, Ljubljana (2012)"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Kr\u00fcpl-Sypien, B., Fayzrakhmanov, R.R., Holzinger, W., Panzenb\u00f6ck, M., Baumgartner, R.: A versatile model for web page representation, information extraction and content re-packaging. In: Proceedings of the 11th ACM Symposium on Document Engineering, pp. 129\u2013138. ACM (2011)","DOI":"10.1145\/2034691.2034721"},{"issue":"7553","key":"10_CR21","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Li, W., Mo, W., Zhang, X., Lu, Y., Squiers, J.J., Sellke, E.W., Fan, W., DiMaio, J.M., Thatcher, J.E.: Burn injury diagnostic imaging device\u2019s accuracy improved by outlier detection and removal. In: SPIE Defense+ Security, p. 947206. International Society for Optics and Photonics (2015)","DOI":"10.1117\/12.2177433"},{"key":"10_CR23","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"641","DOI":"10.1007\/978-3-319-57454-7_50","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"H Vu","year":"2017","unstructured":"Vu, H., Nguyen, T.D., Travers, A., Venkatesh, S., Phung, D.: Energy-based localized anomaly detection in video surveillance. In: Kim, J., et al. (eds.) PAKDD 2017. LNCS (LNAI), vol. 10234, pp. 641\u2013653. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-57454-7_50"},{"issue":"2","key":"10_CR24","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/2897350.2897353","volume":"17","author":"T Weninger","year":"2016","unstructured":"Weninger, T., Palacios, R., Crescenzi, V., Gottron, T., Merialdo, P.: Web content extraction: a meta-analysis of its past and thoughts on its future. ACM SIGKDD Explor. Newsl. 17(2), 17\u201323 (2016)","journal-title":"ACM SIGKDD Explor. Newsl."},{"key":"10_CR25","volume-title":"Data Mining: Practical Machine Learning Tools and Techniques","author":"IH Witten","year":"2016","unstructured":"Witten, I.H., Frank, E., Hall, M.A., Pal, C.J.: Data Mining: Practical Machine Learning Tools and Techniques. Morgan Kaufmann, Los Altos (2016)"},{"issue":"12","key":"10_CR26","doi-asserted-by":"publisher","first-page":"1773","DOI":"10.1109\/TVCG.2014.2346922","volume":"20","author":"J Zhao","year":"2014","unstructured":"Zhao, J., Cao, N., Wen, Z., Song, Y., Lin, Y.R., Collins, C.: # FluxFlow: visual analysis of anomalous information spreading on social media. IEEE Trans. Vis. Comput. Graph. 20(12), 1773\u20131782 (2014)","journal-title":"IEEE Trans. Vis. Comput. Graph."}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Data Mining in Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-96133-0_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T11:26:06Z","timestamp":1709810766000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-96133-0_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319961323","9783319961330"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-96133-0_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"8 July 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MLDM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Machine Learning and Data Mining in Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New York, NY","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 July 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 July 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mldm2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.mldm.de\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}