{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T00:15:58Z","timestamp":1780359358442,"version":"3.54.1"},"reference-count":226,"publisher":"SAGE Publications","issue":"6","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["AIC"],"published-print":{"date-parts":[[2016,12,1]]},"DOI":"10.3233\/aic-160710","type":"journal-article","created":{"date-parts":[[2016,11,4]],"date-time":"2016-11-04T11:15:36Z","timestamp":1478258136000},"page":"627-663","source":"Crossref","is-referenced-by-count":55,"title":["A survey on pre-processing techniques: Relevant issues in the context of environmental data mining"],"prefix":"10.1177","volume":"29","author":[{"given":"Karina","family":"Gibert","sequence":"first","affiliation":[{"name":"Knowledge Engineering and Machine Learning Group, Department of Statistics and Operation Research, Universitat Polit\u00e8cnica de Catalunya-BarcelonaTech, Barcelona, Catalonia, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Miquel","family":"S\u00e0nchez\u2013Marr\u00e8","sequence":"additional","affiliation":[{"name":"Knowledge Engineering and Machine Learning Group, Computer Science Department, Universitat Polit\u00e8cnica de Catalunya-BarcelonaTech, Barcelona, Catalonia, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Joaqu\u00edn","family":"Izquierdo","sequence":"additional","affiliation":[{"name":"Fluing-IMM Universitat Polit\u00e8cnica de Val\u00e8ncia, Valencia, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"179","reference":[{"key":"10.3233\/AIC-160710_ref1","doi-asserted-by":"crossref","unstructured":"C.C.\u00a0Aggarwal, Supervised Outlier Detection, Arfken and Weber, 2012.","DOI":"10.1007\/978-1-4614-6396-2_6"},{"key":"10.3233\/AIC-160710_ref2","doi-asserted-by":"crossref","unstructured":"C.C.\u00a0Aggarwal, Outlier analysis, in: Data Mining, Springer, 2015, pp.\u00a0237\u2013263.","DOI":"10.1007\/978-3-319-14142-8_8"},{"key":"10.3233\/AIC-160710_ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-5725-8_2"},{"issue":"1","key":"10.3233\/AIC-160710_ref4","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1007\/BF00153759","article-title":"Instance-based learning algorithms","volume":"6","author":"Aha","year":"1991","journal-title":"Machine Learning"},{"key":"10.3233\/AIC-160710_ref5","doi-asserted-by":"crossref","unstructured":"J.\u00a0Aitchison, in: Principles of Compositional Data Analysis, Lecture Notes \u2013 Monograph Series, 1994, pp.\u00a073\u201381.","DOI":"10.1214\/lnms\/1215463786"},{"issue":"2","key":"10.3233\/AIC-160710_ref6","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1016\/j.chemolab.2004.06.004","article-title":"A two-stage evolutionary algorithm for variable selection in the development of RBF neural network models","volume":"75","author":"Alexandridis","year":"2005","journal-title":"Chemometrics and Intelligent Laboratory Systems"},{"key":"10.3233\/AIC-160710_ref8","doi-asserted-by":"crossref","unstructured":"P.D.\u00a0Allison, Missing Data, Vol.\u00a0136, Sage Publications, 2001.","DOI":"10.4135\/9781412985079"},{"key":"10.3233\/AIC-160710_ref9","unstructured":"G.\u00a0Andrienko and A.\u00a0Andrienko, Research on visual analysis of spatio-temporal data at fraunhofer ais: An overview of history and functionality of commongis, in: Proceedings of the Knowledge-Based Services for the Public Services Symposium, Workshop III: Knowledge Discovery for Environmental Management, 2004, pp.\u00a026\u201331."},{"issue":"7","key":"10.3233\/AIC-160710_ref10","doi-asserted-by":"publisher","first-page":"8170","DOI":"10.1016\/j.eswa.2010.12.160","article-title":"Empirical study of feature selection methods based on individual feature evaluation for classification problems","volume":"38","author":"Arauzo-Azofra","year":"2011","journal-title":"Expert Systems with Applications"},{"issue":"3","key":"10.3233\/AIC-160710_ref11","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/s10844-007-0037-0","article-title":"Consistency measures for feature selection","volume":"30","author":"Arauzo-Azofra","year":"2008","journal-title":"Journal of Intelligent Information Systems"},{"key":"10.3233\/AIC-160710_ref12","unstructured":"I.\u00a0Arregui, A.\u00a0Balaguer et al., Learning on the relationships between respiratory disease and the use of traditional stoves in Bangladesh households, in: Procs IEMSs\u20192016, Vol.\u00a03, 2016."},{"key":"10.3233\/AIC-160710_ref13","unstructured":"I.N.\u00a0Athanasiadis, V.G.\u00a0Kaburlasos, P.A.\u00a0Mitkas and V.\u00a0Petridis, Applying machine learning techniques on air quality data for real-time decision support, in: First International NAISO Symposium on Information Technologies in Environmental Engineering (ITEE-2003), Gdansk, Poland, Citeseer, 2003."},{"key":"10.3233\/AIC-160710_ref14","unstructured":"J.\u00a0Atserias et al., Syntactic and semantic services in an open-source NLP library, in: Procs LREC, Vol.\u00a06, 2006."},{"issue":"1","key":"10.3233\/AIC-160710_ref15","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1002\/mpr.329","article-title":"Multiple imputation by chained equations: What is it and how does it work?","volume":"20","author":"Azur","year":"2011","journal-title":"International Journal of Methods in Psychiatric Research"},{"key":"10.3233\/AIC-160710_ref16","unstructured":"A.\u00a0Bargiela and W.\u00a0Pedrycz, Granular Computing: An Introduction, Vol.\u00a0717, Springer Science & Business, Media, 2012."},{"key":"10.3233\/AIC-160710_ref17","unstructured":"V.\u00a0Barnett, V.\u00a0Barnett and T.\u00a0Lewis, Outliers in Statistical Data, Wiley, 1978."},{"issue":"251\u2013260","key":"10.3233\/AIC-160710_ref18","first-page":"48","article-title":"A study of k-nearest neighbour as an imputation method","volume":"87","author":"Batista","year":"2002","journal-title":"HIS"},{"issue":"1","key":"10.3233\/AIC-160710_ref19","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","article-title":"A study of the behavior of several methods for balancing machine learning training data","volume":"6","author":"Batista","year":"2004","journal-title":"ACM Sigkdd Explorations Newsletter"},{"issue":"3","key":"10.3233\/AIC-160710_ref20","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1016\/S0925-2312(03)00388-6","article-title":"Short-term water level prediction using neural networks and neuro-fuzzy approach","volume":"55","author":"Bazartseren","year":"2003","journal-title":"Neurocomputing"},{"key":"10.3233\/AIC-160710_ref21","first-page":"1183","article-title":"Distributional word clusters vs. words for text categorization","volume":"3","author":"Bekkerman","year":"2003","journal-title":"Journal of Machine Learning Research"},{"issue":"21","key":"10.3233\/AIC-160710_ref22","first-page":"3222","article-title":"Bioremediation of polluted wastewater influent: Phosphorus and nitrogen removal","volume":"5","author":"Akpor","year":"2010","journal-title":"Scientific Research and Essays"},{"key":"10.3233\/AIC-160710_ref23","unstructured":"D.A.\u00a0Belsley, E.\u00a0Kuh and R.E.\u00a0Welsch, Regression Diagnostics: Identifying Influential Data and Sources of Collinearity, Vol.\u00a0571, John Wiley & Sons, 2005."},{"issue":"12","key":"10.3233\/AIC-160710_ref25","doi-asserted-by":"publisher","first-page":"1445","DOI":"10.1002\/int.1068","article-title":"Nearest prototype classifier designs: An experimental study","volume":"16","author":"Bezdek","year":"2001","journal-title":"International Journal of Intelligent Systems"},{"key":"10.3233\/AIC-160710_ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557041"},{"issue":"4","key":"10.3233\/AIC-160710_ref27","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1086\/667590","article-title":"A migratory northern ungulate in the pursuit of spring: Jumping or surfing the green wave?","volume":"180","author":"Bischof","year":"2012","journal-title":"The American Naturalist"},{"issue":"1890","key":"10.3233\/AIC-160710_ref28","doi-asserted-by":"publisher","first-page":"1035","DOI":"10.1098\/rsta.2008.0180","article-title":"Godiva2: Interactive visualization of environmental data on the web","volume":"367","author":"Blower","year":"2009","journal-title":"Philosophical Transactions of the Royal Society of London A: Mathematical, Physical and Engineering Sciences"},{"issue":"3","key":"10.3233\/AIC-160710_ref29","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/s10115-012-0487-8","article-title":"A review of feature selection methods on synthetic data","volume":"34","author":"Bol\u00f3n-Canedo","year":"2013","journal-title":"Knowledge and Information Systems"},{"issue":"2","key":"10.3233\/AIC-160710_ref30","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1111\/j.2517-6161.1964.tb00553.x","article-title":"An analysis of transformations","volume":"26","author":"Box","year":"1964","journal-title":"Journal of the Royal Statistical Society, Series B (Methodological)"},{"key":"10.3233\/AIC-160710_ref31","unstructured":"G.\u00a0Bretana, Admiralty Manual of Navigation, Vol.\u00a01, 1987."},{"key":"10.3233\/AIC-160710_ref32","doi-asserted-by":"crossref","unstructured":"M.M.\u00a0Breunig, H.-P.\u00a0Kriegel, R.T.\u00a0Ng and J.\u00a0Sander, Lof: Identifying density-based local outliers, in: ACM Sigmod Record, Vol.\u00a029, ACM, 2000, pp.\u00a093\u2013104.","DOI":"10.1145\/335191.335388"},{"issue":"2","key":"10.3233\/AIC-160710_ref33","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1023\/A:1014043630878","article-title":"Advances in instance selection for instance-based learning algorithms","volume":"6","author":"Brighton","year":"2002","journal-title":"Data Mining and Knowledge Discovery"},{"key":"10.3233\/AIC-160710_ref34","first-page":"27","article-title":"Conditional likelihood maximisation: A unifying framework for information theoretic feature selection","volume":"13","author":"Brown","year":"2012","journal-title":"Journal of Machine Learning Research"},{"issue":"2","key":"10.3233\/AIC-160710_ref35","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1145\/380995.381059","article-title":"Postprocessing in machine learning and data mining","volume":"2","author":"Bruha","year":"2000","journal-title":"ACM SIGKDD Explorations Newsletter"},{"issue":"5","key":"10.3233\/AIC-160710_ref36","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1111\/j.1467-9876.2008.00627.x","article-title":"A latent Gaussian model for compositional data with zeros","volume":"57","author":"Butler","year":"2008","journal-title":"Journal of the Royal Statistical Society: Series C (Applied Statistics)"},{"key":"10.3233\/AIC-160710_ref37","doi-asserted-by":"crossref","unstructured":"Y.\u00a0Caises, A.\u00a0Gonz\u00e1lez, E.\u00a0Leyva and R.\u00a0P\u00e9rez, Scis: Combining instance selection methods to increase their effectiveness over a wide range of domains, in: International Conference on Intelligent Data Engineering and Automated Learning, Springer, 2009, pp.\u00a017\u201324.","DOI":"10.1007\/978-3-642-04394-9_3"},{"issue":"6","key":"10.3233\/AIC-160710_ref38","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1109\/TEVC.2003.819265","article-title":"Using evolutionary algorithms as instance selection for data reduction in KDD: An experimental study","volume":"7","author":"Cano","year":"2003","journal-title":"IEEE Transactions on Evolutionary Computation"},{"key":"10.3233\/AIC-160710_ref39","first-page":"1245","article-title":"Benefiting from the variables that variable selection discards","volume":"3","author":"Caruana","year":"2003","journal-title":"Journal of Machine Learning Research"},{"issue":"3","key":"10.3233\/AIC-160710_ref40","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1109\/3477.931531","article-title":"Another move toward the minimum consistent subset: A tabu search approach to the condensed nearest neighbor rule","volume":"31","author":"Cerveron","year":"2001","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)"},{"key":"10.3233\/AIC-160710_ref41","unstructured":"J.M.\u00a0Chambers, Graphical Methods for Data Analysis, Wadsworth, 1983."},{"issue":"3","key":"10.3233\/AIC-160710_ref42","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1145\/1541880.1541882","article-title":"Anomaly detection: A survey","volume":"41","author":"Chandola","year":"2009","journal-title":"ACM Computing Surveys (CSUR)"},{"issue":"1","key":"10.3233\/AIC-160710_ref43","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.compeleceng.2013.11.024","article-title":"A survey on feature selection methods","volume":"40","author":"Chandrashekar","year":"2014","journal-title":"Computers & Electrical Engineering"},{"key":"10.3233\/AIC-160710_ref44","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1613\/jair.953","article-title":"Smote: Synthetic minority over-sampling technique","volume":"16","author":"Chawla","year":"2002","journal-title":"Journal of Artificial Intelligence Research"},{"key":"10.3233\/AIC-160710_ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2006.1119"},{"key":"10.3233\/AIC-160710_ref46","unstructured":"K.\u00a0Cios, W.\u00a0Pedrycz, R.W.\u00a0Swiniarski and L.A.\u00a0Kurgan, Data Mining: A Knowledge Discovery Approach, Springer, 2007."},{"issue":"7","key":"10.3233\/AIC-160710_ref47","doi-asserted-by":"publisher","first-page":"731","DOI":"10.1080\/17441692.2012.699975","article-title":"Uncovering text mining: A survey of current work on web-based epidemic intelligence","volume":"7","author":"Collier","year":"2012","journal-title":"Global Public Health"},{"issue":"365","key":"10.3233\/AIC-160710_ref48","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1080\/01621459.1979.10481634","article-title":"Influential observations in linear regression","volume":"74","author":"Cook","year":"1979","journal-title":"Journal of the American Statistical Association"},{"key":"10.3233\/AIC-160710_ref49","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.ins.2012.10.039","article-title":"Using sensitivity analysis and visualization techniques to open black box data mining models","volume":"225","author":"Cortez","year":"2013","journal-title":"Information Sciences"},{"issue":"8","key":"10.3233\/AIC-160710_ref50","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1145\/135226.135228","article-title":"Trading MIPS and memory for knowledge engineering","volume":"35","author":"Creecy","year":"1992","journal-title":"Communications of the ACM"},{"key":"10.3233\/AIC-160710_ref51","doi-asserted-by":"publisher","DOI":"10.1109\/SUTC.2008.86"},{"key":"10.3233\/AIC-160710_ref52","unstructured":"S.K.\u00a0Das, High-Level Data Fusion, Artech House, 2008."},{"key":"10.3233\/AIC-160710_ref53","unstructured":"M.\u00a0Dash and H.\u00a0Liu, Handling large unsupervised data via dimensionality reduction, in: 1999 ACM SIGMOD Workshop on Research Issues in Data Mining and Knowledge Discovery, 1999."},{"issue":"1","key":"10.3233\/AIC-160710_ref54","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/S0004-3702(03)00079-1","article-title":"Consistency-based search in feature selection","volume":"151","author":"Dash","year":"2003","journal-title":"Artificial Intelligence"},{"key":"10.3233\/AIC-160710_ref55","doi-asserted-by":"crossref","unstructured":"J.L.\u00a0Davidson and J.\u00a0Jalan, Feature selection for steganalysis using the mahalanobis distance, in: IS&T\/SPIE Electronic Imaging, International Society for Optics and Photonics, 2010, pp.\u00a0754104.","DOI":"10.1117\/12.841074"},{"issue":"1","key":"10.3233\/AIC-160710_ref56","doi-asserted-by":"publisher","first-page":"60","DOI":"10.4018\/jamc.2010102604","article-title":"A survey on evolutionary instance selection and generation","volume":"1","author":"Derrac","year":"2010","journal-title":"Int\u2019l J. Applied Metaheuristic Computing"},{"key":"10.3233\/AIC-160710_ref57","unstructured":"P.A.\u00a0Devijver and J.\u00a0Kittler, On the edited nearest neighbor rule, in: Proc. 5th Int. Conf. on Pattern Recognition, 1980, pp.\u00a072\u201380."},{"key":"10.3233\/AIC-160710_ref58","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45014-9_1"},{"issue":"1","key":"10.3233\/AIC-160710_ref59","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/2193-1801-2-1","article-title":"Principled missing data methods for researchers","volume":"2","author":"Dong","year":"2013","journal-title":"SpringerPlus"},{"key":"10.3233\/AIC-160710_ref60","doi-asserted-by":"crossref","unstructured":"B.\u00a0Duval, J.-K.\u00a0Hao and J.C.\u00a0Hernandez Hernandez, A\u00a0memetic algorithm for gene selection and molecular classification of cancer, in: Proceedings of the 11th Annual Conference on Genetic and Evolutionary Computation, ACM, 2009, pp.\u00a0201\u2013208.","DOI":"10.1145\/1569901.1569930"},{"key":"10.3233\/AIC-160710_ref61","first-page":"845","article-title":"Feature selection for unsupervised learning","volume":"5","author":"Dy","year":"2004","journal-title":"Journal of Machine Learning Research"},{"issue":"4","key":"10.3233\/AIC-160710_ref62","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/j.simpat.2004.11.008","article-title":"The relevance of aggregating a water consumption model cannot be disconnected from the choice of information available on the resource","volume":"13","author":"Edwards","year":"2005","journal-title":"Simulation Modelling Practice and Theory"},{"key":"10.3233\/AIC-160710_ref64","unstructured":"EPA, European Waste Catalogue and Hazardous Waste List, European Environmental Protection Agency, 2002."},{"issue":"12","key":"10.3233\/AIC-160710_ref67","doi-asserted-by":"publisher","first-page":"3692","DOI":"10.1016\/j.patcog.2008.05.019","article-title":"Impact of imputation of missing values on classification error for discrete data","volume":"41","author":"Farhangfar","year":"2008","journal-title":"Pattern Recognition"},{"issue":"2","key":"10.3233\/AIC-160710_ref68","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1016\/S0096-3003(02)00151-0","article-title":"Long-term fuzzy management of water resource systems","volume":"137","author":"Faye","year":"2003","journal-title":"Applied Mathematics and Computation"},{"key":"10.3233\/AIC-160710_ref69","unstructured":"U.M.\u00a0Fayyad, G.\u00a0Piatetsky-Shapiro, P.\u00a0Smyth and R.\u00a0Uthurusamy, Advances in Knowledge Discovery and Data Mining, Vol.\u00a021, AAAI Press, Menlo Park, 1996."},{"issue":"18","key":"10.3233\/AIC-160710_ref70","doi-asserted-by":"publisher","first-page":"2378","DOI":"10.1016\/j.fss.2007.12.023","article-title":"A\u00a0study of the behaviour of linguistic fuzzy rule based classification systems in the framework of imbalanced data-sets","volume":"159","author":"Fern\u00e1ndez","year":"2008","journal-title":"Fuzzy Sets and Systems"},{"key":"10.3233\/AIC-160710_ref71","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45597-3_4"},{"key":"10.3233\/AIC-160710_ref72","unstructured":"P.G.\u00a0Foschi, D.\u00a0Kolippakkam, H.\u00a0Liu and A.\u00a0Mandvikar, Feature extraction for image mining, in: Multimedia Information Systems, 2002, pp.\u00a0103\u2013109."},{"key":"10.3233\/AIC-160710_ref73","unstructured":"E.\u00a0Frank and I.H.\u00a0Witten, Making better use of global discretization, in: Proc. of the Sixteenth International Conference on Machine Learning, 1999."},{"issue":"3\u20134","key":"10.3233\/AIC-160710_ref75","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/s10044-008-0106-1","article-title":"Prototype reduction using an artificial immune model","volume":"11","author":"Garain","year":"2008","journal-title":"Pattern Analysis and Applications"},{"key":"10.3233\/AIC-160710_ref76","doi-asserted-by":"publisher","DOI":"10.1007\/11681960_12"},{"issue":"8","key":"10.3233\/AIC-160710_ref77","doi-asserted-by":"publisher","first-page":"2693","DOI":"10.1016\/j.patcog.2008.02.006","article-title":"A memetic algorithm for evolutionary prototype selection: A scaling up approach","volume":"41","author":"Garc\u00eda","year":"2008","journal-title":"Pattern Recognition"},{"issue":"3","key":"10.3233\/AIC-160710_ref78","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1109\/TPAMI.2011.142","article-title":"Prototype selection for nearest neighbor classification: Taxonomy and empirical study","volume":"34","author":"Garcia","year":"2012","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"6","key":"10.3233\/AIC-160710_ref79","first-page":"512","article-title":"Evolutionary computation for training set selection","volume":"1","author":"Garc\u00eda-Pedrajas","year":"2011","journal-title":"Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery"},{"key":"10.3233\/AIC-160710_ref80","doi-asserted-by":"crossref","unstructured":"M.H.\u00a0Gerardi, Wastewater Bacteria, Vol.\u00a05, John Wiley & Sons, 2006.","DOI":"10.1002\/0471979910"},{"issue":"1","key":"10.3233\/AIC-160710_ref81","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1080\/00207160.2013.783209","article-title":"Mixed intelligent-multivariate missing imputation","volume":"91","author":"Gibert","year":"2014","journal-title":"International Journal of Computer Mathematics"},{"issue":"1","key":"10.3233\/AIC-160710_ref82","doi-asserted-by":"crossref","first-page":"113","DOI":"10.3233\/AIC-140611","article-title":"atlp: A color-based model of uncertainty to evaluate the risk of decisions based on prototypes","volume":"28","author":"Gibert","year":"2015","journal-title":"AI Communications"},{"issue":"5","key":"10.3233\/AIC-160710_ref83","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1080\/00207160.2014.898065","article-title":"On the understanding of profiles by means of post-processing techniques: An application to financial assets","volume":"93","author":"Gibert","year":"2016","journal-title":"International Journal of Computer Mathematics"},{"issue":"4","key":"10.3233\/AIC-160710_ref84","first-page":"213","article-title":"Clustering based on rules and knowledge discovery in ill-structured domains","volume":"1","author":"Gibert","year":"1998","journal-title":"Revista Computaci\u00f3n y Sistemas"},{"issue":"4","key":"10.3233\/AIC-160710_ref85","first-page":"178","article-title":"The role of KDD support-interpretation tools in the conceptualization of medical profiles: An application to neurorehabilitation","volume":"16","author":"Gibert","year":"2008","journal-title":"Acta Informatica Medica"},{"key":"10.3233\/AIC-160710_ref86","unstructured":"K.\u00a0Gibert, J.\u00a0Izquierdo, G.\u00a0Holmes, I.\u00a0Athanasiadis, J.\u00a0Comas and M.\u00a0S\u00e0nchez-Marr\u00e8, On the role of pre and post-processing in environmental data mining, in: Proceedings of IEMSs 2008 International Congress on Environmental Modeling and Software, iEMSs, 2008, pp.\u00a01937\u20131958."},{"issue":"7","key":"10.3233\/AIC-160710_ref87","doi-asserted-by":"publisher","first-page":"1633","DOI":"10.1016\/j.mcm.2011.10.046","article-title":"Post-processing: Bridging the gap between modelling and effective decision-support. The profile assessment grid in human behaviour","volume":"57","author":"Gibert","year":"2013","journal-title":"Mathematical and Computer Modelling"},{"issue":"6","key":"10.3233\/AIC-160710_ref88","doi-asserted-by":"publisher","first-page":"712","DOI":"10.1016\/j.envsoft.2009.11.004","article-title":"Knowledge discovery with clustering based on rules by states: A water treatment application","volume":"25","author":"Gibert","year":"2010","journal-title":"Environmental Modelling & Software"},{"key":"10.3233\/AIC-160710_ref89","unstructured":"K.\u00a0Gibert and M.\u00a0S\u00e0nchez-Marr\u00e8, Improving ontological knowledge with reinforcement in recommending the data mining method for real problems, in: Procs of CAEPIA 2015 (TAMIDA), CEDI, 2015, pp.\u00a0769\u2013778."},{"key":"10.3233\/AIC-160710_ref90","unstructured":"K.\u00a0Gibert, M.\u00a0S\u00e8nchez-Marr\u00e8 and V.\u00a0Codina, Choosing the right data mining technique: Classification of methods and intelligent recommendation, in: Proceedings of IEMSs 2010 International Congress on Environmental Modeling and Software, iEMSs, 2010, pp.\u00a02448\u20132453."},{"issue":"3","key":"10.3233\/AIC-160710_ref91","first-page":"319","article-title":"Classification based on rules and medical research","volume":"15","author":"Gibert","year":"1999","journal-title":"Journal of Applied Stochastic Models and Data Analysis (JAMSDA)"},{"key":"10.3233\/AIC-160710_ref92","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1016\/S1574-101X(08)00612-1","article-title":"Chapter twelve data mining for environmental systems","volume":"3","author":"Gibert","year":"2008","journal-title":"Developments in Integrated Environmental Assessment"},{"key":"10.3233\/AIC-160710_ref93","first-page":"132","article-title":"Genetic classifier system as a heuristic weighting method for a case-based classifier system","volume":"22","author":"Golobardes","year":"2000","journal-title":"Butllet\u0131 de l\u2019Associaci\u00f3 Catalana d\u2019Intel. ligencia Artificial"},{"key":"10.3233\/AIC-160710_ref94","unstructured":"P.I.\u00a0Good and P.\u00a0Good, Resampling Methods: A Practical Guide to Data Analysis, Springer Science & Business, Media, 2013."},{"key":"10.3233\/AIC-160710_ref95","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1146\/annurev.psych.58.110405.085530","article-title":"Missing data analysis: Making it work in the real world","volume":"60","author":"Graham","year":"2009","journal-title":"Annual Review of Psychology"},{"key":"10.3233\/AIC-160710_ref96","doi-asserted-by":"crossref","unstructured":"J.W.\u00a0Graham, P.E.\u00a0Cumsille and E.\u00a0Elek-Fisk, Methods for handling missing data, in: Handbook of Psychology, 2003.","DOI":"10.1002\/0471264385.wei0204"},{"issue":"6","key":"10.3233\/AIC-160710_ref97","doi-asserted-by":"publisher","first-page":"903","DOI":"10.1016\/S0167-8655(02)00202-7","article-title":"Increasing sensitivity of preterm birth by changing rule strengths","volume":"24","author":"Grzymala-Busse","year":"2003","journal-title":"Pattern Recognition Letters"},{"issue":"1","key":"10.3233\/AIC-160710_ref98","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1145\/1007730.1007736","article-title":"Learning from imbalanced data sets with boosting and data generation: The databoost-im approach","volume":"6","author":"Guo","year":"2004","journal-title":"ACM SIGKDD Explorations Newsletter"},{"issue":"1","key":"10.3233\/AIC-160710_ref100","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1145\/1656274.1656278","article-title":"The WEKA data mining software: An update","volume":"11","author":"Hall","year":"2009","journal-title":"ACM SIGKDD Explorations Newsletter"},{"key":"10.3233\/AIC-160710_ref101","unstructured":"M.\u00a0Hall, I.\u00a0Witten and E.\u00a0Frank, Data Mining: Practical Machine Learning Tools and Techniques, Kaufmann, Burlington, 2011."},{"key":"10.3233\/AIC-160710_ref102","unstructured":"M.A.\u00a0Hall, Correlation-based feature selection for discrete and numeric class machine learning, in: Proceedings of the Seventeenth International Conference on Machine Learning, Morgan Kaufmann Publishers Inc., 2000, pp.\u00a0359\u2013366."},{"key":"10.3233\/AIC-160710_ref103","unstructured":"M.A.\u00a0Hall and L.A.\u00a0Smith, Practical Feature Subset Selection for Machine Learning, Springer, 1998."},{"key":"10.3233\/AIC-160710_ref104","unstructured":"J.\u00a0Han and M.\u00a0Kamber, Data Mining: Concepts and Techniques, Morgan Kaufmann, 2006."},{"key":"10.3233\/AIC-160710_ref105","unstructured":"P.\u00a0Hanrahan, Tableau software white paper-visual thinking for business intelligence, in: Tableau Software, Seattle, WA, 2003."},{"key":"10.3233\/AIC-160710_ref106","doi-asserted-by":"crossref","unstructured":"P.\u00a0Hart, The condensed nearest neighbor rule, in: IEEE Trans. Inform. Theory (Corresp.), Vol.\u00a0IT-14, 1968, pp.\u00a0515\u2013516.","DOI":"10.1109\/TIT.1968.1054155"},{"key":"10.3233\/AIC-160710_ref107","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/j.advengsoft.2012.02.005","article-title":"Multi-agent adaptive boosting on semi-supervised water supply clusters","volume":"50","author":"Herrera","year":"2012","journal-title":"Advances in Engineering Software"},{"issue":"5786","key":"10.3233\/AIC-160710_ref108","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","article-title":"Reducing the dimensionality of data with neural networks","volume":"313","author":"Hinton","year":"2006","journal-title":"Science"},{"key":"10.3233\/AIC-160710_ref109","doi-asserted-by":"publisher","DOI":"10.1007\/11553939_83"},{"key":"10.3233\/AIC-160710_ref110","doi-asserted-by":"crossref","unstructured":"N.\u00a0Howe and C.\u00a0Cardie, Examining locally varying weights for nearest neighbor algorithms, in: International Conference on Case-Based Reasoning, Springer, 1997, pp.\u00a0455\u2013466.","DOI":"10.1007\/3-540-63233-6_515"},{"key":"10.3233\/AIC-160710_ref111","unstructured":"N.\u00a0Howe and C.\u00a0Cardie, Feature subset selection and order identification for unsupervised learning, in: Proceedings of 17th International Conference on Machine Learning, Morgan Kaufmann, 2000."},{"key":"10.3233\/AIC-160710_ref112","unstructured":"M.Y.\u00a0Huh, Incremental subset selection for complex data, in: Proceedings, COMPSTAT2006, Rome, Italy, 2006."},{"key":"10.3233\/AIC-160710_ref113","unstructured":"L.\u00a0Ingsrisawang and D.\u00a0Potawee, Multiple imputation for missing data in repeated measurements using MCMC and copulas, in: Proceedings of the International MultiConference of Engineers and Computer Scientists (IMECs), Hong Kong, 2012."},{"key":"10.3233\/AIC-160710_ref114","doi-asserted-by":"crossref","unstructured":"N.\u00a0Ishii and Y.\u00a0Wang, Learning feature weights for similarity using genetic algorithms, in: Intelligence and Systems, 1998. Proceedings, IEEE International Joint Symposia on, IEEE, 1998, pp.\u00a027\u201333.","DOI":"10.1109\/IJSIS.1998.685412"},{"key":"10.3233\/AIC-160710_ref115","first-page":"25","article-title":"Establishing an environmental data platform for promoting coastal zone environmental management","volume":"XXXVII","author":"Ishiwata","year":"2008","journal-title":"The International Archives of the Photogrammetry, Remote Sensing and Spatial Information Sciences"},{"key":"10.3233\/AIC-160710_ref116","unstructured":"ISO\/TC211, Iso19115 Geographic Information \u2013 Metadata, 2003."},{"key":"10.3233\/AIC-160710_ref117","unstructured":"ISO\/TC211, Iso19136 geographic information \u2013 Geomatics, 2007."},{"issue":"3","key":"10.3233\/AIC-160710_ref118","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1016\/j.mcm.2006.11.013","article-title":"Fault detection in water supply systems using hybrid (theory and data-driven) modelling","volume":"46","author":"Izquierdo","year":"2007","journal-title":"Mathematical and Computer Modelling"},{"key":"10.3233\/AIC-160710_ref119","unstructured":"J.\u00a0Izquierdo, R.\u00a0P\u00e9rez, P.\u00a0L\u00f3pez and P.\u00a0Iglesias, Neural identification of fuzzy anomalies in pressurized water systems, in: Proceedings of the 3rd Biennial Meeting of the International Environmental Modeling and Software Society, iEMSs, Burlington, VT, USA, 2006."},{"key":"10.3233\/AIC-160710_ref120","doi-asserted-by":"crossref","unstructured":"N.\u00a0Jankowski and M.\u00a0Grochowski, Comparison of instances selection algorithms. I. Algorithms survey, in: International Conference on Artificial Intelligence and Soft Computing, Springer, 2004, pp.\u00a0598\u2013603.","DOI":"10.1007\/978-3-540-24844-6_90"},{"issue":"5","key":"10.3233\/AIC-160710_ref121","doi-asserted-by":"crossref","first-page":"429","DOI":"10.3233\/IDA-2002-6504","article-title":"The class imbalance problem: A systematic study","volume":"6","author":"Japkowicz","year":"2002","journal-title":"Intelligent Data Analysis"},{"issue":"3","key":"10.3233\/AIC-160710_ref122","doi-asserted-by":"publisher","first-page":"465","DOI":"10.1109\/TKDE.2010.263","article-title":"Feature selection based on class-dependent densities for high-dimensional binary data","volume":"24","author":"Javed","year":"2012","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"3","key":"10.3233\/AIC-160710_ref123","first-page":"279","article-title":"Improving water access indicators in developing countries: A proposal using water point mapping methodology","volume":"8","author":"Jim\u00e9nez","year":"2008","journal-title":"Water Science and Technology: Water Supply"},{"key":"10.3233\/AIC-160710_ref124","unstructured":"G.H.\u00a0John and P.\u00a0Langley, Static versus dynamic sampling for data mining, in: KDD, Vol.\u00a096, 1996, pp.\u00a0367\u2013370."},{"issue":"23","key":"10.3233\/AIC-160710_ref125","doi-asserted-by":"publisher","first-page":"4295","DOI":"10.1021\/ac00119a015","article-title":"Genetic algorithms as a tool for wavelength selection in multivariate calibration","volume":"67","author":"Jouan-Rimbaud","year":"1995","journal-title":"Analytical Chemistry"},{"issue":"2","key":"10.3233\/AIC-160710_ref126","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/S0165-0114(03)00119-2","article-title":"Temporal problems solved by dynamic fuzzy network based on genetic algorithm with variable-length chromosomes","volume":"142","author":"Juang","year":"2004","journal-title":"Fuzzy Sets and Systems"},{"issue":"3","key":"10.3233\/AIC-160710_ref127","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/s10462-011-9282-2","article-title":"Missing values: How many can they be to preserve classification reliability?","volume":"40","author":"Juhola","year":"2013","journal-title":"Artificial Intelligence Review"},{"issue":"2","key":"10.3233\/AIC-160710_ref129","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1093\/bioinformatics\/bth499","article-title":"Missing value estimation for DNA microarray gene expression data: Local least squares imputation","volume":"21","author":"Kim","year":"2005","journal-title":"Bioinformatics"},{"key":"10.3233\/AIC-160710_ref130","doi-asserted-by":"crossref","unstructured":"K.\u00a0Kira and L.A.\u00a0Rendell, A practical approach to feature selection, in: Proceedings of the Ninth International Workshop on Machine Learning, 1992, pp.\u00a0249\u2013256.","DOI":"10.1016\/B978-1-55860-247-2.50037-1"},{"issue":"2","key":"10.3233\/AIC-160710_ref131","first-page":"1137","article-title":"A study of cross-validation and bootstrap for accuracy estimation and model selection","volume":"14","author":"Kohavi","year":"1995","journal-title":"Ijcai"},{"issue":"1","key":"10.3233\/AIC-160710_ref132","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1016\/S0004-3702(97)00043-X","article-title":"Wrappers for feature subset selection","volume":"97","author":"Kohavi","year":"1997","journal-title":"Artificial Intelligence"},{"key":"10.3233\/AIC-160710_ref133","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-5725-8_3"},{"key":"10.3233\/AIC-160710_ref134","unstructured":"R.\u00a0Kohavi, P.\u00a0Langley and Y.\u00a0Yun, The utility of feature weighting in nearest-neighbor algorithms, in: Proceedings of the Ninth European Conference on Machine Learning, 1997, pp.\u00a085\u201392."},{"key":"10.3233\/AIC-160710_ref135","unstructured":"D.\u00a0Koller and M.\u00a0Sahami, Toward optimal feature selection, in: 13th International Conference on Machine Learning, 1995."},{"key":"10.3233\/AIC-160710_ref136","doi-asserted-by":"crossref","unstructured":"S.\u00a0Konishi, Introduction to Multivariate Analysis: Linear and Nonlinear Modeling, CRC Press, 2014.","DOI":"10.1201\/b17077"},{"key":"10.3233\/AIC-160710_ref137","doi-asserted-by":"crossref","unstructured":"I.\u00a0Kononenko, Estimating attributes: Analysis and extensions of relief, in: European Conference on Machine Learning, Springer, 1994, pp.\u00a0171\u2013182.","DOI":"10.1007\/3-540-57868-4_57"},{"issue":"2\u20133","key":"10.3233\/AIC-160710_ref138","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1023\/A:1007452223027","article-title":"Machine learning for the detection of oil spills in satellite radar images","volume":"30","author":"Kubat","year":"1998","journal-title":"Machine Learning"},{"issue":"2","key":"10.3233\/AIC-160710_ref139","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/S1364-8152(99)00031-6","article-title":"A\u00a0fuzzy model of heavy metal loadings in Liverpool bay","volume":"15","author":"Kuncheva","year":"2000","journal-title":"Environmental Modelling & Software"},{"issue":"6","key":"10.3233\/AIC-160710_ref140","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1016\/S0031-3203(96)00134-3","article-title":"Fitness functions in editing k-nn reference set by genetic algorithms","volume":"30","author":"Kuncheva","year":"1997","journal-title":"Pattern Recognition"},{"key":"10.3233\/AIC-160710_ref141","unstructured":"M.H.\u00a0Kutner, C.J.\u00a0Nachtsheim, J.\u00a0Neter, W.\u00a0Li et al., Applied Linear Statistical Models, Vol.\u00a0103, McGraw-Hill, Irwin, New York, 2005."},{"issue":"1","key":"10.3233\/AIC-160710_ref142","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1109\/72.977291","article-title":"Input feature selection for classification problems","volume":"13","author":"Kwak","year":"2002","journal-title":"IEEE Transactions on Neural Networks"},{"key":"10.3233\/AIC-160710_ref143","unstructured":"P.\u00a0Langley and W.\u00a0Iba, Average-case analysis of a nearest neighbor algorithm, in: IJCAI, Citeseer, 1993, pp.\u00a0889\u2013894."},{"issue":"4","key":"10.3233\/AIC-160710_ref144","doi-asserted-by":"publisher","first-page":"1106","DOI":"10.1109\/TCBB.2012.33","article-title":"A survey on filter techniques for feature selection in gene expression microarray analysis","volume":"9","author":"Lazar","year":"2012","journal-title":"IEEE\/ACM Transactions on Computational Biology and Bioinformatics (TCBB)"},{"key":"10.3233\/AIC-160710_ref145","unstructured":"L.\u00a0Lebart, Correspondence analysis, in: Data Science, Classification, and Related Methods: Proceedings of the Fifth Conference of the International Federation of Classification Societies (IFCS-96), Kobe, Japan, March 27\u201330, 1996, Springer Science & Business, Media, 2013, p.\u00a0423."},{"key":"10.3233\/AIC-160710_ref146","unstructured":"K.\u00a0Leung and C.\u00a0Leckie, Unsupervised anomaly detection in network intrusion detection using clusters, in: Proceedings of the Twenty-Eighth Australasian Conference on Computer Science, Vol.\u00a038, Australian Computer Society, Inc., 2005, pp.\u00a0333\u2013342."},{"key":"10.3233\/AIC-160710_ref147","unstructured":"R.J.\u00a0Little and D.B.\u00a0Rubin, Statistical Analysis with Missing Data, John Wiley & Sons, 2014."},{"key":"10.3233\/AIC-160710_ref148","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/978-1-4757-3359-4_1","volume-title":"Data Reduction Via Instance Selection. In Instance Selection and Construction for Data Mining","author":"Liu","year":"2001"},{"key":"10.3233\/AIC-160710_ref149","doi-asserted-by":"crossref","unstructured":"H.\u00a0Liu and H.\u00a0Motoda, Computational Methods of Feature Selection, CRC Press, 2007.","DOI":"10.1201\/9781584888796"},{"key":"10.3233\/AIC-160710_ref150","unstructured":"H.\u00a0Liu and H.\u00a0Motoda, Feature Selection for Knowledge Discovery and Data Mining, Vol.\u00a0454, Springer Science & Business, Media, 2012."},{"issue":"7","key":"10.3233\/AIC-160710_ref151","doi-asserted-by":"publisher","first-page":"1330","DOI":"10.1016\/j.patcog.2008.10.028","article-title":"Feature selection with dynamic mutual information","volume":"42","author":"Liu","year":"2009","journal-title":"Pattern Recognition"},{"issue":"4","key":"10.3233\/AIC-160710_ref152","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1109\/TKDE.2005.66","article-title":"Toward integrating feature selection algorithms for classification and clustering","volume":"17","author":"Liu","year":"2005","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"1","key":"10.3233\/AIC-160710_ref153","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/s10115-011-0424-2","article-title":"On the choice of the best imputation methods for missing values considering three groups of classification methods","volume":"32","author":"Luengo","year":"2012","journal-title":"Knowledge and Information Systems"},{"issue":"3","key":"10.3233\/AIC-160710_ref154","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1016\/j.patcog.2005.11.004","article-title":"A clustering method for automatic biometric template selection","volume":"39","author":"Lumini","year":"2006","journal-title":"Pattern Recognition"},{"issue":"3","key":"10.3233\/AIC-160710_ref155","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1051\/limn\/2012009","article-title":"The role of organisms in hyporheic processes: Gaps in current knowledge, needs for future research and applications","volume":"48","author":"Marmonier","year":"2012","journal-title":"Annales de Limnologie \u2013 International Journal of Limnology"},{"issue":"5","key":"10.3233\/AIC-160710_ref156","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1007\/s11269-010-9753-2","article-title":"Reliability and tolerance comparison in water supply networks","volume":"25","author":"Mart\u00ednez-Rodr\u00edguez","year":"2011","journal-title":"Water Resources Management"},{"key":"10.3233\/AIC-160710_ref157","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150531"},{"key":"10.3233\/AIC-160710_ref158","doi-asserted-by":"crossref","unstructured":"H.J.\u00a0Miller and J.\u00a0Han, Geographic Data Mining and Knowledge Discovery, CRC Press, 2009.","DOI":"10.1201\/9781420073980"},{"key":"10.3233\/AIC-160710_ref159","doi-asserted-by":"crossref","unstructured":"M.\u00a0Minelli, M.\u00a0Chambers and A.\u00a0Dhiraj, Big Data, Big Analytics: Emerging Business Intelligence and Analytic Trends for Today\u2019s Businesses, John Wiley & Sons, 2012.","DOI":"10.1002\/9781118562260"},{"issue":"2","key":"10.3233\/AIC-160710_ref160","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1016\/0004-3702(82)90040-6","article-title":"Generalization as search","volume":"18","author":"Mitchell","year":"1982","journal-title":"Artificial Intelligence"},{"key":"10.3233\/AIC-160710_ref161","doi-asserted-by":"crossref","unstructured":"P.\u00a0Mitra, C.\u00a0Murthy and S.K.\u00a0Pal, Unsupervised feature selection using feature similarity, in: IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol.\u00a024, 2002, pp.\u00a0301\u2013312.","DOI":"10.1109\/34.990133"},{"key":"10.3233\/AIC-160710_ref162","unstructured":"T.\u00a0Mohri and H.\u00a0Tanaka, An optimal weighting criterion of case indexing for both numeric and symbolic attributes, in: AAAI-94 Workshop Program: Case-Based Reasoning, Working Notes, 1994, pp.\u00a0123\u2013127."},{"key":"10.3233\/AIC-160710_ref163","unstructured":"L.C.\u00a0Molina, L.\u00a0Belanche and \u00c0.\u00a0Nebot, Feature selection algorithms: A survey and experimental evaluation, in: Data Mining, ICDM 2003. Proceedings. 2002 IEEE International Conference on, IEEE, 2002, pp.\u00a0306\u2013313."},{"issue":"12","key":"10.3233\/AIC-160710_ref164","doi-asserted-by":"publisher","first-page":"2771","DOI":"10.1016\/S0031-3203(01)00208-4","article-title":"An efficient prototype merging strategy for the condensed 1-nn rule through class-conditional hierarchical clustering","volume":"35","author":"Mollineda","year":"2002","journal-title":"Pattern Recognition"},{"key":"10.3233\/AIC-160710_ref165","unstructured":"D.S.\u00a0Moore, G.P.\u00a0McCabe and M.J.\u00a0Evans, Introduction to the Practice of Statistics Minitab Manual and Minitab Version 14, WH Freeman & Co., 2005."},{"key":"10.3233\/AIC-160710_ref166","doi-asserted-by":"crossref","unstructured":"A.\u00a0Murakami and T.\u00a0Nasukawa, Tweeting about the tsunami?: Mining Twitter for information on the Tohoku earthquake and tsunami, in: Proceedings of the 21st International Conference on World Wide Web, ACM, 2012, pp.\u00a0709\u2013710.","DOI":"10.1145\/2187980.2188187"},{"issue":"9","key":"10.3233\/AIC-160710_ref167","doi-asserted-by":"publisher","first-page":"1932","DOI":"10.1016\/j.patcog.2008.11.018","article-title":"An improvement on floating search algorithms for feature subset selection","volume":"42","author":"Nakariyakul","year":"2009","journal-title":"Pattern Recognition"},{"issue":"3","key":"10.3233\/AIC-160710_ref168","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/j.patrec.2005.08.015","article-title":"Maxdiff kd-trees for data condensation","volume":"27","author":"Narayan","year":"2006","journal-title":"Pattern Recognition Letters"},{"issue":"9","key":"10.3233\/AIC-160710_ref169","doi-asserted-by":"publisher","first-page":"917","DOI":"10.1109\/TC.1977.1674939","article-title":"A branch and bound algorithm for feature subset selection","volume":"100","author":"Narendra","year":"1977","journal-title":"IEEE Transactions on Computers"},{"issue":"4","key":"10.3233\/AIC-160710_ref170","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1007\/s10462-010-9156-z","article-title":"A study of the effect of different types of noise on the precision of supervised learning techniques","volume":"33","author":"Nettleton","year":"2010","journal-title":"Artificial Intelligence Review"},{"key":"10.3233\/AIC-160710_ref171","unstructured":"M.\u00a0Nixon, Feature Extraction & Image Processing, Academic Press, 2008."},{"key":"10.3233\/AIC-160710_ref173","unstructured":"H.\u00a0N\u00fa\u00f1ez and M.\u00a0S\u00e0nchez-Marr\u00e8, Instance-based learning techniques of unsupervised feature weighting do not perform so badly!, in: ECAI, Vol.\u00a016, 2004, pp.\u00a0102\u2013106."},{"key":"10.3233\/AIC-160710_ref174","doi-asserted-by":"crossref","unstructured":"H.\u00a0N\u00fa\u00f1ez, M.\u00a0S\u00e0nchez-Marr\u00e8 and U.\u00a0Cort\u00e9s, Improving similarity assessment with entropy-based local weighting, in: International Conference on Case-Based Reasoning, Springer, 2003, pp.\u00a0377\u2013391.","DOI":"10.1007\/3-540-45006-8_30"},{"issue":"2","key":"10.3233\/AIC-160710_ref175","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/S0165-0114(03)00307-5","article-title":"Self-organizing polynomial neural networks based on polynomial and fuzzy polynomial neurons: Analysis and design","volume":"142","author":"Oh","year":"2004","journal-title":"Fuzzy Sets and Systems"},{"key":"10.3233\/AIC-160710_ref176","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-75175-5_4"},{"key":"10.3233\/AIC-160710_ref177","doi-asserted-by":"crossref","unstructured":"J.A.\u00a0Olvera-L\u00f3pez, J.A.\u00a0Carrasco-Ochoa and J.F.\u00a0Mart\u00ednez-Trinidad, Prototype selection via prototype relevance, in: Iberoamerican Congress on Pattern Recognition, Springer, 2008, pp.\u00a0153\u2013160.","DOI":"10.1007\/978-3-540-85920-8_19"},{"issue":"1\u20132","key":"10.3233\/AIC-160710_ref178","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/1500000011","article-title":"Opinion mining and sentiment analysis","volume":"2","author":"Pang","year":"2008","journal-title":"Foundations and Trends in Information Retrieval"},{"key":"10.3233\/AIC-160710_ref179","doi-asserted-by":"crossref","unstructured":"R.\u00a0Paredes and E.\u00a0Vidal, Weighting prototypes. A new editing approach, in: Proceedings of the International Conference on Pattern Recognition ICPR, Vol.\u00a02, 2000, pp.\u00a025\u201328.","DOI":"10.1109\/ICPR.2000.906011"},{"key":"10.3233\/AIC-160710_ref180","unstructured":"Z.\u00a0Pawlak, Rough Sets: Theoretical Aspects of Reasoning About Data, Vol.\u00a09, Springer Science & Business, Media, 2012."},{"key":"10.3233\/AIC-160710_ref181","doi-asserted-by":"crossref","unstructured":"V.\u00a0Pawlowsky-Glahn and A.\u00a0Buccianti, Compositional Data Analysis: Theory and Applications, John Wiley & Sons, 2011.","DOI":"10.1002\/9781119976462"},{"issue":"11","key":"10.3233\/AIC-160710_ref183","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1080\/14786440109462720","article-title":"Liii. on lines and planes of closest fit to systems of points in space","volume":"2","author":"Pearson","year":"1901","journal-title":"The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science"},{"issue":"8","key":"10.3233\/AIC-160710_ref184","doi-asserted-by":"publisher","first-page":"1226","DOI":"10.1109\/TPAMI.2005.159","article-title":"Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy","volume":"27","author":"Peng","year":"2005","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"02","key":"10.3233\/AIC-160710_ref185","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1142\/S0219720006001941","article-title":"Choosing SNPS using feature selection","volume":"4","author":"Phuong","year":"2006","journal-title":"Journal of Bioinformatics and Computational Biology"},{"issue":"3","key":"10.3233\/AIC-160710_ref186","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1191\/0962280206sm448oa","article-title":"Can one assess whether missing data are missing at random in medical studies?","volume":"15","author":"Potthoff","year":"2006","journal-title":"Statistical Methods in Medical Research"},{"issue":"3","key":"10.3233\/AIC-160710_ref187","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1023\/A:1007601015854","article-title":"Robust classification for imprecise environments","volume":"42","author":"Provost","year":"2001","journal-title":"Machine Learning"},{"issue":"11","key":"10.3233\/AIC-160710_ref188","doi-asserted-by":"publisher","first-page":"1119","DOI":"10.1016\/0167-8655(94)90127-9","article-title":"Floating search methods in feature selection","volume":"15","author":"Pudil","year":"1994","journal-title":"Pattern Recognition Letters"},{"key":"10.3233\/AIC-160710_ref189","unstructured":"W.F.\u00a0Punch III, E.D.\u00a0Goodman, M.\u00a0Pei, L.\u00a0Chia-Shun, P.D.\u00a0Hovland and R.J.\u00a0Enbody, Further research on feature selection and classification using genetic algorithms, in: ICGA, 1993, pp.\u00a0557\u2013564."},{"key":"10.3233\/AIC-160710_ref190","unstructured":"D.\u00a0Pyle, Data Preparation for Data Mining, Vol.\u00a01, Morgan Kaufmann, 1999."},{"key":"10.3233\/AIC-160710_ref191","unstructured":"J.R.\u00a0Quinlan, C4.5: Programs for Machine Learning, Elsevier, 2014."},{"key":"10.3233\/AIC-160710_ref192","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/j.knosys.2013.12.005","article-title":"Fimus: A framework for imputing missing values using co-appearance, correlation and similarity analysis","volume":"56","author":"Rahman","year":"2014","journal-title":"Knowledge-Based Systems"},{"issue":"10","key":"10.3233\/AIC-160710_ref193","doi-asserted-by":"publisher","first-page":"1554","DOI":"10.1016\/j.patrec.2005.01.003","article-title":"A divide-and-conquer approach to the pairwise opposite class-nearest neighbor (poc-nn) algorithm","volume":"26","author":"Raicharoen","year":"2005","journal-title":"Pattern Recognition Letters"},{"key":"10.3233\/AIC-160710_ref194","unstructured":"E.\u00a0Ramos-Martinez, A.M.\u00a0Herrera Fernandez, J.\u00a0Izquierdo and R.\u00a0Perez-Garcia, A multi-disciplinary procedure to ascertain biofilm formation in drinking water pipes, in: International Congress on Environmental Modelling and Software, iEMSs, 2016."},{"key":"10.3233\/AIC-160710_ref195","unstructured":"M.\u00a0Refaat, Data Preparation for Data Mining Using SAS, Morgan Kaufmann, 2010."},{"issue":"2","key":"10.3233\/AIC-160710_ref196","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1023\/A:1014047731786","article-title":"A unifying view on instance selection","volume":"6","author":"Reinartz","year":"2002","journal-title":"Data Mining and Knowledge Discovery"},{"key":"10.3233\/AIC-160710_ref197","doi-asserted-by":"crossref","unstructured":"A.C.\u00a0Rencher, Methods of Multivariate Analysis, Vol.\u00a0492, John Wiley & Sons, 2003.","DOI":"10.1002\/0471271357"},{"key":"10.3233\/AIC-160710_ref198","first-page":"1371","article-title":"Overfitting in making comparisons between variable selection methods","volume":"3","author":"Reunanen","year":"2003","journal-title":"Journal of Machine Learning Research"},{"issue":"4","key":"10.3233\/AIC-160710_ref199","doi-asserted-by":"publisher","first-page":"1009","DOI":"10.1016\/S0031-3203(02)00119-X","article-title":"Finding representative patterns with ordered projections","volume":"36","author":"Riquelme","year":"2003","journal-title":"Pattern Recognition"},{"issue":"6","key":"10.3233\/AIC-160710_ref200","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1109\/TIT.1975.1055464","article-title":"An algorithm for a selective nearest neighbor decision rule","volume":"21","author":"Ritter","year":"1975","journal-title":"IEEE Transactions on Information Theory"},{"key":"10.3233\/AIC-160710_ref201","doi-asserted-by":"crossref","unstructured":"J.C.\u00a0Roberts, State of the art: Coordinated & multiple views in exploratory visualization, in: Coordinated and Multiple Views in Exploratory Visualization, 2007. CMV\u201907. Fifth International Conference on, IEEE, 2007, pp.\u00a061\u201371.","DOI":"10.1109\/CMV.2007.20"},{"key":"10.3233\/AIC-160710_ref202","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36079-4_20"},{"key":"10.3233\/AIC-160710_ref203","doi-asserted-by":"crossref","unstructured":"G.\u00a0Roffo, S.\u00a0Melzi and M.\u00a0Cristani, Infinite feature selection, in: Proceedings of the IEEE International Conference on Computer Vision, 2015, pp.\u00a04202\u20134210.","DOI":"10.1109\/ICCV.2015.478"},{"issue":"3","key":"10.3233\/AIC-160710_ref204","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1093\/biomet\/63.3.581","article-title":"Inference and missing data","volume":"63","author":"Rubin","year":"1976","journal-title":"Biometrika"},{"issue":"19","key":"10.3233\/AIC-160710_ref205","doi-asserted-by":"publisher","first-page":"2507","DOI":"10.1093\/bioinformatics\/btm344","article-title":"A review of feature selection techniques in bioinformatics","volume":"23","author":"Saeys","year":"2007","journal-title":"Bioinformatics"},{"issue":"2","key":"10.3233\/AIC-160710_ref206","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1037\/1082-989X.7.2.147","article-title":"Missing data: Our view of the state of the art","volume":"7","author":"Schafer","year":"2002","journal-title":"Psychological Methods"},{"issue":"2","key":"10.3233\/AIC-160710_ref207","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1111\/0824-7935.00146","article-title":"Transferring case knowledge to adaptation knowledge: An approach for case-base maintenance","volume":"17","author":"Shiu","year":"2001","journal-title":"Computational Intelligence"},{"issue":"1","key":"10.3233\/AIC-160710_ref209","first-page":"307","article-title":"Outlier detection: Applications and techniques","volume":"9","author":"Singh","year":"2012","journal-title":"International Journal of Computer Science Issues"},{"issue":"11","key":"10.3233\/AIC-160710_ref210","doi-asserted-by":"publisher","first-page":"1157","DOI":"10.1016\/S0167-8655(99)00083-5","article-title":"Adaptive floating search methods in feature selection","volume":"20","author":"Somol","year":"1999","journal-title":"Pattern Recognition Letters"},{"key":"10.3233\/AIC-160710_ref211","doi-asserted-by":"crossref","unstructured":"B.\u00a0Spillmann, M.\u00a0Neuhaus, H.\u00a0Bunke, E.\u00a0Pekalska and R.P.\u00a0Duin, Transforming strings to vector spaces using prototype selection, in: Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR), Springer, 2006, pp.\u00a0287\u2013296.","DOI":"10.1007\/11815921_31"},{"issue":"12","key":"10.3233\/AIC-160710_ref212","doi-asserted-by":"publisher","first-page":"1213","DOI":"10.1145\/7902.7906","article-title":"Toward memory-based reasoning","volume":"29","author":"Stanfill","year":"1986","journal-title":"Communications of the ACM"},{"key":"10.3233\/AIC-160710_ref213","unstructured":"S.D.\u00a0Stearns, On selecting features for pattern classifiers, in: Proceedings of the 3rd International Joint Conference on Pattern Recognition, 1976, pp.\u00a071\u201375."},{"key":"10.3233\/AIC-160710_ref214","unstructured":"Y.\u00a0Sun, C.\u00a0Babbs and E.\u00a0Delp, A comparison of feature selection methods for the detection of breast cancers in mammograms: Adaptive sequential floating search vs. genetic algorithm, in: 2005 IEEE Engineering in Medicine and Biology 27th Annual Conference, IEEE, 2006, pp.\u00a06532\u20136535."},{"issue":"1","key":"10.3233\/AIC-160710_ref215","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1080\/10618600.1998.10474764","article-title":"Xgobi: Interactive dynamic data visualization in the x window system","volume":"7","author":"Swayne","year":"1998","journal-title":"Journal of Computational and Graphical Statistics"},{"issue":"10","key":"10.3233\/AIC-160710_ref216","doi-asserted-by":"publisher","first-page":"2793","DOI":"10.1016\/j.csda.2011.04.012","article-title":"Iterative stepwise regression imputation using standard and robust methods","volume":"55","author":"Templ","year":"2011","journal-title":"Computational Statistics & Data Analysis"},{"key":"10.3233\/AIC-160710_ref217","doi-asserted-by":"crossref","unstructured":"H.C.\u00a0Thode Jr., Testing for Normality, Statistics: Textbooks and Monographs, Vol.\u00a0164, 2002.","DOI":"10.1201\/9780203910894"},{"key":"10.3233\/AIC-160710_ref218","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1109\/TSMC.1976.4309523","article-title":"An experiment with the edited nearest-neighbor rule","volume":"6","author":"Tomek","year":"1976","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"issue":"15","key":"10.3233\/AIC-160710_ref219","first-page":"79","article-title":"\u00cdndices de calidad de agua en fuentes superficiales utilizadas en la producci\u00f3n de agua para consumo humano: Una revisi\u00f3n cr\u00edtica","volume":"8","author":"Torres","year":"2009","journal-title":"Revista Ingenier\u00edas Universidad de Medell\u00edn"},{"issue":"1","key":"10.3233\/AIC-160710_ref220","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1504\/IJMCDM.2010.033684","article-title":"Preference assessment for the management of sewage sludge application on agricultural soils","volume":"1","author":"Valls","year":"2010","journal-title":"International Journal of Multicriteria Decision Making"},{"issue":"9","key":"10.3233\/AIC-160710_ref221","doi-asserted-by":"publisher","first-page":"1417","DOI":"10.1109\/TPAMI.2005.187","article-title":"The nearest subclass classifier: A compromise between the nearest mean and nearest neighbor classifier","volume":"27","author":"Veenman","year":"2005","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"10","key":"10.3233\/AIC-160710_ref222","doi-asserted-by":"publisher","first-page":"1624","DOI":"10.1016\/j.neunet.2005.11.003","article-title":"Missing data imputation through GTM as a mixture of t-distributions","volume":"19","author":"Vellido","year":"2006","journal-title":"Neural Networks"},{"issue":"4","key":"10.3233\/AIC-160710_ref224","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/s00500-009-0414-4","article-title":"Taximeter verification with GPS and soft computing techniques","volume":"14","author":"Villar","year":"2010","journal-title":"Soft Computing"},{"issue":"3","key":"10.3233\/AIC-160710_ref225","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/S1353-8292(01)00006-5","article-title":"Environmental risk and (re) action: Air quality, health, and civic involvement in an urban industrial neighbourhood","volume":"7","author":"Wakefield","year":"2001","journal-title":"Health & Place"},{"issue":"6879","key":"10.3233\/AIC-160710_ref226","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1038\/416389a","article-title":"Ecological responses to recent climate change","volume":"416","author":"Walther","year":"2002","journal-title":"Nature"},{"key":"10.3233\/AIC-160710_ref227","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1613\/jair.1199","article-title":"Learning when training data are costly: The effect of class distribution on tree induction","volume":"19","author":"Weiss","year":"2003","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1\u20135","key":"10.3233\/AIC-160710_ref228","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1023\/A:1006593614256","article-title":"A review and empirical evaluation of feature weighting methods for a class of lazy learning algorithms","volume":"11","author":"Wettschereck","year":"1997","journal-title":"Artificial Intelligence Review"},{"key":"10.3233\/AIC-160710_ref229","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1109\/TSMC.1972.4309137","article-title":"Asymptotic properties of nearest neighbor rules using edited data","volume":"3","author":"Wilson","year":"1972","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"issue":"3","key":"10.3233\/AIC-160710_ref230","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1023\/A:1007626913721","article-title":"Reduction techniques for instance-based learning algorithms","volume":"38","author":"Wilson","year":"2000","journal-title":"Machine Learning"},{"key":"10.3233\/AIC-160710_ref231","unstructured":"I.H.\u00a0Witten, E.\u00a0Frank and M.A.\u00a0Hall, Data Mining: Practical Machine Learning Tools and Techniques, Morgan Kaufmann Publishers Inc., 2011."},{"issue":"5","key":"10.3233\/AIC-160710_ref232","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/MCG.1999.788794","article-title":"Visual data mining","volume":"19","author":"Wong","year":"1999","journal-title":"IEEE Computer Graphics and Applications"},{"issue":"4","key":"10.3233\/AIC-160710_ref233","doi-asserted-by":"publisher","first-page":"1261","DOI":"10.1214\/aos\/1176350142","article-title":"Jackknife, bootstrap and other resampling methods in regression analysis","volume":"14","author":"Wu","year":"1986","journal-title":"The Annals of Statistics"},{"issue":"1","key":"10.3233\/AIC-160710_ref234","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1109\/TPWRS.2006.888990","article-title":"Power distribution fault cause identification with imbalanced data using the data mining-based fuzzy classification e-algorithm","volume":"22","author":"Xu","year":"2007","journal-title":"IEEE Transactions on Power Systems"},{"issue":"2","key":"10.3233\/AIC-160710_ref235","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1109\/5254.671091","article-title":"Feature subset selection using a genetic algorithm","volume":"13","author":"Yang","year":"1998","journal-title":"IEEE Intelligent Systems"},{"issue":"3","key":"10.3233\/AIC-160710_ref236","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1080\/00401706.1995.10484330","article-title":"Discussion: Probability theory and fuzzy logic are complementary rather than competitive","volume":"37","author":"Zadeh","year":"1995","journal-title":"Technometrics"},{"issue":"7","key":"10.3233\/AIC-160710_ref237","doi-asserted-by":"publisher","first-page":"1481","DOI":"10.1016\/S0031-3203(01)00137-6","article-title":"Optimal reference subset selection for nearest neighbor classification by tabu search","volume":"35","author":"Zhang","year":"2002","journal-title":"Pattern Recognition"},{"issue":"1","key":"10.3233\/AIC-160710_ref238","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1007\/s10994-013-5373-4","article-title":"Massively parallel feature selection: An approach based on variance preservation","volume":"92","author":"Zhao","year":"2013","journal-title":"Machine Learning"}],"container-title":["AI Communications"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/AIC-160710","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T18:27:28Z","timestamp":1777400848000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/AIC-160710"}},"subtitle":[],"editor":[{"given":"Karina","family":"Gibert","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"editor"}]}],"short-title":[],"issued":{"date-parts":[[2016,12,1]]},"references-count":226,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.3233\/aic-160710","relation":{},"ISSN":["1875-8452","0921-7126"],"issn-type":[{"value":"1875-8452","type":"electronic"},{"value":"0921-7126","type":"print"}],"subject":[],"published":{"date-parts":[[2016,12,1]]}}}