{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:56:05Z","timestamp":1775667365932,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2017,12,16]],"date-time":"2017-12-16T00:00:00Z","timestamp":1513382400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100007136","name":"Secretar\u00eda de Estado de Investigaci\u00f3n, Desarrollo e Innovaci\u00f3n","doi-asserted-by":"publisher","award":["TIN2015-65069-C2-1-R"],"award-info":[{"award-number":["TIN2015-65069-C2-1-R"]}],"id":[{"id":"10.13039\/501100007136","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008425","name":"Conseller\u00eda de Cultura, Educaci\u00f3n e Ordenaci\u00f3n Universitaria, Xunta de Galicia","doi-asserted-by":"publisher","award":["GRC2014\/035"],"award-info":[{"award-number":["GRC2014\/035"]}],"id":[{"id":"10.13039\/501100008425","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2018,8]]},"DOI":"10.1007\/s10115-017-1140-3","type":"journal-article","created":{"date-parts":[[2017,12,16]],"date-time":"2017-12-16T05:52:50Z","timestamp":1513403570000},"page":"395-442","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":45,"title":["On the scalability of feature selection methods on high-dimensional data"],"prefix":"10.1007","volume":"56","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0524-6427","authenticated-orcid":false,"given":"V.","family":"Bol\u00f3n-Canedo","sequence":"first","affiliation":[]},{"given":"D.","family":"Rego-Fern\u00e1ndez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Peteiro-Barral","sequence":"additional","affiliation":[]},{"given":"A.","family":"Alonso-Betanzos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Guijarro-Berdi\u00f1as","sequence":"additional","affiliation":[]},{"given":"N.","family":"S\u00e1nchez-Maro\u00f1o","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,12,16]]},"reference":[{"key":"1140_CR1","unstructured":"Ahmed A, Xing EP (2013) Scalable dynamic nonparametric Bayesian models of contents and users. In: International joint conference on artificial intelligence, IJCAI, pp 3111\u20133116"},{"key":"1140_CR2","doi-asserted-by":"crossref","unstructured":"Alonso-Betanzos A, Bol\u00f3n-Canedo V, Fern\u00e1ndez-Francos D, Porto-D\u00edaz I, S\u00e1nchez-Maro\u00f1o N (2013) Efficiency and scalability methods for computational intellect, chapter up-to-date feature selection methods for scalable and efficient machine learning, IGI Global, pp 1\u201326","DOI":"10.4018\/978-1-4666-3942-3.ch001"},{"key":"1140_CR3","doi-asserted-by":"crossref","unstructured":"Bolon-Canedo V, Sanchez-Marono N, Alonso-Betanzos A (2010) On the effectiveness of discretization on gene selection of microarray data. In: The 2010 international joint conference on neural networks (IJCNN), IEEE, pp 3167\u20133174","DOI":"10.1109\/IJCNN.2010.5596825"},{"issue":"5","key":"1140_CR4","doi-asserted-by":"crossref","first-page":"5947","DOI":"10.1016\/j.eswa.2010.11.028","volume":"38","author":"V Bol\u00f3n-Canedo","year":"2011","unstructured":"Bol\u00f3n-Canedo V, S\u00e1nchez-Maro\u00f1o N, Alonso-Betanzos A (2011) Feature selection and classification in multiple class datasets: an application to kdd cup 99 dataset. Expert Syst Appl 38(5):5947\u20135957","journal-title":"Expert Syst Appl"},{"issue":"3","key":"1140_CR5","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1007\/s10115-012-0487-8","volume":"34","author":"V Bol\u00f3n-Canedo","year":"2013","unstructured":"Bol\u00f3n-Canedo V, S\u00e1nchez-Maro\u00f1o N, Alonso-Betanzos A (2013) A review of feature selection methods on synthetic data. Knowl Inf Syst 34(3):483\u2013519","journal-title":"Knowl Inf Syst"},{"key":"1140_CR6","doi-asserted-by":"crossref","unstructured":"Bottou L, Bousquet O (2011) The tradeoffs of large-scale learning. In: Optimization for machine learning, pp 351\u2013368","DOI":"10.7551\/mitpress\/8996.003.0015"},{"key":"1140_CR7","volume-title":"Classification and regression trees","author":"L Breinman","year":"1984","unstructured":"Breinman L, Friedman JH, Olshen RA, Stone CJ (1984) Classification and regression trees. Wadsworth and Brooks-Cole Advanced Books and Software, Pacific Grove"},{"key":"1140_CR8","first-page":"27","volume":"13","author":"G Brown","year":"2012","unstructured":"Brown G, Pocock A, Zhao M-J, Luj\u00e1n M (2012) Conditional likelihood maximisation: a unifying framework for information theoretic feature selection. J Mach Learn Res 13:27\u201366","journal-title":"J Mach Learn Res"},{"issue":"1\u20132","key":"1140_CR9","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1016\/S0004-3702(03)00079-1","volume":"151","author":"M Dash","year":"2003","unstructured":"Dash M, Liu H (2003) Consistency-based search in feature selection. Artif Intell 151(1\u20132):155\u2013176","journal-title":"Artif Intell"},{"issue":"1","key":"1140_CR10","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1186\/1471-2105-7-3","volume":"7","author":"R D\u00edaz-Uriarte","year":"2006","unstructured":"D\u00edaz-Uriarte R, De Andres SA (2006) Gene selection and classification of microarray data using random forest. BMC Bioinform 7(1):3","journal-title":"BMC Bioinform"},{"key":"1140_CR11","doi-asserted-by":"crossref","first-page":"2040","DOI":"10.1016\/j.comnet.2013.04.005","volume":"57","author":"A Fahad","year":"2013","unstructured":"Fahad A, Tari Z, Khalil I, Habib I, Alnuweiri H (2013) Toward an efficient and scalable feature selection approach for internet traffic classification. Comput Netw 57:2040\u20132057","journal-title":"Comput Netw"},{"issue":"200","key":"1140_CR12","doi-asserted-by":"crossref","first-page":"675","DOI":"10.1080\/01621459.1937.10503522","volume":"32","author":"M Friedman","year":"1937","unstructured":"Friedman M (1937) The use of ranks to avoid the assumption of normality implicit in the analysis of variance. J Am Stat Assoc 32(200):675\u2013701","journal-title":"J Am Stat Assoc"},{"key":"1140_CR13","doi-asserted-by":"crossref","unstructured":"Gulgezen G, Cataltepe Z, Yu L (2009) Stable and accurate feature selection. In: Joint European conference on machine learning and knowledge discovery in databases. Springer, pp 455\u2013468","DOI":"10.1007\/978-3-642-04180-8_47"},{"key":"1140_CR14","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-540-35488-8","volume-title":"Feature extraction: foundations and applications","author":"I Guyon","year":"2006","unstructured":"Guyon I (2006) Feature extraction: foundations and applications, vol 207. Springer, Berlin"},{"key":"1140_CR15","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon I, Elisseeff A (2003) An introduction to variable and feature selection. J Mach Learn Res 3:1157\u20131182","journal-title":"J Mach Learn Res"},{"issue":"1\u20133","key":"1140_CR16","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1023\/A:1012487302797","volume":"46","author":"I Guyon","year":"2002","unstructured":"Guyon I, Weston J, Barnhill S, Vapnik V (2002) Gene selection for cancer classification using support vector machines. Mach Learn 46(1\u20133):389\u2013422","journal-title":"Mach Learn"},{"issue":"1","key":"1140_CR17","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/1656274.1656278","volume":"11","author":"M Hall","year":"2009","unstructured":"Hall M, Frank E, Holmes G, Pfahringer B, Reutemann P, Witten IH (2009) The weka data mining software: an update. ACM SIGKDD Explor Newsl 11(1):10\u201318","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"1140_CR18","unstructured":"Hall MA, Smith LA (1998) Practical feature subset selection for machine learning. In: McDonald C (ed) Computer science \u201998 proceedings of the 21st Australasian computer science conference ACSC\u201998, Perth, 4\u20136 February, 1998. Springer, Berlin, pp 181\u2013191"},{"key":"1140_CR19","doi-asserted-by":"crossref","unstructured":"Hoi SC, Wang J, Zhao P, Jin R (2012) Online feature selection for mining big data. In: Proceedings of the 1st international workshop on big data, streams and heterogeneous source mining: algorithms, systems, programming models and applications, ACM, pp 93\u2013100","DOI":"10.1145\/2351316.2351329"},{"issue":"1","key":"1140_CR20","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1109\/TIT.1968.1054102","volume":"14","author":"G Hughes","year":"1968","unstructured":"Hughes G (1968) On the mean accuracy of statistical pattern recognizers. IEEE Trans Inf Theory 14(1):55\u201363","journal-title":"IEEE Trans Inf Theory"},{"key":"1140_CR21","first-page":"121","volume":"94","author":"GH John","year":"1994","unstructured":"John GH, Kohavi R, Pfleger K et al (1994) Irrelevant features and the subset selection problem. ICML 94:121\u2013129","journal-title":"ICML"},{"issue":"1\/2","key":"1140_CR22","doi-asserted-by":"crossref","first-page":"81","DOI":"10.2307\/2332226","volume":"30","author":"MG Kendall","year":"1938","unstructured":"Kendall MG (1938) A new measure of rank correlation. Biometrika 30(1\/2):81\u201393","journal-title":"Biometrika"},{"key":"1140_CR23","doi-asserted-by":"crossref","unstructured":"Kira K, Rendell LA (1992) A practical approach to feature selection. In: Proceedings of the ninth international workshop on machine learning, pp 249\u2013256. Morgan Kaufmann Publishers Inc","DOI":"10.1016\/B978-1-55860-247-2.50037-1"},{"key":"1140_CR24","unstructured":"Koller D, Sahami M (1995) Toward optimal feature selection. In: 13th international conference on machine learning, pp 284\u2013292"},{"key":"1140_CR25","doi-asserted-by":"crossref","unstructured":"Kononenko I (1994) Estimating attributes: analysis and extensions of relief. In: Machine learning: ECML-94, Springer, pp 171\u2013182","DOI":"10.1007\/3-540-57868-4_57"},{"key":"1140_CR26","doi-asserted-by":"crossref","unstructured":"Kumar R, Vassilvitskii S (2010) Generalized distances between rankings. In: Proceedings of the 19th international conference on world wide web, ACM, pp 571\u2013580","DOI":"10.1145\/1772690.1772749"},{"key":"1140_CR27","unstructured":"Liu H, Setiono R (1995) Chi2: feature selection and discretization of numeric attributes. In: Proceedings of seventh international conference on tools with artificial intelligence, 1995, IEEE, pp 388\u2013391"},{"key":"1140_CR28","unstructured":"Liu H, Setiono R (1996) A probabilistic approach to feature selection\u2014a filter solution. In: Proceedings of the 13th international conference on machine learning, pp 319\u2013327. Morgan Kaufmann"},{"issue":"4","key":"1140_CR29","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1109\/TKDE.2005.66","volume":"17","author":"H Liu","year":"2005","unstructured":"Liu H, Yu L (2005) Toward integrating feature selection algorithms for classification and clustering. IEEE Trans Knowl Data Eng 17(4):491\u2013502","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1140_CR30","doi-asserted-by":"crossref","unstructured":"Luo D, Wang F, Sun J, Markatou M, Hu J, Ebadollahi S (2012) Sor: Scalable orthogonal regression for non-redundant feature selection and its healthcare applications. In: SIAM data mining conference, pp 576\u2013587","DOI":"10.1137\/1.9781611972825.50"},{"key":"1140_CR31","unstructured":"Mej\u00eda-Lavalle M, Sucar E, Arroyo G (2006) Feature selection with a perceptron neural net. In: Proceedings of the international workshop on feature selection for data mining, pp 131\u2013135"},{"key":"1140_CR32","unstructured":"Nemenyi P (1963) Distribution-free multiple comparisons. Ph.D. thesis, Princeton University"},{"key":"1140_CR33","doi-asserted-by":"crossref","unstructured":"Nogueira S, Brown G (2016) Measuring the stability of feature selection. In: Joint European conference on machine learning and knowledge discovery in databases, Springer, pp 442\u2013457","DOI":"10.1007\/978-3-319-46227-1_28"},{"issue":"8","key":"1140_CR34","doi-asserted-by":"crossref","first-page":"1226","DOI":"10.1109\/TPAMI.2005.159","volume":"27","author":"H Peng","year":"2005","unstructured":"Peng H, Long F, Ding C (2005) Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy. IEEE Trans Pattern Anal Mach Intell 27(8):1226\u20131238","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1","key":"1140_CR35","first-page":"21","volume":"2","author":"D Peteiro-Barral","year":"2012","unstructured":"Peteiro-Barral D, Bolon-Canedo V, Alonso-Betanzos A, Guijarro-Berdinas B, Sanchez-Marono N (2012) Scalability analysis of filter-based methods for feature selection. Adv Smart Syst Res 2(1):21\u201326","journal-title":"Adv Smart Syst Res"},{"issue":"8","key":"1140_CR36","doi-asserted-by":"crossref","first-page":"2807","DOI":"10.1016\/j.eswa.2012.11.016","volume":"4","author":"D Peteiro-Barral","year":"2012","unstructured":"Peteiro-Barral D, Bol\u00f3n-Canedo V, Alonso-Betanzos A, Guijarro-Berdi\u00f1as B, S\u00e1nchez-Maro\u00f1o N (2012) Toward the scalability of neural networks through feature selection. Expert Syst Appl 4(8):2807\u20132816","journal-title":"Expert Syst Appl"},{"key":"1140_CR37","doi-asserted-by":"crossref","unstructured":"Peteiro-Barral D, Guijarro-Berdi\u00f1as B (2013) A study on the scalability of artificial neural networks training algorithms using multiple-criteria decision-making methods. In: Artificial intelligence and soft computing, volume 7894 of lecture notes in computer science, Springer, pp 162\u2013173","DOI":"10.1007\/978-3-642-38658-9_15"},{"issue":"1","key":"1140_CR38","first-page":"81","volume":"1","author":"JR Quinlan","year":"1986","unstructured":"Quinlan JR (1986) Induction of decision trees. Mach Learn 1(1):81\u2013106","journal-title":"Mach Learn"},{"issue":"1","key":"1140_CR39","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1198\/jcgs.2010.09046","volume":"19","author":"VC Raykar","year":"2010","unstructured":"Raykar VC, Duraiswami R, Zhao LH (2010) Fast computation of kernel estimators. J Comput Graph Stat 19(1):205\u2013220","journal-title":"J Comput Graph Stat"},{"key":"1140_CR40","unstructured":"Rokach L, Schclar A, Itach E (2013) Ensemble methods for multi-label classification. arXiv preprint\u00a0 arXiv:1307.1769"},{"key":"1140_CR41","unstructured":"Sonnenburg S, Franc V, Yom-Tov E, Sebag M (2008) Pascal large scale learning challenge. In: 25th international conference on machine learning (ICML2008) workshop. Journal of Machine Learning Research, vol 10, pp 1937\u20131953"},{"issue":"1","key":"1140_CR42","doi-asserted-by":"crossref","first-page":"72","DOI":"10.2307\/1412159","volume":"15","author":"C Spearman","year":"1904","unstructured":"Spearman C (1904) The proof and measurement of association between two things. Am J Psychol 15(1):72\u2013101","journal-title":"Am J Psychol"},{"key":"1140_CR43","doi-asserted-by":"crossref","unstructured":"Sun Y, Todorovic S, Goodison S (2008) A feature selection algorithm capable of handling extremely large data dimensionality. In: Proceedings of the 2008 SIAM international conference in data mining, pp 530\u2013540","DOI":"10.1137\/1.9781611972788.48"},{"key":"1140_CR44","unstructured":"Thrun SB, Bala J, Bloedorn E, Bratko I, Cestnik B, Cheng J, De Jong K, Dzeroski S, Fahlman SE, Fisher D, et\u00a0al(1991) The monk\u2019s problems a performance comparison of different learning algorithms"},{"key":"1140_CR45","unstructured":"Tsoumakas G, Katakis I, Vlahavas I (2010) Mining multi-label data. In: Data mining and knowledge discovery handbook, Springer, pp 667\u2013685"},{"key":"1140_CR46","first-page":"1205","volume":"5","author":"L Yu","year":"2004","unstructured":"Yu L, Liu H (2004) Efficient feature selection via analysis of relevance and redundancy. J Mach Learn Res 5:1205\u20131224","journal-title":"J Mach Learn Res"},{"key":"1140_CR47","first-page":"1205","volume":"5","author":"L Yu","year":"2004","unstructured":"Yu L, Liu H (2004) Efficient feature selection via analysis of relevance and redundancy. J Mach Learn Res 5:1205\u20131224","journal-title":"J Mach Learn Res"},{"key":"1140_CR48","doi-asserted-by":"crossref","unstructured":"Yui M, Kojima I (2013) A database-hadoop hybrid approach to scalable machine learning. In: IEEE international congress on Big Data 2013. IEEE, pp 1\u20138","DOI":"10.1109\/BigData.Congress.2013.10"},{"issue":"19","key":"1140_CR49","doi-asserted-by":"crossref","first-page":"3218","DOI":"10.1016\/j.ins.2009.06.010","volume":"179","author":"M-L Zhang","year":"2009","unstructured":"Zhang M-L, Pe\u00f1a JM, Robles V (2009) Feature selection for multi-label naive bayes classification. Inf Sci 179(19):3218\u20133229","journal-title":"Inf Sci"},{"key":"1140_CR50","first-page":"1156","volume":"7","author":"Z Zhao","year":"2007","unstructured":"Zhao Z, Liu H (2007) Searching for interacting features. IJCAI 7:1156\u20131161","journal-title":"IJCAI"},{"key":"1140_CR51","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1007\/s10994-013-5373-4","volume":"92","author":"Z Zhao","year":"2013","unstructured":"Zhao Z, Zhang R, Cox J, Duling D, Sarle W (2013) Massively parallel feature selection: an approach based on variance preservation. Mach Learn 92:195\u2013220","journal-title":"Mach Learn"},{"key":"1140_CR52","doi-asserted-by":"crossref","DOI":"10.1201\/b11426","volume-title":"Spectral feature selection for data mining","author":"ZA Zhao","year":"2011","unstructured":"Zhao ZA, Liu H (2011) Spectral feature selection for data mining. Chapman & Hall\/CRC, Boca Raton"},{"issue":"2","key":"1140_CR53","doi-asserted-by":"crossref","first-page":"263","DOI":"10.1109\/TCBB.2008.105","volume":"7","author":"Z Zhu","year":"2010","unstructured":"Zhu Z, Ong Y-S, Zurada JM (2010) Identification of full and partial class relevant genes. IEEE\/ACM Trans Comput Biol Bioinf 7(2):263\u2013277","journal-title":"IEEE\/ACM Trans Comput Biol Bioinf"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-017-1140-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-017-1140-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-017-1140-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,29]],"date-time":"2023-08-29T19:50:21Z","timestamp":1693338621000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-017-1140-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,16]]},"references-count":53,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2018,8]]}},"alternative-id":["1140"],"URL":"https:\/\/doi.org\/10.1007\/s10115-017-1140-3","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,12,16]]}}}