{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T05:38:11Z","timestamp":1774589891872,"version":"3.50.1"},"reference-count":92,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,8,7]],"date-time":"2018-08-07T00:00:00Z","timestamp":1533600000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","award":["617393"],"award-info":[{"award-number":["617393"]}],"id":[{"id":"10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1007\/s10994-018-5748-7","type":"journal-article","created":{"date-parts":[[2018,8,7]],"date-time":"2018-08-07T19:59:17Z","timestamp":1533671957000},"page":"149-202","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":61,"title":["A greedy feature selection algorithm for Big Data of high dimensionality"],"prefix":"10.1007","volume":"108","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2492-959X","authenticated-orcid":false,"given":"Ioannis","family":"Tsamardinos","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7355-8871","authenticated-orcid":false,"given":"Giorgos","family":"Borboudakis","sequence":"additional","affiliation":[]},{"given":"Pavlos","family":"Katsogridakis","sequence":"additional","affiliation":[]},{"given":"Polyvios","family":"Pratikakis","sequence":"additional","affiliation":[]},{"given":"Vassilis","family":"Christophides","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,8,7]]},"reference":[{"key":"5748_CR1","series-title":"Wiley series in probability and statistics","doi-asserted-by":"publisher","DOI":"10.1002\/0471249688","volume-title":"Categorical data analysis","author":"A Agresti","year":"2002","unstructured":"Agresti, A. (2002). Categorical data analysis (2nd ed.)., Wiley series in probability and statistics Hoboken: Wiley.","edition":"2"},{"key":"5748_CR2","unstructured":"Akaike, H. (1973). Information theory and an extension of the maximum likelihood principle. In Second international symposium on information theory (pp. 267\u2013281). Budapest: Akad\u00e9miai Kiado."},{"issue":"Jan","key":"5748_CR3","first-page":"171","volume":"11","author":"CF Aliferis","year":"2010","unstructured":"Aliferis, C. F., Statnikov, A., Tsamardinos, I., Mani, S., & Koutsoukos, X. D. (2010). Local causal and Markov blanket induction for causal discovery and feature selection for classification part i: Algorithms and empirical evaluation. Journal of Machine Learning Research, 11(Jan), 171\u2013234.","journal-title":"Journal of Machine Learning Research"},{"key":"5748_CR4","unstructured":"Aliferis, C.\u00a0F., Tsamardinos, I., & Statnikov, A. (2003). HITON: A novel Markov blanket algorithm for optimal variable selection. In AMIA annual symposium proceedings. American Medical Informatics Association."},{"issue":"1","key":"5748_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2140\/pjm.1966.16.1","volume":"16","author":"L Armijo","year":"1966","unstructured":"Armijo, L. (1966). Minimization of functions having Lipschitz continuous first partial derivatives. Pacific Journal of Mathematics, 16(1), 1\u20133.","journal-title":"Pacific Journal of Mathematics"},{"key":"5748_CR6","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1214\/07-STS243","volume":"22","author":"BJ Becker","year":"2007","unstructured":"Becker, B. J., & Wu, M.-J. (2007). The synthesis of regression slopes in meta-analysis. Statistical Science, 22, 414\u2013429.","journal-title":"Statistical Science"},{"issue":"2","key":"5748_CR7","doi-asserted-by":"publisher","first-page":"813","DOI":"10.1214\/15-AOS1388","volume":"44","author":"D Bertsimas","year":"2016","unstructured":"Bertsimas, D., King, A., & Mazumder, R. (2016). Best subset selection via a modern optimization lens. The Annals of Statistics, 44(2), 813\u2013852.","journal-title":"The Annals of Statistics"},{"key":"5748_CR8","unstructured":"Blumensath, T., & Davies, M.\u00a0E. (2007). On the difference between orthogonal matching pursuit and orthogonal least squares. Technical report, University of Edinburgh."},{"key":"5748_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-21858-8","volume-title":"Feature selection for high-dimensional data","author":"V Bol\u00f3n-Canedo","year":"2015","unstructured":"Bol\u00f3n-Canedo, V., S\u00e1nchez-Maro\u00f1o, N., & Alonso-Betanzos, A. (2015a). Feature selection for high-dimensional data (1st ed.). Berlin: Springer.","edition":"1"},{"key":"5748_CR10","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.knosys.2015.05.014","volume":"86","author":"V Bol\u00f3n-Canedo","year":"2015","unstructured":"Bol\u00f3n-Canedo, V., S\u00e1nchez-Maro\u00f1o, N., & Alonso-Betanzos, A. (2015b). Recent advances and emerging challenges of feature selection in the context of big data. Knowledge-Based Systems, 86, 33\u201345.","journal-title":"Knowledge-Based Systems"},{"key":"5748_CR11","doi-asserted-by":"crossref","unstructured":"Bol\u00f3n-Canedo, V., Sechidis, K., S\u00e1nchez-Marono, N., Alonso-Betanzos, A., & Brown, G. (2017). Exploring the consequences of distributed feature selection in DNA microarray data. In International joint conference on neural networks (pp. 1665\u20131672).","DOI":"10.1109\/IJCNN.2017.7966051"},{"key":"5748_CR12","unstructured":"Borboudakis, G., & Tsamardinos, I. (2017). Forward-backward selection with early dropping. arXiv:1705.10770 [cs.LG]."},{"key":"5748_CR13","unstructured":"Bradley, J.\u00a0K., Kyrola, A., Bickson, D., & Guestrin, C. (2011). Parallel coordinate descent for l1-regularized loss minimization. In Proceedings of the 28th international conference on machine learning, ICML 2011, Bellevue, Washington, USA, June 28\u2013July 2, 2011 (pp. 321\u2013328)."},{"key":"5748_CR14","first-page":"27","volume":"13","author":"G Brown","year":"2012","unstructured":"Brown, G., Pocock, A., Zhao, M.-J., & Luj\u00e1n, M. (2012). Conditional likelihood maximisation: A unifying framework for information theoretic feature selection. Journal of Machine Learning Research, 13, 27\u201366.","journal-title":"Journal of Machine Learning Research"},{"key":"5748_CR15","doi-asserted-by":"publisher","first-page":"10162","DOI":"10.1038\/ncomms10162","volume":"6","author":"O Canela-Xandri","year":"2015","unstructured":"Canela-Xandri, O., Law, A., Gray, A., Woolliams, J. A., & Tenesa, A. (2015). A new tool called dissect for analysing large genomic data sets using a big data approach. Nature Communications, 6, 10162.","journal-title":"Nature Communications"},{"issue":"1","key":"5748_CR16","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1186\/s13742-015-0047-8","volume":"4","author":"CC Chang","year":"2015","unstructured":"Chang, C. C., Chow, C. C., Tellier, L. C., Vattikuti, S., Purcell, S. M., & Lee, J. J. (2015). Second-generation plink: rising to the challenge of larger and richer datasets. Gigascience, 4(1), 7.","journal-title":"Gigascience"},{"key":"5748_CR17","doi-asserted-by":"publisher","DOI":"10.1201\/9781420036046","volume-title":"On a class of incomplete gamma functions with applications","author":"MA Chaudhry","year":"2001","unstructured":"Chaudhry, M. A., & Zubair, S. M. (2001). On a class of incomplete gamma functions with applications. Boca Raton: CRC Press."},{"issue":"9","key":"5748_CR18","doi-asserted-by":"publisher","first-page":"793","DOI":"10.1056\/NEJMp1500523","volume":"372","author":"FS Collins","year":"2015","unstructured":"Collins, F. S., & Varmus, H. (2015). A new initiative on precision medicine. New England Journal of Medicine, 372(9), 793\u2013795.","journal-title":"New England Journal of Medicine"},{"issue":"7063","key":"5748_CR19","doi-asserted-by":"publisher","first-page":"1299","DOI":"10.1038\/nature04226","volume":"437","author":"IH Consortium","year":"2005","unstructured":"Consortium, I. H. (2005). A haplotype map of the human genome. Nature, 437(7063), 1299\u20131320.","journal-title":"Nature"},{"issue":"7","key":"5748_CR20","doi-asserted-by":"publisher","first-page":"2183","DOI":"10.1117\/12.173207","volume":"33","author":"GM Davis","year":"1994","unstructured":"Davis, G. M., Mallat, S. G., & Zhang, Z. (1994). Adaptive time\u2013frequency decompositions. Optical Engineering, 33(7), 2183\u20132192.","journal-title":"Optical Engineering"},{"key":"5748_CR21","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511802843","volume-title":"Bootstrap methods and their application","author":"AC Davison","year":"1997","unstructured":"Davison, A. C., & Hinkley, D. V. (1997). Bootstrap methods and their application (Vol. 1). Cambridge: Cambridge university press."},{"key":"5748_CR22","first-page":"194","volume":"1995","author":"J Dougherty","year":"1995","unstructured":"Dougherty, J., Kohavi, R., & Sahami, M. (1995). Supervised and unsupervised discretization of continuous features. Machine Learning Proceedings, 1995, 194\u2013202.","journal-title":"Machine Learning Proceedings"},{"issue":"2","key":"5748_CR23","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1214\/009053604000000067","volume":"32","author":"B Efron","year":"2004","unstructured":"Efron, B., Hastie, T., Johnstone, I., Tibshirani, R., et al. (2004). Least angle regression. The Annals of Statistics, 32(2), 407\u2013499.","journal-title":"The Annals of Statistics"},{"key":"5748_CR24","doi-asserted-by":"crossref","DOI":"10.1201\/9780429246593","volume-title":"An introduction to the bootstrap","author":"B Efron","year":"1994","unstructured":"Efron, B., & Tibshirani, R. J. (1994). An introduction to the bootstrap. Boca Raton: CRC press."},{"key":"5748_CR25","doi-asserted-by":"publisher","first-page":"775","DOI":"10.1016\/S1573-4412(84)02005-5","volume":"2","author":"RF Engle","year":"1984","unstructured":"Engle, R. F. (1984). Wald, likelihood ratio, and Lagrange multiplier tests in econometrics. Handbook of Econometrics, 2, 775\u2013826.","journal-title":"Handbook of Econometrics"},{"key":"5748_CR26","unstructured":"Fan, J., Feng, Y., & Wu, Y. (2010). High-dimensional variable selection for Cox\u2019s proportional hazards model. In Borrowing strength: Theory powering applications\u2013a Festschrift for Lawrence D. Brown (pp. 70\u201386). Institute of Mathematical Statistics."},{"key":"5748_CR27","volume-title":"Statistical methods for research workers","author":"R Fisher","year":"1932","unstructured":"Fisher, R. (1932). Statistical methods for research workers. Edinburgh: Oliver & Boyd."},{"issue":"Nov","key":"5748_CR28","first-page":"1531","volume":"5","author":"F Fleuret","year":"2004","unstructured":"Fleuret, F. (2004). Fast binary feature selection with conditional mutual information. Journal of Machine Learning Research, 5(Nov), 1531\u20131555.","journal-title":"Journal of Machine Learning Research"},{"issue":"6","key":"5748_CR29","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.1214\/aos\/1176344003","volume":"5","author":"RV Foutz","year":"1977","unstructured":"Foutz, R. V., & Srivastava, R. C. (1977). The performance of the likelihood ratio test when the model is incorrect. The Annals of Statistics, 5(6), 1183\u20131194.","journal-title":"The Annals of Statistics"},{"issue":"Mar","key":"5748_CR30","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon, I., & Elisseeff, A. (2003). An introduction to variable and feature selection. Journal of Machine Learning Research, 3(Mar), 1157\u20131182.","journal-title":"Journal of Machine Learning Research"},{"key":"5748_CR31","unstructured":"Hameed, M.\u00a0A. (2012). Comparative analysis of orthogonal matching pursuit and least angle regression. Master\u2019s thesis, Michigan State University, Electrical Engineering."},{"key":"5748_CR32","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3462-1","volume-title":"Regression modeling strategies","author":"F Harrell","year":"2001","unstructured":"Harrell, F. (2001). Regression modeling strategies (corrected ed.). Berlin: Springer.","edition":"corrected"},{"issue":"4","key":"5748_CR33","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1037\/1082-989X.3.4.486","volume":"3","author":"LV Hedges","year":"1998","unstructured":"Hedges, L. V., & Vevea, J. L. (1998). Fixed-and random-effects models in meta-analysis. Psychological Methods, 3(4), 486.","journal-title":"Psychological Methods"},{"key":"5748_CR34","doi-asserted-by":"publisher","DOI":"10.1002\/9781118548387.ch1","volume-title":"Introduction to the Logistic Regression Model","author":"DW Hosmer Jr","year":"2013","unstructured":"Hosmer, D. W, Jr., Lemeshow, S., & Sturdivant, R. X. (2013). Introduction to the Logistic Regression Model. Hoboken: Wiley."},{"issue":"1","key":"5748_CR35","first-page":"1903","volume":"17","author":"S Ivanoff","year":"2016","unstructured":"Ivanoff, S., Picard, F., & Rivoirard, V. (2016). Adaptive Lasso and group-Lasso for functional Poisson regression. Journal of Machine Learning Research, 17(1), 1903\u20131948.","journal-title":"Journal of Machine Learning Research"},{"key":"5748_CR36","unstructured":"John, G.H., Kohavi, R., & Pfleger, K. (1994). Irrelevant features and the subset selection problem. In Machine learning: Proceedings of the eleventh international conference (pp. 121\u2013129)."},{"key":"5748_CR37","unstructured":"Kerber, R. (1992). Chimerge: Discretization of numeric attributes. In Proceedings of the tenth national conference on Artificial intelligence, (pp. 123\u2013128). AAAI Press."},{"key":"5748_CR38","unstructured":"Koller, D., & Sahami, M. (1996). Toward optimal feature selection. In Proceedings of the Thirteenth International Conference on Machine Learning, (pp. 284\u2013292)."},{"issue":"12","key":"5748_CR39","doi-asserted-by":"publisher","first-page":"1306","DOI":"10.14778\/2536274.2536302","volume":"6","author":"P Konda","year":"2013","unstructured":"Konda, P., Kumar, A., R\u00e9, C., & Sashikanth, V. (2013). Feature selection in enterprise analytics: A demonstration using an R-based data analytics system. Proceedings of the VLDB Endowment, 6(12), 1306\u20131309.","journal-title":"Proceedings of the VLDB Endowment"},{"key":"5748_CR40","volume-title":"Applied Linear Statistical Models","author":"MH Kutner","year":"2004","unstructured":"Kutner, M. H., Nachtsheim, C. J., Neter, J., & Li, W. (2004). Applied Linear Statistical Models (5th ed.). New York: McGraw-Hill\/Irwin.","edition":"5"},{"issue":"7","key":"5748_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v080.i07","volume":"80","author":"V Lagani","year":"2017","unstructured":"Lagani, V., Athineou, G., Farcomeni, A., Tsagris, M., & Tsamardinos, I. (2017). Feature selection with the R package MXM: Discovering statistically equivalent feature subsets. Journal of Statistical Software, 80(7), 1\u201325.","journal-title":"Journal of Statistical Software"},{"issue":"7","key":"5748_CR42","doi-asserted-by":"publisher","first-page":"1","DOI":"10.5936\/csbj.201303004","volume":"6","author":"V Lagani","year":"2013","unstructured":"Lagani, V., Kortas, G., & Tsamardinos, I. (2013). Biomarker signature identification in omics data with multi-class outcomes. Computational and Structural Biotechnology Journal, 6(7), 1\u20137.","journal-title":"Computational and Structural Biotechnology Journal"},{"issue":"15","key":"5748_CR43","doi-asserted-by":"publisher","first-page":"1887","DOI":"10.1093\/bioinformatics\/btq261","volume":"26","author":"V Lagani","year":"2010","unstructured":"Lagani, V., & Tsamardinos, I. (2010). Structure-based variable selection for survival data. Bioinformatics, 26(15), 1887\u20131894.","journal-title":"Bioinformatics"},{"key":"5748_CR44","unstructured":"Lee, S., Kim, J.\u00a0K., Zheng, X., Ho, Q., Gibson, G.\u00a0A., & Xing, E.\u00a0P. (2014). On model parallelization and scheduling strategies for distributed machine learning. In Advances in neural information processing systems 27: Annual conference on neural information processing systems 2014(pp. 2834\u20132842), December 8\u201313, 2014, Montreal."},{"issue":"6","key":"5748_CR45","doi-asserted-by":"publisher","first-page":"94:1","DOI":"10.1145\/3136625","volume":"50","author":"J Li","year":"2017","unstructured":"Li, J., Cheng, K., Wang, S., Morstatter, F., Trevino, R\u00a0. P., Tang, J., et al. (2017). Feature selection: A data perspective. ACM Computing Surveys, 50(6), 94:1\u201394:45.","journal-title":"ACM Computing Surveys"},{"key":"5748_CR46","doi-asserted-by":"crossref","unstructured":"Li, Q., Qiu, S., Ji, S., Thompson, P.\u00a0M., Ye, J., & Wang, J. (2016). Parallel lasso screening for big data optimization. In Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining (KDD \u201916) (pp. 1705\u20131714). New York, ACM.","DOI":"10.1145\/2939672.2939859"},{"issue":"3","key":"5748_CR47","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1016\/j.csda.2003.11.020","volume":"47","author":"TM Loughin","year":"2004","unstructured":"Loughin, T. M. (2004). A systematic comparison of methods for combining p values from independent tests. Computational Statistics & Data Analysis, 47(3), 467\u2013485.","journal-title":"Computational Statistics & Data Analysis"},{"key":"5748_CR48","unstructured":"Margaritis, D. (2009). Toward provably correct feature selection in arbitrary domains. In Advances in neural information processing systems (pp. 1240\u20131248)."},{"key":"5748_CR49","first-page":"505","volume":"12","author":"D Margaritis","year":"2000","unstructured":"Margaritis, D., & Thrun, S. (2000). Bayesian network induction via local neighborhoods. Advances in Neural Information Processing Systems, 12, 505\u2013511.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"5748_CR50","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1111\/j.1467-9868.2007.00627.x","volume":"70","author":"L Meier","year":"2008","unstructured":"Meier, L., Van De Geer, S., & B\u00fchlmann, P. (2008). The group lasso for logistic regression. Journal of the Royal Statistical Society, Series B, 70, 53\u201371.","journal-title":"Journal of the Royal Statistical Society, Series B"},{"key":"5748_CR51","doi-asserted-by":"publisher","first-page":"1436","DOI":"10.1214\/009053606000000281","volume":"34","author":"N Meinshausen","year":"2006","unstructured":"Meinshausen, N., & B\u00fchlmann, P. (2006). High-dimensional graphs and variable selection with the Lasso. The Annals of Statistics, 34, 1436\u20131462.","journal-title":"The Annals of Statistics"},{"issue":"1","key":"5748_CR52","first-page":"1235","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng, X., Bradley, J., Yavuz, B., Sparks, E., Venkataraman, S., Liu, D., et al. (2016). Mllib: Machine learning in apache spark. Journal of Machine Learning Research, 17(1), 1235\u20131241.","journal-title":"Journal of Machine Learning Research"},{"key":"5748_CR53","doi-asserted-by":"publisher","DOI":"10.1201\/9781420035933","volume-title":"Subset selection in regression","author":"A Miller","year":"2002","unstructured":"Miller, A. (2002). Subset selection in regression. Boca Raton: CRC Press."},{"key":"5748_CR54","unstructured":"Minka, T. P. (2003). A comparison of numerical optimizers for logistic regression. Technical report (unpublished draft)."},{"key":"5748_CR55","doi-asserted-by":"crossref","unstructured":"Pati, Y. C., Rezaiifar, R., & Krishnaprasad, P.\u00a0S. (1993). Orthogonal matching pursuit: Recursive function approximation with applications to wavelet decomposition. In Conference record of the twenty-seventh Asilomar conference on signals, systems and computers (pp. 40\u201344). IEEE.","DOI":"10.1109\/ACSSC.1993.342465"},{"key":"5748_CR56","volume-title":"Probabilistic reasoning in intelligent systems: Networks of plausible inference","author":"J Pearl","year":"1988","unstructured":"Pearl, J. (1988). Probabilistic reasoning in intelligent systems: Networks of plausible inference. San Francisco: Morgan Kaufmann Publishers Inc."},{"key":"5748_CR57","volume-title":"Causality, models, reasoning, and inference","author":"J Pearl","year":"2000","unstructured":"Pearl, J. (2000). Causality, models, reasoning, and inference. Cambridge: Cambridge University Press."},{"key":"5748_CR58","doi-asserted-by":"publisher","first-page":"789","DOI":"10.1016\/S0049-237X(06)80074-1","volume":"134","author":"J Pearl","year":"1995","unstructured":"Pearl, J., & Verma, T. S. (1995). A theory of inferred causation. Studies in Logic and the Foundations of Mathematics, 134, 789\u2013811.","journal-title":"Studies in Logic and the Foundations of Mathematics"},{"issue":"12","key":"5748_CR59","doi-asserted-by":"publisher","first-page":"1373","DOI":"10.1016\/S0895-4356(96)00236-3","volume":"49","author":"P Peduzzi","year":"1996","unstructured":"Peduzzi, P., Concato, J., Kemper, E., Holford, T. R., & Feinstein, A. R. (1996). A simulation study of the number of events per variable in logistic regression analysis. Journal of Clinical Epidemiology, 49(12), 1373\u20131379.","journal-title":"Journal of Clinical Epidemiology"},{"issue":"2","key":"5748_CR60","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1016\/j.ijar.2006.06.008","volume":"45","author":"JM Pe\u00f1a","year":"2007","unstructured":"Pe\u00f1a, J. M., Nilsson, R., Bj\u00f6rkegren, J., & Tegn\u00e9r, J. (2007). Towards scalable and data efficient learning of Markov boundaries. International Journal of Approximate Reasoning, 45(2), 211\u2013232.","journal-title":"International Journal of Approximate Reasoning"},{"issue":"8","key":"5748_CR61","doi-asserted-by":"publisher","first-page":"1226","DOI":"10.1109\/TPAMI.2005.159","volume":"27","author":"H Peng","year":"2005","unstructured":"Peng, H., Long, F., & Ding, C. (2005). Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy. IEEE Transactions on Pattern Analysis and Machine Intelligence, 27(8), 1226\u20131238.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"99","key":"5748_CR62","first-page":"1","volume":"PP","author":"S Ramrez-Gallego","year":"2017","unstructured":"Ramrez-Gallego, S., Mourio-Taln, H., Martnez-Rego, D., Boln-Canedo, V., Bentez, J. M., Alonso-Betanzos, A., et al. (2017). An information theory-based feature selection framework for big data under apache spark. IEEE Transactions on Systems, Man, and Cybernetics: Systems, PP(99), 1\u201313.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics: Systems"},{"key":"5748_CR63","doi-asserted-by":"publisher","first-page":"962","DOI":"10.1214\/aos\/1031689015","volume":"30","author":"T Richardson","year":"2002","unstructured":"Richardson, T., & Spirtes, P. (2002). Ancestral graph Markov models. Annals of Statistics, 30, 962\u20131030.","journal-title":"Annals of Statistics"},{"issue":"3","key":"5748_CR64","doi-asserted-by":"publisher","first-page":"865","DOI":"10.1007\/s10589-016-9832-2","volume":"64","author":"T Sato","year":"2016","unstructured":"Sato, T., Takano, Y., Miyashiro, R., & Yoshise, A. (2016). Feature subset selection for logistic regression via mixed integer optimization. Computational Optimization and Applications, 64(3), 865\u2013880.","journal-title":"Computational Optimization and Applications"},{"issue":"2","key":"5748_CR65","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1111\/j.1467-9469.2011.00740.x","volume":"38","author":"J Schelldorfer","year":"2011","unstructured":"Schelldorfer, J., B\u00fchlmann, P., & Van De Geer, S. (2011). Estimation for high-dimensional linear mixed-effects models using l1-penalization. Scandinavian Journal of Statistics, 38(2), 197\u2013214.","journal-title":"Scandinavian Journal of Statistics"},{"issue":"2","key":"5748_CR66","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1214\/aos\/1176344136","volume":"6","author":"G Schwarz","year":"1978","unstructured":"Schwarz, G. (1978). Estimating the dimension of a model. The Annals of Statistics, 6(2), 461\u2013464.","journal-title":"The Annals of Statistics"},{"issue":"1","key":"5748_CR67","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1093\/nar\/29.1.308","volume":"29","author":"ST Sherry","year":"2001","unstructured":"Sherry, S. T., Ward, M.-H., Kholodov, M., Baker, J., Phan, L., Smigielski, E. M., et al. (2001). dbSNP: The NCBI database of genetic variation. Nucleic Acids Research, 29(1), 308\u2013311.","journal-title":"Nucleic Acids Research"},{"key":"5748_CR68","doi-asserted-by":"crossref","unstructured":"Singh, S., Kubica, J., Larsen, S., & Sorokina, D. (2009). Parallel large scale feature selection for logistic regression. In Proceedings of the 2009 SIAM international conference on data mining (pp. 1172\u20131183). SIAM.","DOI":"10.1137\/1.9781611972795.100"},{"key":"5748_CR69","volume-title":"Causation, prediction, and search","author":"P Spirtes","year":"2000","unstructured":"Spirtes, P., Glymour, C. N., & Scheines, R. (2000). Causation, prediction, and search (2nd ed.). Cambridge: MIT Press.","edition":"2"},{"issue":"Feb","key":"5748_CR70","first-page":"499","volume":"14","author":"A Statnikov","year":"2013","unstructured":"Statnikov, A., Lytkin, N. I., Lemeire, J., & Aliferis, C. F. (2013). Algorithms for discovery of multiple Markov boundaries. Journal of Machine Learning Research, 14(Feb), 499\u2013566.","journal-title":"Journal of Machine Learning Research"},{"key":"5748_CR71","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani, R. (1996). Regression shrinkage and selection via the Lasso. Journal of the Royal Statistical Society. Series B (Methodological), 58, 267\u2013288.","journal-title":"Journal of the Royal Statistical Society. Series B (Methodological)"},{"issue":"1","key":"5748_CR72","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1186\/s12859-018-2023-7","volume":"19","author":"M Tsagris","year":"2018","unstructured":"Tsagris, M., Lagani, V., & Tsamardinos, I. (2018). Feature selection for high-dimensional temporal data. BMC Bioinformatics, 19(1), 17.","journal-title":"BMC Bioinformatics"},{"key":"5748_CR73","unstructured":"Tsamardinos, I., & Aliferis, C. F. (2003). Towards principled feature selection: Relevancy, filters and wrappers. In Proceedings of the ninth international workshop on artificial intelligence and statistics."},{"key":"5748_CR74","doi-asserted-by":"crossref","unstructured":"Tsamardinos, I., Aliferis, C. F., & Statnikov, A. (2003a). Time and sample efficient discovery of Markov blankets and direct causal relations. In Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 673\u2013678). ACM.","DOI":"10.1145\/956804.956838"},{"key":"5748_CR75","unstructured":"Tsamardinos, I., Aliferis, C.\u00a0F, & Statnikov, A.\u00a0R. (2003b). Algorithms for large scale Markov blanket discovery. In FLAIRS conference (Vol. 2)."},{"key":"5748_CR76","doi-asserted-by":"crossref","unstructured":"Tsamardinos, I., & Mariglis, A.\u00a0P. (2009). Multi-source causal analysis: Learning Bayesian networks from multiple datasets. In IFIP international conference on artificial intelligence applications and innovations (pp. 479\u2013490). Springer, Berlin.","DOI":"10.1007\/978-1-4419-0221-4_56"},{"key":"5748_CR77","unstructured":"Verma, T., & Pearl. (1988). Causal networks: Semantics and expressiveness. In Proceedings, 4th workshop on uncertainty in artificial intelligence (pp. 352\u2013359)."},{"issue":"6","key":"5748_CR78","doi-asserted-by":"publisher","first-page":"710","DOI":"10.1093\/aje\/kwk052","volume":"165","author":"E Vittinghoff","year":"2007","unstructured":"Vittinghoff, E., & McCulloch, C. E. (2007). Relaxing the rule of ten events per variable in logistic and Cox regression. American Journal of Epidemiology, 165(6), 710\u2013718.","journal-title":"American Journal of Epidemiology"},{"key":"5748_CR79","doi-asserted-by":"publisher","first-page":"307","DOI":"10.2307\/1912557","volume":"57","author":"QH Vuong","year":"1989","unstructured":"Vuong, Q. H. (1989). Likelihood ratio tests for model selection and non-nested hypotheses. Econometrica: Journal of the Econometric Society, 57, 307\u2013333.","journal-title":"Econometrica: Journal of the Econometric Society"},{"key":"5748_CR80","unstructured":"Wang, X., Dunson, D.\u00a0B, & Leng, C. (2016). Decorrelated feature space partitioning for distributed sparse regression. In Advances in neural information processing systems (pp. 802\u2013810)."},{"key":"5748_CR81","doi-asserted-by":"publisher","DOI":"10.1002\/0471704091","volume-title":"Applied linear regression","author":"S Weisberg","year":"2005","unstructured":"Weisberg, S. (2005). Applied linear regression (Vol. 528). Hoboken: Wiley."},{"issue":"1","key":"5748_CR82","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1080\/00949658208810560","volume":"15","author":"WJ Welch","year":"1982","unstructured":"Welch, W. J. (1982). Algorithmic complexity: Three NP-hard problems in computational statistics. Journal of Statistical Computation and Simulation, 15(1), 17\u201325.","journal-title":"Journal of Statistical Computation and Simulation"},{"issue":"1","key":"5748_CR83","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2307\/1912526","volume":"50","author":"H White","year":"1982","unstructured":"White, H. (1982). Maximum likelihood estimation of misspecified models. Econometrica, 50(1), 1\u201325.","journal-title":"Econometrica"},{"issue":"1","key":"5748_CR84","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1214\/aoms\/1177732360","volume":"9","author":"SS Wilks","year":"1938","unstructured":"Wilks, S. S. (1938). The large-sample distribution of the likelihood ratio for testing composite hypotheses. The Annals of Mathematical Statistics, 9(1), 60\u201362.","journal-title":"The Annals of Mathematical Statistics"},{"issue":"2","key":"5748_CR85","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/J.ENG.2016.02.008","volume":"2","author":"EP Xing","year":"2016","unstructured":"Xing, E. P., Ho, Q., Xie, P., & Wei, D. (2016). Strategies and principles of distributed machine learning on Big Data. Engineering, 2(2), 179\u2013195.","journal-title":"Engineering"},{"key":"5748_CR86","unstructured":"Yang, H.\u00a0H., & Moody, J. (2000). Data visualization and feature selection: New algorithms for nongaussian data. In Advances in neural information processing systems (pp. 687\u2013693)."},{"key":"5748_CR87","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.\u00a0J., Shenker, S., & Stoica, I. (2010). Spark: Cluster computing with working sets. In HotCloud."},{"issue":"3","key":"5748_CR88","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/MCI.2014.2326099","volume":"9","author":"Y Zhai","year":"2014","unstructured":"Zhai, Y., Ong, Y., & Tsang, I. W. (2014). The emerging big dimensionality. IEEE Computational Intelligence Magazine, 9(3), 14\u201326.","journal-title":"IEEE Computational Intelligence Magazine"},{"key":"5748_CR89","unstructured":"Zhang, K., Peters, J., Janzing, D., & Sch\u00f6lkopf, B. (2011). Kernel-based conditional independence test and application in causal discovery. In Proceedings of the twenty-seventh conference on uncertainty in artificial intelligence (pp. 804\u2013813)."},{"issue":"1","key":"5748_CR90","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1007\/s10994-013-5373-4","volume":"92","author":"Z Zhao","year":"2013","unstructured":"Zhao, Z., Zhang, R., Cox, J., Duling, D., & Sarle, W. (2013). Massively parallel feature selection: An approach based on variance preservation. Machine Learning, 92(1), 195\u2013220.","journal-title":"Machine Learning"},{"key":"5748_CR91","unstructured":"Zhimin, P., Ming, Y., & Wotao, Y. (2013). Parallel and distributed sparse optimization. In Proceedings of the Asilomar conference on signals, systems and computers."},{"key":"5748_CR92","unstructured":"Zhou, Y., Porwal, U., Zhang, C., Ngo, H.\u00a0Q., Nguyen, X., R\u00e9, C., & Govindaraju, V. (2014). Parallel feature selection inspired by group testing. In Advances in neural information processing systems (pp. 3554\u20133562)."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-018-5748-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-018-5748-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-018-5748-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,7]],"date-time":"2020-11-07T11:43:34Z","timestamp":1604749414000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-018-5748-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,7]]},"references-count":92,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2019,2]]}},"alternative-id":["5748"],"URL":"https:\/\/doi.org\/10.1007\/s10994-018-5748-7","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,8,7]]},"assertion":[{"value":"14 August 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 July 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}