{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:36:52Z","timestamp":1777657012509,"version":"3.51.4"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,8,27]],"date-time":"2019-08-27T00:00:00Z","timestamp":1566864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2019,8,27]],"date-time":"2019-08-27T00:00:00Z","timestamp":1566864000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1186\/s40537-019-0241-0","type":"journal-article","created":{"date-parts":[[2019,8,27]],"date-time":"2019-08-27T12:03:35Z","timestamp":1566907415000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":113,"title":["Feature selection methods and genomic big data: a systematic review"],"prefix":"10.1186","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5817-0289","authenticated-orcid":false,"given":"Khawla","family":"Tadist","sequence":"first","affiliation":[]},{"given":"Said","family":"Najah","sequence":"additional","affiliation":[]},{"given":"Nikola S.","family":"Nikolov","sequence":"additional","affiliation":[]},{"given":"Fatiha","family":"Mrabti","sequence":"additional","affiliation":[]},{"given":"Azeddine","family":"Zahi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,8,27]]},"reference":[{"issue":"4","key":"241_CR1","doi-asserted-by":"publisher","first-page":"1193","DOI":"10.1109\/JBHI.2015.2450362","volume":"19","author":"J Andreu-Perez","year":"2015","unstructured":"Andreu-Perez J, Poon CC, Merrifield RD, Wong ST, Yang GZ. Big data for health. IEEE J Biomed Health Inform. 2015;19(4):1193.","journal-title":"IEEE J Biomed Health Inform"},{"issue":"5","key":"241_CR2","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1101\/gr.3851306","volume":"16","author":"M West","year":"2006","unstructured":"West M, Ginsburg GS, Huang AT, Nevins JR. Embracing the complexity of genomic data for personalized medicine. Genome Res. 2006;16(5):559.","journal-title":"Genome Res"},{"key":"241_CR3","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1016\/j.ins.2014.01.015","volume":"275","author":"CP Chen","year":"2014","unstructured":"Chen CP, Zhang CY. Data-intensive applications, challenges, techniques and technologies: a survey on Big Data. Inf Sci. 2014;275:314.","journal-title":"Inf Sci"},{"issue":"10","key":"241_CR4","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1093\/bioinformatics\/btl066","volume":"22","author":"D Berrar","year":"2006","unstructured":"Berrar D, Bradbury I, Dubitzky W. Avoiding model selection bias in small-sample genomic datasets. Bioinformatics. 2006;22(10):1245.","journal-title":"Bioinformatics"},{"issue":"1","key":"241_CR5","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1186\/s40537-015-0032-1","volume":"2","author":"S Landset","year":"2015","unstructured":"Landset S, Khoshgoftaar TM, Richter AN, Hasanin T. A survey of open source tools for machine learning with big data in the Hadoop ecosystem. J Big Data. 2015;2(1):24.","journal-title":"J Big Data"},{"key":"241_CR6","volume-title":"Wrapper induction for information extraction","author":"N Kushmerick","year":"1997","unstructured":"Kushmerick N, Weld DS, Doorenbos R. Wrapper induction for information extraction. Washington: University of Washington; 1997."},{"key":"241_CR7","doi-asserted-by":"crossref","unstructured":"Naseriparsa M, Bidgoli AM, Varaee T. A hybrid feature selection method to improve performance of a group of classification algorithms. 2014. arXiv preprint \n                    arXiv:1403.2372\n                    \n                  .","DOI":"10.5120\/12065-8172"},{"issue":"1","key":"241_CR8","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.inffus.2004.04.003","volume":"6","author":"A Tsymbal","year":"2005","unstructured":"Tsymbal A, Pechenizkiy M, Cunningham P. Diversity in search strategies for ensemble feature selection. Inf Fusion. 2005;6(1):83.","journal-title":"Inf Fusion"},{"key":"241_CR9","doi-asserted-by":"crossref","unstructured":"Grasnick B, Perscheid C, Uflacker M. A framework for the automatic combination and evaluation of gene selection methods. In: International conference on practical applications of computational biology & bioinformatics. Berlin: Springer; 2018. p. 166\u201374.","DOI":"10.1007\/978-3-319-98702-6_20"},{"key":"241_CR10","first-page":"68","volume":"8","author":"K Petersen","year":"2008","unstructured":"Petersen K, Feldt R, Mujtaba S, Mattsson M. Systematic mapping studies in software engineering. Ease. 2008;8:68\u201377.","journal-title":"Ease"},{"issue":"1","key":"241_CR11","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1007\/s00521-013-1368-0","volume":"24","author":"JR Vergara","year":"2014","unstructured":"Vergara JR, Est\u00e9vez PA. A review of feature selection methods based on mutual information. Neural Comput Appl. 2014;24(1):175.","journal-title":"Neural Comput Appl"},{"issue":"15","key":"241_CR12","doi-asserted-by":"publisher","first-page":"2429","DOI":"10.1093\/bioinformatics\/bth267","volume":"20","author":"T Li","year":"2004","unstructured":"Li T, Zhang C, Ogihara M. A comparative study of feature selection and multiclass classification methods for tissue classification based on gene expression. Bioinformatics. 2004;20(15):2429.","journal-title":"Bioinformatics"},{"key":"241_CR13","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.ymeth.2016.08.014","volume":"111","author":"L Wang","year":"2016","unstructured":"Wang L, Wang Y, Chang Q. Feature selection methods for big data bioinformatics: a survey from the search perspective. Methods. 2016;111:21.","journal-title":"Methods"},{"issue":"1","key":"241_CR14","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1186\/s40537-019-0224-1","volume":"6","author":"S Kumar","year":"2019","unstructured":"Kumar S, Zymbler M. A machine learning approach to analyze customer satisfaction from airline tweets. J Big Data. 2019;6(1):62.","journal-title":"J Big Data"},{"issue":"3\/4","key":"241_CR15","first-page":"1","volume":"21","author":"B Houghton","year":"2015","unstructured":"Houghton B. Trustworthiness: self-assessment of an institutional repository against ISO 16363\u20132012. D-Lib Mag. 2015;21(3\/4):1.","journal-title":"D-Lib Mag"},{"issue":"1","key":"241_CR16","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1186\/s40537-015-0028-x","volume":"2","author":"P O\u2019Donovan","year":"2015","unstructured":"O\u2019Donovan P, Leahy K, Bruton K, O\u2019Sullivan DT. Big data in manufacturing: a systematic mapping study. J Big Data. 2015;2(1):20.","journal-title":"J Big Data"},{"key":"241_CR17","doi-asserted-by":"crossref","unstructured":"Muneshwara M, Swetha M, Thungamani M, Anil G. Digital genomics to build a smart franchise in real time applications, In: 2017 international conference on circuit, power and computing technologies (ICCPCT). New York: IEEE; 2017. p. 1\u20134.","DOI":"10.1109\/ICCPCT.2017.8074247"},{"key":"241_CR18","doi-asserted-by":"crossref","unstructured":"Yang J, Zhu Z, He S, Ji Z. Minimal-redundancy-maximal-relevance feature selection using different relevance measures for omics data classification. In: 2013 IEEE symposium on computational intelligence in bioinformatics and computational biology (CIBCB). New York: IEEE; 2013. p. 246\u201351.","DOI":"10.1109\/CIBCB.2013.6595417"},{"issue":"2","key":"241_CR19","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/s10994-018-5748-7","volume":"108","author":"I Tsamardinos","year":"2019","unstructured":"Tsamardinos I, Borboudakis G, Katsogridakis P, Pratikakis P, Christophides V. A greedy feature selection algorithm for Big Data of high dimensionality. Mach Learn. 2019;108(2):149\u2013202.","journal-title":"Mach Learn."},{"issue":"3","key":"241_CR20","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/TCBB.2015.2448071","volume":"13","author":"D He","year":"2016","unstructured":"He D, Rish I, Haws D, Parida L. Mint: mutual information based transductive feature selection for genetic trait prediction. IEEE\/ACM Trans Comput Biol Bioinform. 2016;13(3):578.","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"key":"241_CR21","doi-asserted-by":"publisher","first-page":"470","DOI":"10.1016\/j.schres.2018.01.002","volume":"197","author":"Y Shen","year":"2018","unstructured":"Shen Y, Xu J, Li Z, Huang Y, Yuan Y, Wang J, Zhang M, Hu S, Liang Y. Analysis of gut microbiota diversity and auxiliary diagnosis as a biomarker in patients with schizophrenia: a cross-sectional study. Schizophr Res. 2018;197:470.","journal-title":"Schizophr Res"},{"key":"241_CR22","doi-asserted-by":"publisher","first-page":"606","DOI":"10.1016\/j.future.2018.05.060","volume":"89","author":"G Sun","year":"2018","unstructured":"Sun G, Li J, Dai J, Song Z, Lang F. Feature selection for IoT based on maximal information coefficient. Future Gener Comput Syst. 2018;89:606.","journal-title":"Future Gener Comput Syst"},{"key":"241_CR23","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1016\/j.compbiomed.2017.09.020","volume":"90","author":"I Kavakiotis","year":"2017","unstructured":"Kavakiotis I, Samaras P, Triantafyllidis A, Vlahavas I. FIFS: a data mining method for informative marker selection in high dimensional population genomic data. Comput Biol Med. 2017;90:146.","journal-title":"Comput Biol Med"},{"key":"241_CR24","doi-asserted-by":"crossref","unstructured":"Saghir H, Megherbi DB. Big data biology-based predictive models via DNA-metagenomics binning for WMD events applications. In: 2015 IEEE international symposium on technologies for homeland security (HST). New York: IEEE; 2015. p. 1\u20136.","DOI":"10.1109\/THS.2015.7225313"},{"key":"241_CR25","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.procs.2015.04.005","volume":"50","author":"S Sasikala","year":"2015","unstructured":"Sasikala S, alias Balamurugan SA, Geetha S. A novel feature selection technique for improved survivability diagnosis of breast cancer. Procedia Comput Sci. 2015;50:16.","journal-title":"Procedia Comput Sci"},{"key":"241_CR26","doi-asserted-by":"publisher","first-page":"584","DOI":"10.1016\/j.knosys.2015.09.005","volume":"89","author":"M Kumar","year":"2015","unstructured":"Kumar M, Rath SK. Classification of microarray using MapReduce based proximal support vector machine classifier. Knowl Based Syst. 2015;89:584.","journal-title":"Knowl Based Syst"},{"key":"241_CR27","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1016\/j.jbi.2016.03.002","volume":"60","author":"M Kumar","year":"2016","unstructured":"Kumar M, Rath NK, Rath SK. Analysis of microarray leukemia data using an efficient MapReduce-based K-nearest-neighbor classifier. J Biomed Inform. 2016;60:395.","journal-title":"J Biomed Inform"},{"issue":"6","key":"241_CR28","doi-asserted-by":"publisher","first-page":"2255","DOI":"10.1016\/j.bbadis.2017.12.003","volume":"1864","author":"YH Zhang","year":"2018","unstructured":"Zhang YH, Hu Y, Zhang Y, Hu LD, Kong X. Distinguishing three subtypes of hematopoietic cells based on gene expression profiles using a support vector machine. Biochim Biophys Acta Mol Basis Dis. 2018;1864(6):2255.","journal-title":"Biochim Biophys Acta Mol Basis Dis"},{"key":"241_CR29","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1016\/j.ymeth.2017.06.010","volume":"124","author":"C Liu","year":"2017","unstructured":"Liu C, Wang X, Genchev GZ, Lu H. Distinguishing three subtypes of hematopoietic cells based on gene expression profiles using a support vector machine. Methods. 2017;124:100.","journal-title":"Methods"},{"issue":"6","key":"241_CR30","doi-asserted-by":"publisher","first-page":"2241","DOI":"10.1016\/j.bbadis.2017.10.036","volume":"1864","author":"J Li","year":"2018","unstructured":"Li J, Huang T. Predicting and analyzing early wake-up associated gene expressions by integrating GWAS and eQTL studies. Biochim Biophys Acta Mol Basis Dis. 2018;1864(6):2241.","journal-title":"Biochim Biophys Acta Mol Basis Dis"},{"key":"241_CR31","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/j.neucom.2015.10.148","volume":"217","author":"Y Zhou","year":"2016","unstructured":"Zhou Y, Huang T, Huang G, Zhang N, Kong X, Cai YD. Prediction of protein N-formylation and comparison with N-acetylation based on a feature selection method. Neurocomputing. 2016;217:53.","journal-title":"Neurocomputing"},{"key":"241_CR32","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.knosys.2015.05.027","volume":"87","author":"I Triguero","year":"2015","unstructured":"Triguero I, del R\u00edo S, L\u00f3pez V, Bacardit J, Ben\u00edtez JM, Herrera F. ROSEFW-RF: the winner algorithm for the ECBDL\u201914 big data competition: an extremely imbalanced big data bioinformatics problem. Knowl Based Syst. 2015;87:69.","journal-title":"Knowl Based Syst"},{"key":"241_CR33","doi-asserted-by":"crossref","unstructured":"Wang MH, Tsoi K, Lai X, Chong M, Zee B, Zheng T, Lo SH, Hu I. Two screening methods for genetic association study with application to psoriasis microarray data sets. In: 2015 IEEE international congress on big data. New York: IEEE; 2015. p. 324\u20136.","DOI":"10.1109\/BigDataCongress.2015.55"},{"issue":"1","key":"241_CR34","doi-asserted-by":"publisher","first-page":"1679","DOI":"10.1016\/j.matpr.2017.11.263","volume":"5","author":"P Arumugam","year":"2018","unstructured":"Arumugam P, Jose P. Efficient decision tree based data selection and support vector machine classification. Mater Today Proc. 2018;5(1):1679.","journal-title":"Mater Today Proc"},{"key":"241_CR35","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.artmed.2017.05.004","volume":"79","author":"M Jafari","year":"2017","unstructured":"Jafari M, Ghavami B, Sattari V. A hybrid framework for reverse engineering of robust gene regulatory networks. Artif Intell Med. 2017;79:15.","journal-title":"Artif Intell Med"},{"issue":"3","key":"241_CR36","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1016\/j.ejor.2017.08.040","volume":"265","author":"B Ghaddar","year":"2018","unstructured":"Ghaddar B, Naoum-Sawaya J. High dimensional data classification and feature selection using support vector machines. Eur J Oper Res. 2018;265(3):993.","journal-title":"Eur J Oper Res"},{"issue":"6","key":"241_CR37","doi-asserted-by":"publisher","first-page":"2218","DOI":"10.1016\/j.bbadis.2017.12.026","volume":"1864","author":"S Wang","year":"2018","unstructured":"Wang S, Cai Y. Identification of the functional alteration signatures across different cancer types with support vector machine and feature analysis. Biochim Biophys Acta Mol Basis Dis. 2018;1864(6):2218.","journal-title":"Biochim Biophys Acta Mol Basis Dis"},{"key":"241_CR38","doi-asserted-by":"crossref","unstructured":"Farid DM, Nowe A, Manderick B. A feature grouping method for ensemble clustering of high-dimensional genomic big data. In: 2016 future technologies conference (FTC). New York: IEEE; 2016. p. 260\u20138.","DOI":"10.1109\/FTC.2016.7821620"},{"key":"241_CR39","doi-asserted-by":"publisher","first-page":"2003","DOI":"10.1016\/j.procs.2014.05.184","volume":"29","author":"JM Hogan","year":"2014","unstructured":"Hogan JM, Peut T. Large scale read classification for next generation sequencing. Procedia Comput Sci. 2014;29:2003.","journal-title":"Procedia Comput Sci"},{"issue":"4","key":"241_CR40","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1109\/TBDATA.2017.2735991","volume":"3","author":"X Zhu","year":"2017","unstructured":"Zhu X, Suk HI, Huang H, Shen D. Low-rank graph-regularized structured sparse regression for identifying genetic biomarkers. IEEE Trans Big Data. 2017;3(4):405.","journal-title":"IEEE Trans Big Data"},{"key":"241_CR41","doi-asserted-by":"crossref","unstructured":"Altinigneli C, Konten B, Rujescir D, B\u00f6hm C, Plant C. Identification of SNP interactions using data-parallel primitives on GPUs. In: 2014 IEEE international conference on big data (Big Data). New York: IEEE; 2014. p. 539\u201348.","DOI":"10.1109\/BigData.2014.7004271"},{"key":"241_CR42","doi-asserted-by":"crossref","unstructured":"Raghu VK, Ge X, Chrysanthis PK, Benos PV Integrated theory-and data-driven feature selection in gene expression data analysis. In: 2017 IEEE 33rd international conference on data engineering (ICDE). New York: IEEE; 2017. p. 1525\u201332.","DOI":"10.1109\/ICDE.2017.223"},{"key":"241_CR43","doi-asserted-by":"crossref","unstructured":"AlFarraj O, AlZubi A, Tolba A. Optimized feature selection algorithm based on fireflies with gravitational ant colony algorithm for big data predictive analytics. Neural Comput Appl. 2018:1\u201313.","DOI":"10.1007\/s00521-018-3612-0"},{"key":"241_CR44","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/j.procs.2015.06.035","volume":"54","author":"M Kumar","year":"2015","unstructured":"Kumar M, Rath NK, Swain A, Rath SK. Feature selection and classification of microarray data using MapReduce based ANOVA and K-nearest neighbor. Procedia Comput Sci. 2015;54:301.","journal-title":"Procedia Comput Sci"},{"key":"241_CR45","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1016\/j.eswa.2016.08.008","volume":"64","author":"DM Farid","year":"2016","unstructured":"Farid DM, Al-Mamun MA, Manderick B, Nowe A. An adaptive rule-based classifier for mining big biological data. Expert Syst Appl. 2016;64:305.","journal-title":"Expert Syst Appl"},{"key":"241_CR46","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.jocs.2015.09.008","volume":"11","author":"E Elsebakhi","year":"2015","unstructured":"Elsebakhi E, Lee F, Schendel E, Haque A, Kathireason N, Pathare T, Syed N, Al-Ali R. Large-scale machine learning based on functional networks for biomedical big data with high performance computing platforms. J Comput Sci. 2015;11:69.","journal-title":"J Comput Sci"},{"key":"241_CR47","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.is.2017.05.006","volume":"69","author":"W Dhifli","year":"2017","unstructured":"Dhifli W, Aridhi S, Nguifo EM. MR-SimLab: scalable subgraph selection with label similarity for big data. Inf Syst. 2017;69:155.","journal-title":"Inf Syst"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-019-0241-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s40537-019-0241-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-019-0241-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,25]],"date-time":"2020-08-25T23:07:22Z","timestamp":1598396842000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-019-0241-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8,27]]},"references-count":47,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["241"],"URL":"https:\/\/doi.org\/10.1186\/s40537-019-0241-0","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,8,27]]},"assertion":[{"value":"5 May 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 August 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 August 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"79"}}