{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T19:28:46Z","timestamp":1774466926330,"version":"3.50.1"},"reference-count":155,"publisher":"Public Library of Science (PLoS)","issue":"8","license":[{"start":{"date-parts":[[2022,8,11]],"date-time":"2022-08-11T00:00:00Z","timestamp":1660176000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001866","name":"Fonds National de la Recherche Luxembourg","doi-asserted-by":"crossref","award":["I1R-BIC-PFN-15NCER"],"award-info":[{"award-number":["I1R-BIC-PFN-15NCER"]}],"id":[{"id":"10.13039\/501100001866","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100007601","name":"Horizon 2020","doi-asserted-by":"publisher","award":["ERAPERMED 2020-314"],"award-info":[{"award-number":["ERAPERMED 2020-314"]}],"id":[{"id":"10.13039\/501100007601","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007601","name":"Horizon 2020","doi-asserted-by":"publisher","award":["PERMIT 874 825"],"award-info":[{"award-number":["PERMIT 874 825"]}],"id":[{"id":"10.13039\/501100007601","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007601","name":"Horizon 2020","doi-asserted-by":"publisher","award":["814978"],"award-info":[{"award-number":["814978"]}],"id":[{"id":"10.13039\/501100007601","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100013278","name":"EU Joint Programme \u2013 Neurodegenerative Disease Research","doi-asserted-by":"publisher","award":["JPND2019-466-037"],"award-info":[{"award-number":["JPND2019-466-037"]}],"id":[{"id":"10.13039\/100013278","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1010357","type":"journal-article","created":{"date-parts":[[2022,8,11]],"date-time":"2022-08-11T17:26:02Z","timestamp":1660238762000},"page":"e1010357","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":38,"title":["Ten quick tips for biomarker discovery and validation analyses using machine learning"],"prefix":"10.1371","volume":"18","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6637-9039","authenticated-orcid":true,"given":"Ramon","family":"Diaz-Uriarte","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7115-7393","authenticated-orcid":true,"given":"Elisa","family":"G\u00f3mez de Lope","sequence":"additional","affiliation":[]},{"given":"Rosalba","family":"Giugno","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5328-1243","authenticated-orcid":true,"given":"Holger","family":"Fr\u00f6hlich","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3443-0298","authenticated-orcid":true,"given":"Petr V.","family":"Nazarov","sequence":"additional","affiliation":[]},{"given":"Isabel A.","family":"Nepomuceno-Chamorro","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6498-4801","authenticated-orcid":true,"given":"Armin","family":"Rauschenberger","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3977-7469","authenticated-orcid":true,"given":"Enrico","family":"Glaab","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2022,8,11]]},"reference":[{"key":"pcbi.1010357.ref001","first-page":"1","article-title":"OMICS for Tumor Biomarker Research.","author":"S Moshkovskii","year":"2014","journal-title":"Biomarkers. Cancer"},{"key":"pcbi.1010357.ref002","doi-asserted-by":"crossref","first-page":"53","DOI":"10.2134\/appliedstatistics.2015.0074.c3","article-title":"Blocking Principles for Biological Experiments.","author":"MD Casler","year":"2018","journal-title":"Applied Statistics in Agricultural, Biological, and Environmental Sciences."},{"key":"pcbi.1010357.ref003","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1093\/biostatistics\/kxv027","article-title":"Methods that remove batch effects while retaining group differences may lead to exaggerated confidence in downstream analyses.","volume":"17","author":"V Nygaard","year":"2016","journal-title":"Biostatistics"},{"key":"pcbi.1010357.ref004","doi-asserted-by":"crossref","first-page":"3092","DOI":"10.1038\/s41467-020-16937-8","article-title":"Harmonization of quality metrics and power calculation in multi-omic studies.","volume":"11","author":"S Tarazona","year":"2020","journal-title":"Nat Commun."},{"key":"pcbi.1010357.ref005","doi-asserted-by":"crossref","first-page":"c315","DOI":"10.1159\/000323136","article-title":"Matching, an appealing method to avoid confounding?","volume":"118","author":"MA de Graaf","year":"2011","journal-title":"Nephron Clin Pract"},{"key":"pcbi.1010357.ref006","volume-title":"Causal Inference","author":"MA Hernan","year":"2020"},{"key":"pcbi.1010357.ref007","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511803161","volume-title":"Causality: Models, Reasoning, and Inference","author":"J. Pearl","year":"2009","edition":"2"},{"key":"pcbi.1010357.ref008","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1093\/biomet\/asx009","article-title":"Instrumental variables as bias amplifiers with general outcome and confounding","volume":"104","author":"P Ding","year":"2017","journal-title":"Biometrika"},{"key":"pcbi.1010357.ref009","article-title":"An Ethical Framework for Global Governance for Health Research. Springer.","author":"K. Aramesh","year":"2019","journal-title":"Nature"},{"key":"pcbi.1010357.ref010","doi-asserted-by":"crossref","DOI":"10.1186\/s40537-017-0110-7","article-title":"Big healthcare data: preserving security and privacy.","volume":"5","author":"K Abouelmehdi","year":"2018","journal-title":"J Big Data."},{"key":"pcbi.1010357.ref011","doi-asserted-by":"crossref","first-page":"a2390","DOI":"10.1136\/bmj.a2390","article-title":"Improving the reporting of pragmatic trials: an extension of the CONSORT statement","volume":"337","author":"M Zwarenstein","year":"2008","journal-title":"BMJ"},{"key":"pcbi.1010357.ref012","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1186\/s41073-016-0014-7","article-title":"Updating standards for reporting diagnostic accuracy: the development of STARD 2015.","volume":"1","author":"DA Korevaar","year":"2016","journal-title":"Res Integr Peer Rev."},{"key":"pcbi.1010357.ref013","doi-asserted-by":"crossref","first-page":"h5527","DOI":"10.1136\/bmj.h5527","article-title":"STARD 2015: an updated list of essential items for reporting diagnostic accuracy studies","volume":"351","author":"PM Bossuyt","year":"2015","journal-title":"BMJ"},{"key":"pcbi.1010357.ref014","doi-asserted-by":"crossref","first-page":"3137","DOI":"10.1093\/bioinformatics\/btx373","article-title":"FQC Dashboard: integrates FastQC results into a web-based, interactive, and extensible FASTQ quality control tool","author":"J Brown","year":"2017","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref015","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1093\/bioinformatics\/btn647","article-title":"arrayQualityMetrics\u2014a bioconductor package for quality assessment of microarray data","volume":"25","author":"A Kauffmann","year":"2009","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref016","doi-asserted-by":"crossref","first-page":"e1900264","DOI":"10.1002\/pmic.201900264","article-title":"pseudoQC: A Regression-Based Simulation Software for Correction and Normalization of Complex Metabolomics and Proteomics Datasets","volume":"19","author":"S Wang","year":"2019","journal-title":"Proteomics"},{"key":"pcbi.1010357.ref017","doi-asserted-by":"crossref","first-page":"10241","DOI":"10.1021\/acs.analchem.0c00136","article-title":"Concepts and Software Package for Efficient Quality Control in Targeted Metabolomics Studies: MeTaQuaC","volume":"92","author":"M Kuhring","year":"2020","journal-title":"Anal Chem"},{"key":"pcbi.1010357.ref018","doi-asserted-by":"crossref","first-page":"3114","DOI":"10.1021\/pr401264n","article-title":"Normalyzer: a tool for rapid evaluation of normalization methods for omics data sets","volume":"13","author":"A Chawade","year":"2014","journal-title":"J Proteome Res"},{"key":"pcbi.1010357.ref019","doi-asserted-by":"crossref","first-page":"633438","DOI":"10.3389\/fnins.2021.633438","article-title":"Management and Quality Control of Large Neuroimaging Datasets: Developments From the Barcelona\u03b2eta Brain Research Center.","volume":"15","author":"J Huguet","year":"2021","journal-title":"Front Neurosci"},{"key":"pcbi.1010357.ref020","doi-asserted-by":"crossref","first-page":"280","DOI":"10.1186\/s12859-020-03603-5","article-title":"A cell-level quality control workflow for high-throughput image analysis","volume":"21","author":"M Qiu","year":"2020","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref021","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1186\/s12859-019-2748-y","article-title":"Data and knowledge management in translational research: implementation of the eTRIKS platform for the IMI OncoTrack consortium","volume":"20","author":"W Gu","year":"2019","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref022","doi-asserted-by":"crossref","DOI":"10.1201\/b12832","volume-title":"Practical Guide to Clinical Data Management","author":"S. Prokscha","year":"2011","edition":"3"},{"key":"pcbi.1010357.ref023","article-title":"Developing and adopting safe and effective digital biomarkers to improve patient outcomes.","volume":"2","author":"A Coravos","year":"2019","journal-title":"NPJ Digit Med."},{"key":"pcbi.1010357.ref024","first-page":"63","article-title":"Transfer of Clinical Drug Data to a Research Infrastructure on OMOP\u2014A FAIR Concept.","volume":"287","author":"I Reinecke","year":"2021","journal-title":"Stud Health Technol Inform."},{"key":"pcbi.1010357.ref025","doi-asserted-by":"crossref","first-page":"408","DOI":"10.3414\/ME9236","article-title":"CDISC standard-based electronic archiving of clinical trials.","volume":"48","author":"W Kuchinke","year":"2009","journal-title":"Methods Inf Med"},{"key":"pcbi.1010357.ref026","author":"PA Buescher","year":"2003","journal-title":"The International Classification of Diseases (ICD)."},{"key":"pcbi.1010357.ref027","article-title":"A State-of-the Art Review of SNOMED CT Terminology Binding and Recommendations for Practice and Research.","author":"A Rossander","year":"2021","journal-title":"Methods Inf Med."},{"key":"pcbi.1010357.ref028","doi-asserted-by":"crossref","first-page":"420","DOI":"10.1100\/tsw.2009.57","article-title":"Minimum Information About a Microarray Experiment (MIAME)\u2014successes, failures, challenges","volume":"9","author":"A. Brazma","year":"2009","journal-title":"ScientificWorldJournal"},{"key":"pcbi.1010357.ref029","doi-asserted-by":"crossref","first-page":"889","DOI":"10.1038\/nbt.1411","article-title":"Promoting coherent minimum reporting guidelines for biological and biomedical investigations: the MIBBI project","volume":"26","author":"CF Taylor","year":"2008","journal-title":"Nat Biotechnol"},{"key":"pcbi.1010357.ref030","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1002\/pmic.200600549","article-title":"Minimum Reporting Requirements for Proteomics: A MIAPE Primer","author":"CF Taylor","year":"2006","journal-title":"Proteomics"},{"key":"pcbi.1010357.ref031","doi-asserted-by":"crossref","first-page":"691","DOI":"10.1111\/j.1365-313X.2007.03387.x","article-title":"Quality control for plant metabolomics: reporting MSI-compliant studies","volume":"53","author":"O Fiehn","year":"2008","journal-title":"Plant J"},{"key":"pcbi.1010357.ref032","doi-asserted-by":"crossref","first-page":"D940","DOI":"10.1093\/nar\/gkr972","article-title":"Disease Ontology: a backbone for disease semantic integration","volume":"40","author":"LM Schriml","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"pcbi.1010357.ref033","first-page":"325","article-title":"A review on machine learning principles for multi-view biological data integration","volume":"19","author":"Y Li","year":"2018","journal-title":"Brief Bioinform"},{"key":"pcbi.1010357.ref034","volume-title":"Support vector machine applications in computational biology. Kernel Methods in Computational Biology.","year":"2004"},{"key":"pcbi.1010357.ref035","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1093\/biomet\/asaa007","article-title":"Sparse semiparametric canonical correlation analysis for data of mixed types","volume":"107","author":"G Yoon","year":"2020","journal-title":"Biometrika"},{"key":"pcbi.1010357.ref036","doi-asserted-by":"crossref","first-page":"2639","DOI":"10.1162\/0899766042321814","article-title":"Canonical correlation analysis: an overview with application to learning methods.","volume":"16","author":"DR Hardoon","year":"2004","journal-title":"Neural Comput"},{"key":"pcbi.1010357.ref037","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/S0893-6080(05)80023-1","article-title":"Stacked generalization.","author":"DH Wolpert","year":"1992","journal-title":"Neural Netw"},{"key":"pcbi.1010357.ref038","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1023\/B:MACH.0000015881.36452.6e","article-title":"Is Combining Classifiers with Stacking Better than Selecting the Best One?","author":"S D\u017eeroski","year":"2004","journal-title":"Mach Learn."},{"key":"pcbi.1010357.ref039","article-title":"The Conditional Super Learner","author":"G Valdes","year":"2021","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"pcbi.1010357.ref040","doi-asserted-by":"crossref","first-page":"829","DOI":"10.1162\/neco_a_01273","article-title":"A Survey on Deep Learning for Multimodal Data Fusion.","volume":"32","author":"J Gao","year":"2020","journal-title":"Neural Comput."},{"key":"pcbi.1010357.ref041","doi-asserted-by":"crossref","first-page":"162","DOI":"10.1186\/s12874-019-0802-0","article-title":"A plea for taking all available clinical information into account when assessing the predictive value of omics data","volume":"19","author":"A Volkmann","year":"2019","journal-title":"BMC Med Res Methodol"},{"key":"pcbi.1010357.ref042","doi-asserted-by":"crossref","first-page":"1904","DOI":"10.1093\/bib\/bbz136","article-title":"Combining clinical and molecular data in regression prediction models: insights from a simulation study","volume":"21","author":"R De Bin","year":"2020","journal-title":"Brief Bioinform"},{"key":"pcbi.1010357.ref043","doi-asserted-by":"crossref","first-page":"1655","DOI":"10.1214\/17-AOAS1125","article-title":"Sequential double cross-validation for assessment of added predictive ability in high-dimensional omic applications.","volume":"12","author":"M Rodr\u00edguez-Girondo","year":"2018","journal-title":"Ann Appl Stat"},{"key":"pcbi.1010357.ref044","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1186\/s12859-014-0385-z","article-title":"Comparison of classification methods that combine clinical data and high-dimensional mass spectrometry data","volume":"15","author":"C Truntzer","year":"2014","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref045","article-title":"Machine Learning Methods for Omics Data.","author":"W. Zhou","year":"2011","journal-title":"Dermatol Int"},{"key":"pcbi.1010357.ref046","doi-asserted-by":"crossref","first-page":"5310","DOI":"10.1002\/sim.6246","article-title":"Investigating the prediction ability of survival models based on both clinical and omics data: two case studies","volume":"33","author":"R De Bin","year":"2014","journal-title":"Stat Med"},{"key":"pcbi.1010357.ref047","article-title":"Systems Analytics and Integration of Big Omics Data.","author":"G. Hardiman","year":"2020","journal-title":"MDPI"},{"key":"pcbi.1010357.ref048","doi-asserted-by":"crossref","first-page":"32","DOI":"10.18547\/gcb.2016.vol2.iss1.e32","article-title":"Integrating heterogeneous omics data via statistical inference and learning techniques","volume":"2","author":"A Ahmad","year":"2016","journal-title":"Genom Comput Biol"},{"key":"pcbi.1010357.ref049","doi-asserted-by":"crossref","first-page":"e10","DOI":"10.5808\/GI.2020.18.1.e10","article-title":"Accelerating next generation sequencing data analysis: an evaluation of optimized best practices for Genome Analysis Toolkit algorithms","volume":"18","author":"KR Franke","year":"2020","journal-title":"Genomics Inform"},{"key":"pcbi.1010357.ref050","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1007\/978-1-0716-1839-4_7","article-title":"Microarray Data Preprocessing: From Experimental Design to Differential Analysis.","volume":"2401","author":"A Federico","year":"2022","journal-title":"Methods Mol Biol"},{"key":"pcbi.1010357.ref051","article-title":"The Impact of Preprocessing Methods for a Successful Prostate Cell Lines Discrimination Using Partial Least Squares Regression and Discriminant Analysis Based on Fourier Transform Infrared Imaging","volume":"10","author":"D Liberda","year":"2021","journal-title":"Cell"},{"key":"pcbi.1010357.ref052","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1002\/hbm.10062","article-title":"Fast robust automated brain extraction","volume":"17","author":"SM Smith","year":"2002","journal-title":"Hum Brain Mapp"},{"key":"pcbi.1010357.ref053","doi-asserted-by":"crossref","first-page":"162","DOI":"10.1006\/cbmr.1996.0014","article-title":"AFNI: software for analysis and visualization of functional magnetic resonance neuroimages","volume":"29","author":"RW Cox","year":"1996","journal-title":"Comput Biomed Res"},{"key":"pcbi.1010357.ref054","article-title":"freesurfer: Connecting the Freesurfer software with R.","volume":"599","author":"J Muschelli","year":"2018","journal-title":"F1000Res"},{"key":"pcbi.1010357.ref055","doi-asserted-by":"crossref","DOI":"10.1201\/9780429156397","volume-title":"Multiple Imputation of Missing Data in Practice: Basic Theory and Analysis Strategies.","author":"Y He","year":"2021"},{"key":"pcbi.1010357.ref056","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1186\/1471-2105-10-193","article-title":"Filtering genes for cluster and network analysis","volume":"10","author":"D Tritchler","year":"2009","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref057","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1186\/1471-2105-12-49","article-title":"A novel approach to the clustering of microarray data via nonparametric density estimation","volume":"12","author":"R De Bin","year":"2011","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref058","article-title":"Improving your data transformations: Applying the Box-Cox transformation","author":"J. Osborne","year":"2010","journal-title":"University of Massachusetts Amherst."},{"key":"pcbi.1010357.ref059","volume-title":"Applied Linear Regression","author":"S. Weisberg","year":"2014","edition":"4"},{"key":"pcbi.1010357.ref060","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1186\/s13059-019-1874-1","article-title":"Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression","volume":"20","author":"C Hafemeister","year":"2019","journal-title":"Genome Biol"},{"key":"pcbi.1010357.ref061","doi-asserted-by":"crossref","first-page":"966","DOI":"10.1093\/bioinformatics\/btg107","article-title":"Approximate variance-stabilizing transformations for gene-expression microarray data","volume":"19","author":"DM Rocke","year":"2003","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref062","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1089\/1536231041388348","article-title":"Discrimination models using variance-stabilizing transformation of metabolomic NMR data","volume":"8","author":"PV Purohit","year":"2004","journal-title":"OMICS"},{"key":"pcbi.1010357.ref063","first-page":"259","volume-title":"Principal coordinate analysis and non-metric multidimensional scaling. Statistics for Biology and Health","year":"2007"},{"key":"pcbi.1010357.ref064","doi-asserted-by":"crossref","first-page":"343","DOI":"10.2307\/2110441","article-title":"An introduction to nonmetric multidimensional scaling","volume":"19","author":"GB Rabinowitz","year":"1975","journal-title":"Am J Pol Sci"},{"key":"pcbi.1010357.ref065","article-title":"Visualizing data using t-SNE.","volume":"9","author":"L van der Maaten","year":"2008","journal-title":"J Mach Learn Res."},{"key":"pcbi.1010357.ref066","article-title":"Dimensionality reduction for visualizing single-cell data using UMAP","author":"E Becht","year":"2018","journal-title":"Nat Biotechnol"},{"key":"pcbi.1010357.ref067","doi-asserted-by":"crossref","first-page":"S56","DOI":"10.1038\/nmeth.1436","article-title":"Visualization of omics data for systems biology","volume":"7","author":"N Gehlenborg","year":"2010","journal-title":"Nat Methods"},{"key":"pcbi.1010357.ref068","volume-title":"Machine Learning Algorithms.","author":"G. Bonaccorso","year":"2017"},{"key":"pcbi.1010357.ref069","doi-asserted-by":"crossref","first-page":"1950017","DOI":"10.1142\/S0218001419500174","article-title":"A review on dimensionality reduction techniques.","volume":"33","author":"X Huang","year":"2019","journal-title":"Int J Pattern Recognit Artif Intell"},{"key":"pcbi.1010357.ref070","doi-asserted-by":"crossref","first-page":"342","DOI":"10.32614\/RJ-2018-039","article-title":"DimRed and coRanking\u2014unifying dimensionality reduction in R.","volume":"10","author":"G Kraemer","year":"2018","journal-title":"R J"},{"key":"pcbi.1010357.ref071","doi-asserted-by":"crossref","DOI":"10.1201\/9780429341830","volume-title":"Introduction to Data Science: Data Analysis and Prediction Algorithms with R","author":"RA Irizarry","year":"2019"},{"key":"pcbi.1010357.ref072","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1186\/s12859-019-2780-y","article-title":"Focused multidimensional scaling: interactive visualization for exploration of high-dimensional data","volume":"20","author":"LM Urpa","year":"2019","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref073","doi-asserted-by":"crossref","unstructured":"Hanussek M, Blohm M, Kintz M. Can AutoML outperform humans? An evaluation on popular OpenML datasets using AutoML Benchmark. 2020 2nd International Conference on Artificial Intelligence, Robotics and Control. 2020. doi: 10.1145\/3448326.3448353","DOI":"10.1145\/3448326.3448353"},{"key":"pcbi.1010357.ref074","doi-asserted-by":"crossref","first-page":"2044","DOI":"10.1016\/j.ins.2009.12.010","article-title":"Advanced nonparametric tests for multiple comparisons in the design of experiments in computational intelligence and data mining: Experimental analysis of power.","volume":"180","author":"S Garc\u00eda","year":"2010","journal-title":"Inf Sci."},{"key":"pcbi.1010357.ref075","doi-asserted-by":"crossref","first-page":"550","DOI":"10.1093\/biostatistics\/kxp011","article-title":"Testing the prediction error difference between 2 predictors.","volume":"10","author":"MA van de Wiel","year":"2009","journal-title":"Biostatistics"},{"key":"pcbi.1010357.ref076","doi-asserted-by":"crossref","first-page":"11","DOI":"10.20982\/tqmp.02.1.p011","article-title":"Confidence Intervals: From tests of statistical significance to confidence intervals, range hypotheses and substantial effects.","author":"D. Beaulieu-Pr\u00e9vost","year":"2006","journal-title":"Tutor Quant Methods Psychol."},{"key":"pcbi.1010357.ref077","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1080\/00031305.2019.1583913","article-title":"Moving to a World Beyond \u201cp < 0.05.\u201d","volume":"73","author":"RL Wasserstein","year":"2019","journal-title":"Am Stat"},{"key":"pcbi.1010357.ref078","doi-asserted-by":"crossref","first-page":"1180","DOI":"10.1126\/science.aaf5406","article-title":"Aligning statistical and scientific reasoning","volume":"352","author":"SN Goodman","year":"2016","journal-title":"Science"},{"key":"pcbi.1010357.ref079","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1007\/s10654-016-0149-3","article-title":"Statistical tests, P values, confidence intervals, and power: a guide to misinterpretations","volume":"31","author":"S Greenland","year":"2016","journal-title":"Eur J Epidemiol"},{"key":"pcbi.1010357.ref080","doi-asserted-by":"crossref","first-page":"607","DOI":"10.1038\/nmeth.3934","article-title":"A clash of cultures in discussions of the P value","author":"W. Huber","year":"2016","journal-title":"Nat Methods."},{"key":"pcbi.1010357.ref081","volume-title":"Ensemble Methods for Machine Learning","author":"G. Kunapuli","year":"2022"},{"key":"pcbi.1010357.ref082","doi-asserted-by":"crossref","unstructured":"Goder A, Filkov V. Consensus clustering algorithms: Comparison and refinement. Proceedings of the Tenth Workshop on Algorithm Engineering and Experiments (ALENEX). Philadelphia, PA: Society for Industrial and Applied Mathematics. 2008;2008:109\u2013117.","DOI":"10.1137\/1.9781611972887.11"},{"key":"pcbi.1010357.ref083","first-page":"73","article-title":"The Runtime of Learning.","author":"S Shalev-Shwartz","journal-title":"Understanding Machine Learning."},{"key":"pcbi.1010357.ref084","volume-title":"The Elements of Statistical Learning: Data Mining, Inference, and Prediction.","author":"T Hastie","year":"2017","edition":"2"},{"key":"pcbi.1010357.ref085","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9781316576533","volume-title":"Computer age statistical inference: Algorithms, evidence, and data science.","author":"B Efron","year":"2016"},{"key":"pcbi.1010357.ref086","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","article-title":"Regularization and variable selection via the elastic net.","volume":"67","author":"H Zou","year":"2005","journal-title":"J R Stat Soc Ser B Stat Methodol"},{"key":"pcbi.1010357.ref087","doi-asserted-by":"crossref","first-page":"270","DOI":"10.3389\/fgene.2013.00270","article-title":"Evaluation of the lasso and the elastic net in genome-wide association studies.","volume":"4","author":"P Waldmann","year":"2013","journal-title":"Front Genet."},{"key":"pcbi.1010357.ref088","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4842-6579-6","article-title":"Hyperparameter Optimization in Machine Learning","author":"T. Agrawal","year":"2021"},{"key":"pcbi.1010357.ref089","doi-asserted-by":"crossref","unstructured":"Frohlich H, Zell A. Efficient parameter selection for support vector machines in classification and regression via model-based global optimization. Proceedings 2005 IEEE International Joint Conference on Neural Networks. 2005. IEEE; 2006. doi: 10.1109\/ijcnn.2005.1556085","DOI":"10.1109\/IJCNN.2005.1556085"},{"key":"pcbi.1010357.ref090","first-page":"841","article-title":"Preventing Over-Fitting during Model Selection via Bayesian Regularisation of the Hyper-Parameters.","volume":"8","author":"GC Cawley","year":"2007","journal-title":"J Mach Learn Res"},{"key":"pcbi.1010357.ref091","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1016\/j.jmp.2018.12.004","article-title":"Shrinkage priors for Bayesian penalized regression.","volume":"89","author":"S van Erp","year":"2019","journal-title":"J Math Psychol"},{"key":"pcbi.1010357.ref092","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4614-7138-7","volume-title":"An Introduction to Statistical Learning: with Applications in R.","author":"G James","year":"2013"},{"key":"pcbi.1010357.ref093","doi-asserted-by":"crossref","first-page":"6562","DOI":"10.1073\/pnas.102102699","article-title":"Selection bias in gene extraction on the basis of microarray gene-expression data","volume":"99","author":"C Ambroise","year":"2002","journal-title":"Proc Natl Acad Sci U S A"},{"key":"pcbi.1010357.ref094","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1093\/jnci\/djk018","article-title":"Critical review of published microarray studies for cancer outcome and guidelines on statistical analysis and reporting","volume":"99","author":"A Dupuy","year":"2007","journal-title":"J Natl Cancer Inst"},{"key":"pcbi.1010357.ref095","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1007\/BF00117832","article-title":"Stacked regressions.","volume":"24","author":"L. Breiman","year":"1996","journal-title":"Mach Learn"},{"key":"pcbi.1010357.ref096","doi-asserted-by":"crossref","first-page":"2012","DOI":"10.1093\/bioinformatics\/btaa535","article-title":"Predictive and interpretable models via the stacked elastic net","volume":"37","author":"A Rauschenberger","year":"2021","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref097","article-title":"Random Forests with R. Springer.","author":"R Genuer","year":"2020","journal-title":"Nature"},{"key":"pcbi.1010357.ref098","doi-asserted-by":"crossref","unstructured":"Classification: Practice\u2014Random Forest. 2018. doi: 10.4135\/9781526469144","DOI":"10.4135\/9781526469144"},{"key":"pcbi.1010357.ref099","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1186\/1471-2105-7-3","article-title":"Gene selection and classification of microarray data using random forest","volume":"7","author":"R Diaz-Uriarte","year":"2006","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref100","doi-asserted-by":"crossref","first-page":"i413","DOI":"10.1093\/bioinformatics\/btw449","article-title":"TANDEM: a two-stage approach to maximize interpretability of drug response models based on multiple molecular data types","volume":"32","author":"N Aben","year":"2016","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref101","doi-asserted-by":"crossref","first-page":"571","DOI":"10.1007\/s11634-019-00375-6","article-title":"Sparse classification with paired covariates.","volume":"14","author":"A Rauschenberger","year":"2020","journal-title":"Adv Data Anal Classif"},{"key":"pcbi.1010357.ref102","doi-asserted-by":"crossref","first-page":"368","DOI":"10.1002\/sim.6732","article-title":"Better prediction by use of co-data: adaptive group-regularized ridge regression.","volume":"35","author":"MA van de Wiel","year":"2016","journal-title":"Stat Med"},{"key":"pcbi.1010357.ref103","doi-asserted-by":"crossref","first-page":"463","DOI":"10.1109\/TSMCC.2011.2161285","article-title":"A review on ensembles for the class imbalance problem: Bagging-, boosting-, and hybrid-based approaches","volume":"42","author":"M Galar","year":"2012","journal-title":"IEEE Trans Syst Man Cybern C Appl Rev"},{"key":"pcbi.1010357.ref104","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-98074-4","volume-title":"Learning from Imbalanced Data Sets","author":"A Fern\u00e1ndez","year":"2018"},{"key":"pcbi.1010357.ref105","doi-asserted-by":"crossref","first-page":"863","DOI":"10.1613\/jair.1.11192","article-title":"SMOTE for Learning from Imbalanced Data: Progress and Challenges, Marking the 15-year Anniversary.","volume":"61","author":"A Fernandez","year":"2018","journal-title":"J Artif Intell Res."},{"key":"pcbi.1010357.ref106","volume-title":"Imbalanced Classification with Python: Better Metrics, Balance Skewed Classes, Cost-Sensitive Learning","author":"J. Brownlee","year":"2020"},{"key":"pcbi.1010357.ref107","first-page":"128","article-title":"Calibration of machine learning models. Handbook of Research on Machine Learning Applications and Trends.","author":"A Bella","year":"2010","journal-title":"IGI Global."},{"key":"pcbi.1010357.ref108","first-page":"343","article-title":"Comparative Study between Validity Indices to Obtain the Optimal Cluster.","author":"Earth Observation Department, Centre of Space Techniques, Algeria","year":"2017","journal-title":"Int J Comput Electr Eng"},{"key":"pcbi.1010357.ref109","doi-asserted-by":"crossref","first-page":"3201","DOI":"10.1093\/bioinformatics\/bti517","article-title":"Computational cluster validation in post-genomic data analysis","volume":"21","author":"J Handl","year":"2005","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref110","author":"S. Bruhns","year":"2008","journal-title":"An Empirical Study of Performance Metrics for Classifier Evaluation in Machine Learning"},{"key":"pcbi.1010357.ref111","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-19425-7","volume-title":"Regression Modeling Strategies: With Applications to Linear Models, Logistic and Ordinal Regression, and Survival Analysis.","author":"FE Harrell","year":"2015","edition":"2"},{"key":"pcbi.1010357.ref112","doi-asserted-by":"crossref","first-page":"128","DOI":"10.1097\/EDE.0b013e3181c30fb2","article-title":"Assessing the performance of prediction models: a framework for traditional and novel measures.","volume":"21","author":"EW Steyerberg","year":"2010","journal-title":"Epidemiology"},{"key":"pcbi.1010357.ref113","doi-asserted-by":"crossref","first-page":"1082","DOI":"10.1016\/j.engappai.2012.02.012","article-title":"The AUK: A simple alternative to the AUC.","author":"U Kaymak","year":"2012","journal-title":"Eng Appl Artif Intell"},{"key":"pcbi.1010357.ref114","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1186\/s12874-017-0332-6","article-title":"Time-dependent ROC curve analysis in medical research: current methods and applications.","volume":"17","author":"AN Kamarudin","year":"2017","journal-title":"BMC Med Res Methodol"},{"key":"pcbi.1010357.ref115","doi-asserted-by":"crossref","first-page":"e1003047","DOI":"10.1371\/journal.pcbi.1003047","article-title":"Improving breast cancer survival analysis through competition-based multidimensional modeling","volume":"9","author":"E Bilal","year":"2013","journal-title":"PLoS Comput Biol"},{"key":"pcbi.1010357.ref116","first-page":"22","article-title":"Large-scale benchmark study of survival prediction methods using multi-omics data","author":"M Herrmann","year":"2021","journal-title":"Brief Bioinform"},{"key":"pcbi.1010357.ref117","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1186\/s41512-017-0020-3","article-title":"The Brier score does not evaluate the clinical utility of diagnostic tests or prediction models.","volume":"1","author":"M Assel","year":"2017","journal-title":"Diagn Progn Res"},{"key":"pcbi.1010357.ref118","first-page":"548","article-title":"Improvements on cross-validation: The .632+ bootstrap method","volume":"92","author":"B Efron","year":"1997","journal-title":"J Am Stat Assoc"},{"key":"pcbi.1010357.ref119","doi-asserted-by":"crossref","first-page":"3735","DOI":"10.1016\/j.csda.2009.04.009","article-title":"Estimating classification error rate: Repeated cross-validation, repeated hold-out and bootstrap.","volume":"53","author":"J-H Kim","year":"2009","journal-title":"Comput Stat Data Anal"},{"key":"pcbi.1010357.ref120","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s42113-018-0011-7","article-title":"Limitations of Bayesian Leave-One-Out Cross-Validation for Model Selection.","volume":"2","author":"QF Gronau","year":"2019","journal-title":"Comput Brain Behav"},{"key":"pcbi.1010357.ref121","doi-asserted-by":"crossref","first-page":"1267","DOI":"10.1016\/j.patcog.2003.08.017","article-title":"Bolstered error estimation.","volume":"37","author":"U Braga-Neto","year":"2004","journal-title":"Pattern Recogn"},{"key":"pcbi.1010357.ref122","doi-asserted-by":"crossref","first-page":"3056","DOI":"10.1093\/bioinformatics\/btr518","article-title":"High-dimensional bolstered error estimation","volume":"27","author":"C Sima","year":"2011","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref123","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1038\/s41568-020-00327-9","article-title":"Designing deep learning studies in cancer diagnostics","volume":"21","author":"A Kleppe","year":"2021","journal-title":"Nat Rev Cancer"},{"key":"pcbi.1010357.ref124","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4614-6849-3","author":"M Kuhn","year":"2013","journal-title":"Applied Predictive Modeling"},{"key":"pcbi.1010357.ref125","author":"G. Hackeling","year":"2017","journal-title":"Mastering Machine Learning with Scikit-Learn"},{"key":"pcbi.1010357.ref126","volume-title":"Machine Learning with R: Expert techniques for predictive modeling","author":"B. Lantz","year":"2019","edition":"3"},{"key":"pcbi.1010357.ref127","volume-title":"Evolution of Translational Omics: Lessons Learned and the Path Forward","author":"Committee on the Review of Omics-Based Tests for Predicting Patient Outcomes in Clinical Trials, Board on Health Care Services, Board on Health Sciences Policy, Institute of Medicine","year":"2014"},{"key":"pcbi.1010357.ref128","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/j.cca.2013.09.018","article-title":"From biomarkers to medical tests: the changing landscape of test evaluation","volume":"427","author":"AR Horvath","year":"2014","journal-title":"Clin Chim Acta"},{"key":"pcbi.1010357.ref129","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1186\/1471-2105-15-91","article-title":"Differential meta-analysis of RNA-seq data from multiple studies","volume":"15","author":"A Rau","year":"2014","journal-title":"BMC Bioinformatics"},{"key":"pcbi.1010357.ref130","doi-asserted-by":"crossref","first-page":"214","DOI":"10.1016\/j.arr.2018.07.004","article-title":"Towards frailty biomarkers: Candidates from genes and pathways regulated in aging and age-related diseases.","volume":"47","author":"AL Cardoso","year":"2018","journal-title":"Ageing Res Rev"},{"key":"pcbi.1010357.ref131","doi-asserted-by":"crossref","first-page":"440","DOI":"10.1093\/bib\/bbv044","article-title":"Using prior knowledge from cellular pathways and molecular networks for diagnostic specimen classification","volume":"17","author":"E. Glaab","year":"2016","journal-title":"Brief Bioinform"},{"key":"pcbi.1010357.ref132","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1016\/j.tibtech.2004.06.005","article-title":"Biomarker discovery and validation: technologies and integrative approaches","volume":"22","author":"SE Ilyin","year":"2004","journal-title":"Trends Biotechnol"},{"key":"pcbi.1010357.ref133","doi-asserted-by":"crossref","first-page":"329","DOI":"10.1111\/insr.12016","article-title":"Fifty Years of Classification and Regression Trees.","author":"W-Y Loh","year":"2014","journal-title":"Int Stat Rev."},{"key":"pcbi.1010357.ref134","first-page":"129","article-title":"Classification and Regression Trees (CART). Statistical Learning from a Regression.","author":"RA Berk","year":"2016","journal-title":"Perspective"},{"key":"pcbi.1010357.ref135","author":"E Frank","year":"2008","journal-title":"Generating Accurate Rule Sets Without Global Optimization"},{"key":"pcbi.1010357.ref136","doi-asserted-by":"crossref","first-page":"e39932","DOI":"10.1371\/journal.pone.0039932","article-title":"Using rule-based machine learning for candidate disease gene prioritization and sample classification of cancer gene expression data","volume":"7","author":"E Glaab","year":"2012","journal-title":"PLoS ONE"},{"key":"pcbi.1010357.ref137","doi-asserted-by":"crossref","DOI":"10.1142\/9789812799470_0018","article-title":"Learning decision rules from uncertain data using rough sets","author":"S Trabelsi","year":"2008","journal-title":"Computational Intelligence in Decision and Control"},{"key":"pcbi.1010357.ref138","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1093\/bioinformatics\/btq005","article-title":"Bayesian rule learning for biomedical data mining","author":"V Gopalakrishnan","year":"2010","journal-title":"Bioinformatics"},{"key":"pcbi.1010357.ref139","article-title":"Functional networks inference from rule-based machine learning models.","author":"N Lazzarini","year":"2016","journal-title":"BioData Mining."},{"key":"pcbi.1010357.ref140","doi-asserted-by":"crossref","first-page":"1814","DOI":"10.1109\/JBHI.2019.2944865","article-title":"Pathway-Based Single-Cell RNA-Seq Classification, Clustering, and Construction of Gene-Gene Interactions Networks Using Random Forests.","volume":"24","author":"H Wang","year":"2020","journal-title":"IEEE J Biomed Health Inform."},{"key":"pcbi.1010357.ref141","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1016\/j.ymeth.2019.06.017","article-title":"Pathway-based deep clustering for molecular subtyping of cancer","volume":"173","author":"T Mallavarapu","year":"2020","journal-title":"Methods"},{"key":"pcbi.1010357.ref142","article-title":"NetAUC: A network-based multi-biomarker identification method by AUC optimization.","author":"X-Y Li","year":"2021","journal-title":"Methods"},{"key":"pcbi.1010357.ref143","unstructured":"Lundberg SM, Lee S-I. A unified approach to interpreting model predictions. Proceedings of the 31st international conference on neural information processing systems. 2017. p. 4768\u20134777."},{"key":"pcbi.1010357.ref144","article-title":"\u201cWhy Should I Trust You?\u201d: Explaining the predictions of any classifier.","author":"MT Ribeiro","year":"2016","journal-title":"arXiv [cs.LG]."},{"key":"pcbi.1010357.ref145","doi-asserted-by":"crossref","unstructured":"Lou Y, Caruana R, Gehrke J, Hooker G. Accurate intelligible models with pairwise interactions. Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining. New York, NY, USA: Association for Computing Machinery; 2013. p. 623\u2013631.","DOI":"10.1145\/2487575.2487579"},{"key":"pcbi.1010357.ref146","volume-title":"Demystifying Black-box Models with Symbolic Metamodels.","author":"AM Alaa","year":"2019"},{"key":"pcbi.1010357.ref147","doi-asserted-by":"crossref","DOI":"10.3390\/e23010018","article-title":"Explainable AI: A Review of Machine Learning Interpretability Methods.","volume":"23","author":"P Linardatos","year":"2020","journal-title":"Entropy"},{"key":"pcbi.1010357.ref148","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1016\/j.inffus.2019.12.012","article-title":"Explainable Artificial Intelligence (XAI): Concepts, taxonomies, opportunities and challenges toward responsible AI.","volume":"58","author":"A Barredo Arrieta","year":"2020","journal-title":"Inf Fusion."},{"key":"pcbi.1010357.ref149","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1186\/s12911-020-01332-6","article-title":"Precise4Q consortium. Explainability for artificial intelligence in healthcare: a multidisciplinary perspective","volume":"20","author":"J Amann","year":"2020","journal-title":"BMC Med Inform Decis Mak"},{"key":"pcbi.1010357.ref150","doi-asserted-by":"crossref","DOI":"10.1186\/s40537-016-0043-6","article-title":"A survey of transfer learning.","volume":"3","author":"K Weiss","year":"2016","journal-title":"J Big Data."},{"key":"pcbi.1010357.ref151","doi-asserted-by":"crossref","first-page":"e195","DOI":"10.1016\/S2589-7500(20)30292-2","article-title":"Approval of artificial intelligence and machine learning-based medical devices in the USA and Europe (2015\u201320): a comparative analysis.","volume":"3","author":"UJ Muehlematter","year":"2021","journal-title":"Lancet Digit Health."},{"key":"pcbi.1010357.ref152","volume-title":"Good machine learning practice for medical device development","author":"U.S. Food and Drug Administration","year":"2021"},{"key":"pcbi.1010357.ref153","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1038\/nature12564","article-title":"Criteria for the use of omics-based predictors in clinical trials","volume":"502","author":"LM McShane","year":"2013","journal-title":"Nature"},{"key":"pcbi.1010357.ref154","article-title":"Assessing Credibility of Computational Modeling Through Verification and Validation: Application to Medical Devices.","year":"2018","journal-title":"Am Soc Mech Eng."},{"key":"pcbi.1010357.ref155","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12916-014-0241-z","article-title":"Transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD): the TRIPOD Statement.","volume":"13","author":"GS Collins","year":"2015","journal-title":"BMC Med"}],"container-title":["PLOS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1010357","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,11]],"date-time":"2022-08-11T17:28:07Z","timestamp":1660238887000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1010357"}},"subtitle":[],"editor":[{"given":"Francis","family":"Ouellette","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2022,8,11]]},"references-count":155,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2022,8,11]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1010357","relation":{},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,11]]}}}