{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:51:55Z","timestamp":1759333915757,"version":"build-2065373602"},"reference-count":48,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"vor","delay-in-days":16,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Ministry of Environment and Energy Security"},{"DOI":"10.13039\/501100000780","name":"European Union","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Understanding how bacterial species relate to clinical health indicators can reveal microbiome signatures of disease, offering insights into conditions such as obesity or liver disease. However, analyzing such data requires methods that address compositionality, high dimensionality, sparsity, and outliers.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We tackle the challenge of identifying microbiome components linked to health indicators through a robust multivariate compositional regression model. Our method addresses the high dimensionality, sparsity, and compositional nature of microbiome data while maintaining control of the false discovery rate (FDR). By incorporating outlier robustness and a derandomization step, we enhance the stability and reproducibility of results, surpassing current techniques like the Multi-Response Knockoff Filter (MRKF). In simulation studies, our method outperforms MRKF in terms of FDR control, power, and robustness. In real data applications, it leads to valuable biological insights, such as identifying microbial species associated with specific clinical parameters.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>Software in R code format, along with synthetic data example illustrations and comprehensive documentation, is available at https:\/\/github.com\/giannamonti\/RobMReg.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf506","type":"journal-article","created":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T23:19:03Z","timestamp":1758323943000},"source":"Crossref","is-referenced-by-count":0,"title":["Robust multivariate regression controlling false discoveries for microbiome data"],"prefix":"10.1093","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7952-3618","authenticated-orcid":false,"given":"Gianna Serafina","family":"Monti","sequence":"first","affiliation":[{"name":"Department of Economics, Management and Statistics, University of Milano-Bicocca , Milan 20126,","place":["Italy"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0313-3506","authenticated-orcid":false,"given":"Meritxell","family":"Pujolassos","sequence":"additional","affiliation":[{"name":"Bioscience Department, Faculty of Sciences, Technology and Engineering, University of Vic, Central University of Catalunya , Vic 08500,","place":["Spain"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9334-415X","authenticated-orcid":false,"given":"Malu","family":"Calle Rosingana","sequence":"additional","affiliation":[{"name":"Bioscience Department, Faculty of Sciences, Technology and Engineering, University of Vic, Central University of Catalunya , Vic 08500,","place":["Spain"]},{"name":"Institut de Recerca i Innovaci\u00f3 en Ci\u00e8ncies de la Vida i de la Salut, Catalunya Central (IRIS-CC) , Vic, Barcelona 08500,","place":["Spain"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8014-4682","authenticated-orcid":false,"given":"Peter","family":"Filzmoser","sequence":"additional","affiliation":[{"name":"Institute of Statistics & Mathematical Methods in Economics, Vienna University of Technology , Vienna 1040,","place":["Austria"]}]}],"member":"286","published-online":{"date-parts":[[2025,9,17]]},"reference":[{"key":"2025093001213405800_btaf506-B1","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1111\/j.2517-6161.1982.tb01195.x","article-title":"The statistical analysis of compositional data","volume":"44","author":"Aitchison","year":"1982","journal-title":"J R Stat Soc Series B Stat Methodol"},{"key":"2025093001213405800_btaf506-B2","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1080\/01621459.2016.1148609","article-title":"Robust maximum association estimators","volume":"112","author":"Alfons","year":"2017","journal-title":"J Am Stat Assoc"},{"key":"2025093001213405800_btaf506-B3","doi-asserted-by":"publisher","first-page":"2055","DOI":"10.1214\/15-AOS1337","article-title":"Controlling the false discovery rate via knockoffs","volume":"43","author":"Barber","year":"2015","journal-title":"Ann Stat"},{"key":"2025093001213405800_btaf506-B4","doi-asserted-by":"publisher","first-page":"2504","DOI":"10.1214\/18-AOS1755","article-title":"A knockoff filter for high-dimensional selective inference","volume":"47","author":"Barber","year":"2019","journal-title":"Ann Stat"},{"key":"2025093001213405800_btaf506-B5","doi-asserted-by":"publisher","first-page":"2648","DOI":"10.1214\/22-AOAS1607","article-title":"Testing for differential abundance in compositional counts data, with application to microbiome studies","volume":"16","author":"Brill","year":"2022","journal-title":"Ann Appl Stat"},{"key":"2025093001213405800_btaf506-B6","doi-asserted-by":"publisher","first-page":"551","DOI":"10.1111\/rssb.12265","article-title":"Panning for gold: \u2018model-X\u2019 knockoffs for high dimensional controlled variable selection","volume":"80","author":"Cand\u00e8s","year":"2018","journal-title":"J R Stat Soc Series B Stat Methodol"},{"key":"2025093001213405800_btaf506-B7","doi-asserted-by":"publisher","first-page":"961","DOI":"10.1080\/10618600.2022.2118752","article-title":"Robust multivariate lasso regression with covariance estimation","volume":"32","author":"Chang","year":"2023","journal-title":"J Comput Graph Stat"},{"key":"2025093001213405800_btaf506-B8","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1093\/bioinformatics\/btx650","article-title":"An omnibus test for differential distribution analysis of microbiome sequencing data","volume":"34","author":"Chen","year":"2018","journal-title":"Bioinformatics"},{"key":"2025093001213405800_btaf506-B9","doi-asserted-by":"publisher","first-page":"44","DOI":"10.2436\/20.8080.02.100","article-title":"On interpretations of tests and effect sizes in regression models with a compositional predictor","volume":"06","author":"Coenders","year":"2020","journal-title":"SORT"},{"first-page":"1851","year":"2016","author":"Dai","key":"2025093001213405800_btaf506-B10"},{"key":"2025093001213405800_btaf506-B11","doi-asserted-by":"publisher","first-page":"2738","DOI":"10.1038\/s41598-021-82538-0","article-title":"Alterations in the gut bacterial microbiome in people with type 2 diabetes mellitus and diabetic retinopathy","volume":"11","author":"Das","year":"2021","journal-title":"Sci Rep"},{"key":"2025093001213405800_btaf506-B12","doi-asserted-by":"publisher","first-page":"1694","DOI":"10.1016\/j.csda.2007.05.018","article-title":"Outlier identification in high dimensions","volume":"52","author":"Filzmoser","year":"2008","journal-title":"Comput Stat Data Anal"},{"key":"2025093001213405800_btaf506-B13","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1198\/016214502760047131","article-title":"Model-based clustering, discriminant analysis, and density estimation","volume":"97","author":"Fraley","year":"2002","journal-title":"J Am Stat Assoc"},{"key":"2025093001213405800_btaf506-B14","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1214\/07-AOAS131","article-title":"Pathwise coordinate optimization","volume":"1","author":"Friedman","year":"2007","journal-title":"Ann Appl Stat"},{"key":"2025093001213405800_btaf506-B15","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1093\/biostatistics\/kxm045","article-title":"Sparse inverse covariance estimation with the graphical lasso","volume":"9","author":"Friedman","year":"2008","journal-title":"Biostatistics"},{"year":"2023","author":"Friedman","key":"2025093001213405800_btaf506-B16"},{"key":"2025093001213405800_btaf506-B17","doi-asserted-by":"publisher","first-page":"2224","DOI":"10.3389\/fmicb.2017.02224","article-title":"Microbiome datasets are compositional: And this is not optional","volume":"8","author":"Gloor","year":"2017","journal-title":"Front Microbiol"},{"key":"2025093001213405800_btaf506-B18","doi-asserted-by":"publisher","first-page":"1772","DOI":"10.1038\/s41598-019-38874-3","article-title":"Faecal bacterial and short-chain fatty acids signature in hypercholesterolemia","volume":"9","author":"Granado-Serrano","year":"2019","journal-title":"Sci Rep"},{"key":"2025093001213405800_btaf506-B19","doi-asserted-by":"publisher","first-page":"727398","DOI":"10.3389\/fmicb.2021.727398","article-title":"Compositional data analysis of microbiome and any-omics datasets: A validation of the additive logratio transformation","volume":"12","author":"Greenacre","year":"2021","journal-title":"Front Microbiol"},{"key":"2025093001213405800_btaf506-B20","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1016\/j.copbio.2017.08.004","article-title":"Intestinal lactobacillus in health and disease, a driver or just along for the ride?","volume":"49","author":"Heeney","year":"2018","journal-title":"Curr Opin Biotechnol"},{"key":"2025093001213405800_btaf506-B21","doi-asserted-by":"publisher","first-page":"102132","DOI":"10.1016\/j.eclinm.2023.102132","article-title":"Distinct signatures of gut microbiota and metabolites in different types of diabetes: a population-based cross-sectional study","volume":"62","author":"Hu","year":"2023","journal-title":"EClinicalMedicine"},{"key":"2025093001213405800_btaf506-B22","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1038\/nature12198","article-title":"Gut metagenome in european women with normal, impaired and diabetic glucose control","volume":"498","author":"Karlsson","year":"2013","journal-title":"Nature"},{"key":"2025093001213405800_btaf506-B23","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1016\/j.jmva.2012.03.013","article-title":"Simultaneous multiple response regression and inverse covariance matrix estimation via penalized gaussian maximum likelihood","volume":"111","author":"Lee","year":"2012","journal-title":"J Multivar Anal"},{"key":"2025093001213405800_btaf506-B24","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1186\/s40168-016-0222-x","article-title":"Gut microbiota dysbiosis contributes to the development of hypertension","volume":"5","author":"Li","year":"2017","journal-title":"Microbiome"},{"key":"2025093001213405800_btaf506-B25","doi-asserted-by":"publisher","first-page":"1056","DOI":"10.21037\/atm-22-3967","article-title":"Correlation between gut bacteria phascolarctobacterium and exogenous metabolite \u03b1-linolenic acid in T2DM: a case-control study","volume":"10","author":"Li","year":"2022","journal-title":"Ann Transl Med"},{"key":"2025093001213405800_btaf506-B26","doi-asserted-by":"publisher","first-page":"785","DOI":"10.1093\/biomet\/asu031","article-title":"Variable selection in regression with compositional covariates","volume":"101","author":"Lin","year":"2014","journal-title":"Biometrika"},{"key":"2025093001213405800_btaf506-B27","doi-asserted-by":"publisher","first-page":"3959","DOI":"10.1093\/bioinformatics\/btaa255","article-title":"A novel normalization and differential abundance test framework for microbiome data","volume":"36","author":"Ma","year":"2020","journal-title":"Bioinformatics"},{"key":"2025093001213405800_btaf506-B28","doi-asserted-by":"publisher","first-page":"27663","DOI":"10.3402\/mehd.v26.27663","article-title":"Analysis of composition of microbiomes: a novel method for studying microbial composition","volume":"26","author":"Mandal","year":"2015","journal-title":"Microb Ecol Health Dis"},{"key":"2025093001213405800_btaf506-B29","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1198\/TECH.2010.09114","article-title":"Robust ridge regression for high-dimensional data","volume":"53","author":"Maronna","year":"2011","journal-title":"Technometrics"},{"volume-title":"Robust Statistics: Theory and Methods (with R)","year":"2019","author":"Maronna","key":"2025093001213405800_btaf506-B30"},{"key":"2025093001213405800_btaf506-B31","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1007\/s00180-022-01268-7","article-title":"A robust knockoff filter for sparse regression analysis of microbiome compositional data","volume":"39","author":"Monti","year":"2024","journal-title":"Comput Stat"},{"key":"2025093001213405800_btaf506-B32","doi-asserted-by":"publisher","first-page":"2719","DOI":"10.1038\/s41467-019-10656-5","article-title":"Establishing microbial composition measurement standards with reference frames","volume":"10","author":"Morton","year":"2019","journal-title":"Nat Commun"},{"key":"2025093001213405800_btaf506-B33","doi-asserted-by":"publisher","first-page":"1023","DOI":"10.1038\/nmeth.4468","article-title":"Accessible, curated metagenomic data through ExperimentHub","volume":"14","author":"Pasolli","year":"2017","journal-title":"Nat Methods"},{"key":"2025093001213405800_btaf506-B34","doi-asserted-by":"publisher","DOI":"10.1016\/j.ecosta.2024.02.002","article-title":"Challenges of cellwise outliers","author":"Raymaekers","year":"2024","journal-title":"Econom Stat"},{"key":"2025093001213405800_btaf506-B35","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1093\/jrsssb\/qkad085","article-title":"Derandomised knockoffs: leveraging e-values for false discovery rate control","volume":"86","author":"Ren","year":"2024","journal-title":"J R Stat Soc Series B Stat Methodol"},{"key":"2025093001213405800_btaf506-B36","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1198\/jcgs.2010.09188","article-title":"Sparse multivariate regression with covariance estimation","volume":"19","author":"Rothman","year":"2010","journal-title":"J Comput Graph Stat"},{"key":"2025093001213405800_btaf506-B37","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1214\/10-STS347","article-title":"Test martingales, bayes factors and p-values","volume":"26","author":"Shafer","year":"2011","journal-title":"Stat Sci"},{"key":"2025093001213405800_btaf506-B38","doi-asserted-by":"publisher","first-page":"2269","DOI":"10.1093\/bioinformatics\/btv165","article-title":"A robust approach for identifying differentially abundant features in metagenomic samples","volume":"31","author":"Sohn","year":"2015","journal-title":"Bioinformatics"},{"key":"2025093001213405800_btaf506-B39","doi-asserted-by":"publisher","first-page":"984","DOI":"10.1111\/biom.13336","article-title":"Compositional knockoff filter for high-dimensional regression analysis of microbiome data","volume":"77","author":"Srinivasan","year":"2021","journal-title":"Biometrics"},{"key":"2025093001213405800_btaf506-B40","doi-asserted-by":"publisher","first-page":"107621","DOI":"10.1016\/j.csda.2022.107621","article-title":"Identification of microbial features in multivariate regression under false discovery rate control","volume":"181","author":"Srinivasan","year":"2023","journal-title":"Comput Stat Data Anal"},{"key":"2025093001213405800_btaf506-B41","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","article-title":"Regression shrinkage and selection via the lasso","volume":"58","author":"Tibshirani","year":"1996","journal-title":"J R Stat Soc Series B Stat Methodol"},{"key":"2025093001213405800_btaf506-B42","doi-asserted-by":"publisher","DOI":"10.3390\/microorganisms8040573","article-title":"The controversial role of human gut Lachnospiraceae","volume":"573","author":"Vacca","year":"2020","journal-title":"Microorganisms"},{"key":"2025093001213405800_btaf506-B43","doi-asserted-by":"publisher","DOI":"10.1128\/mSystems.00130-17","article-title":"Colonic butyrate-producing communities in humans: an overview using omics data","volume":"2","author":"Vital","year":"2017","journal-title":"mSystems"},{"key":"2025093001213405800_btaf506-B44","doi-asserted-by":"publisher","first-page":"1736","DOI":"10.1214\/20-AOS2020","article-title":"E-values: Calibration, combination and applications","volume":"49","author":"Vovk","year":"2021","journal-title":"Ann Statist"},{"key":"2025093001213405800_btaf506-B45","doi-asserted-by":"publisher","first-page":"822","DOI":"10.1111\/rssb.12489","article-title":"False discovery rate control with E-values","volume":"84","author":"Wang","year":"2022","journal-title":"J R Stat Soc Series B Stat Methodol"},{"key":"2025093001213405800_btaf506-B46","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1093\/biostatistics\/kxaa049","article-title":"An optimal kernel-based multivariate U-statistic to test for associations with multiple phenotypes","volume":"23","author":"Wen","year":"2022","journal-title":"Biostatistics"},{"key":"2025093001213405800_btaf506-B47","doi-asserted-by":"publisher","first-page":"1418","DOI":"10.1198\/016214506000000735","article-title":"The adaptive lasso and its oracle properties","volume":"101","author":"Zou","year":"2006","journal-title":"J Am Stat Assoc"},{"key":"2025093001213405800_btaf506-B48","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.1214\/08-AOS625","article-title":"On the adaptive elastic-net with a diverging number of parameters","volume":"37","author":"Zou","year":"2009","journal-title":"Ann Stat"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf506\/64302264\/btaf506.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/9\/btaf506\/64302264\/btaf506.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/9\/btaf506\/64302264\/btaf506.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T05:21:41Z","timestamp":1759209701000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf506\/8256681"}},"subtitle":[],"editor":[{"given":"Laura","family":"Cantini","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2025,9,1]]},"references-count":48,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,9,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf506","relation":{},"ISSN":["1367-4811"],"issn-type":[{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2025,9]]},"published":{"date-parts":[[2025,9,1]]},"article-number":"btaf506"}}