{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T19:06:22Z","timestamp":1767467182967,"version":"3.48.0"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T00:00:00Z","timestamp":1764288000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T00:00:00Z","timestamp":1767398400000},"content-version":"vor","delay-in-days":36,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"DOI":"10.1186\/s12859-025-06312-z","type":"journal-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T20:42:54Z","timestamp":1764362574000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["TaxaPLN: a taxonomy-aware augmentation strategy for microbiome-trait classification including metadata"],"prefix":"10.1186","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4561-3797","authenticated-orcid":false,"given":"Alexandre","family":"Chaussard","sequence":"first","affiliation":[]},{"given":"Anna","family":"Bonnet","sequence":"additional","affiliation":[]},{"given":"Sylvain","family":"Le Corff","sequence":"additional","affiliation":[]},{"given":"Harry","family":"Sokol","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,28]]},"reference":[{"issue":"7","key":"6312_CR1","doi-asserted-by":"publisher","first-page":"410","DOI":"10.1038\/s41579-018-0029-9","volume":"16","author":"R Knight","year":"2018","unstructured":"Knight R, Vrbanac A, Taylor BC, Aksenov A, Callewaert C, Debelius J, et al. Best practices for analysing microbiomes. Nat Rev Microbiol. 2018;16(7):410\u201322.","journal-title":"Nat Rev Microbiol"},{"issue":"4","key":"6312_CR2","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1038\/nm.4517","volume":"24","author":"JA Gilbert","year":"2018","unstructured":"Gilbert JA, Blaser MJ, Caporaso JG, Jansson JK, Lynch SV, Knight R. Current understanding of the human microbiome. Nat Med. 2018;24(4):392\u2013400.","journal-title":"Nat Med"},{"issue":"7758","key":"6312_CR3","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1038\/s41586-019-1237-9","volume":"569","author":"J Lloyd-Price","year":"2019","unstructured":"Lloyd-Price J, Arze C, Ananthakrishnan AN, Schirmer M, Avila-Pacheco J, Poon TW, et al. Multi-omics of the gut microbial ecosystem in inflammatory bowel diseases. Nature. 2019;569(7758):655\u201362.","journal-title":"Nature"},{"issue":"2","key":"6312_CR4","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1016\/j.chom.2015.01.001","volume":"17","author":"AD Kostic","year":"2015","unstructured":"Kostic AD, Gevers D, Siljander H, Vatanen T, Hy\u00f6tyl\u00e4inen T, H\u00e4m\u00e4l\u00e4inen AM, et al. The dynamics of the human infant gut microbiome in development and in progression toward type 1 diabetes. Cell Host Microbe. 2015;17(2):260\u201373.","journal-title":"Cell Host Microbe"},{"issue":"11","key":"6312_CR5","doi-asserted-by":"publisher","first-page":"766","DOI":"10.15252\/msb.20145645","volume":"10","author":"G Zeller","year":"2014","unstructured":"Zeller G, Tap J, Voigt AY, Sunagawa S, Kultima JR, Costea PI, et al. Potential of fecal microbiota for early-stage detection of colorectal cancer. Mol Syst Biol. 2014;10(11):766.","journal-title":"Mol Syst Biol"},{"issue":"1","key":"6312_CR6","doi-asserted-by":"publisher","first-page":"1612","DOI":"10.1038\/s41467-020-15457-9","volume":"11","author":"F Zhu","year":"2020","unstructured":"Zhu F, Ju Y, Wang W, Wang Q, Guo R, Ma Q, et al. Metagenome-wide association of gut microbiome features for schizophrenia. Nat Commun. 2020;11(1):1612.","journal-title":"Nat Commun"},{"key":"6312_CR7","doi-asserted-by":"publisher","DOI":"10.3389\/fmicb.2021.634511","volume":"12","author":"LJ Marcos-Zambrano","year":"2021","unstructured":"Marcos-Zambrano LJ, Karaduzovic-Hadziabdic K, Loncar Turukalo T, Przymus P, Trajkovik V, Aasmets O, et al. Applications of machine learning in human microbiome studies: a review on feature selection, biomarker identification, disease prediction and treatment. Front Microbiol. 2021;12:634511.","journal-title":"Front Microbiol"},{"issue":"1","key":"6312_CR8","doi-asserted-by":"publisher","first-page":"25099","DOI":"10.1038\/s41598-024-76513-8","volume":"14","author":"W Qian","year":"2024","unstructured":"Qian W, Stanley KG, Aziz Z, Aziz U, Siciliano SD. SPLANG\u2014a synthetic Poisson-Lognormal-based abundance and network generative model for microbial interaction inference algorithms. Sci Rep. 2024;14(1):25099.","journal-title":"Sci Rep"},{"key":"6312_CR9","doi-asserted-by":"publisher","first-page":"2224","DOI":"10.3389\/fmicb.2017.02224","volume":"8","author":"GB Gloor","year":"2017","unstructured":"Gloor GB, Macklaim JM, Pawlowsky-Glahn V, Egozcue JJ. Microbiome datasets are compositional: and this is not optional. Front Microbiol. 2017;8:2224.","journal-title":"Front Microbiol"},{"key":"6312_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.array.2022.100258","volume":"16","author":"A Mumuni","year":"2022","unstructured":"Mumuni A, Mumuni F. Data augmentation: a comprehensive survey of modern approaches. Array. 2022;16:100258.","journal-title":"Array"},{"key":"6312_CR11","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP. SMOTE: synthetic minority over-sampling technique. J Artif Intell Res. 2002;16:321\u201357.","journal-title":"J Artif Intell Res"},{"key":"6312_CR12","unstructured":"Zhang H, Ciss\u00e9 M, Dauphin YN, Lopez-Paz D. mixup: Beyond Empirical Risk Minimization. In: ICLR. OpenReview.net; 2018. Vancouver, Canada."},{"key":"6312_CR13","unstructured":"Gordon-Rodr\u00edguez E, Quinn TP, Cunningham JP. Data Augmentation for Compositional Data: Advancing Predictive Models of the Microbiome. In: NeurIPS; 2022. Los Angeles, USA."},{"issue":"14","key":"6312_CR14","doi-asserted-by":"publisher","first-page":"i31","DOI":"10.1093\/bioinformatics\/btz394","volume":"35","author":"E Sayyari","year":"2019","unstructured":"Sayyari E, Kawas B, Mirarab S. TADA: phylogenetic augmentation of microbiome samples enhances phenotype classification. Bioinformatics. 2019;35(14):i31\u201340.","journal-title":"Bioinformatics"},{"issue":"2","key":"6312_CR15","doi-asserted-by":"publisher","first-page":"btaf014","DOI":"10.1093\/bioinformatics\/btaf014","volume":"41","author":"Y Jiang","year":"2025","unstructured":"Jiang Y, Liao D, Zhu Q, Lu YY. PhyloMix: enhancing microbiome-trait association prediction through phylogeny-mixing augmentation. Bioinformatics. 2025;41(2):btaf014.","journal-title":"Bioinformatics"},{"issue":"4","key":"6312_CR16","doi-asserted-by":"publisher","first-page":"btae161","DOI":"10.1093\/bioinformatics\/btae161","volume":"40","author":"D Sharma","year":"2024","unstructured":"Sharma D, Lou W, Xu W. phylaGAN: data augmentation through conditional GANs and autoencoders for improving disease prediction accuracy using microbiome data. Bioinformatics. 2024;40(4):btae161.","journal-title":"Bioinformatics"},{"issue":"5","key":"6312_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11222-025-10668-w","volume":"35","author":"A Chaussard","year":"2025","unstructured":"Chaussard A, Bonnet A, Gassiat E, Le Corff S. Tree-based variational inference for Poisson log-normal models. Stat Comput. 2025;35(5):1\u201335.","journal-title":"Stat Comput"},{"key":"6312_CR18","unstructured":"Tomczak JM, Welling M. VAE with a VampPrior. In: AISTATS. vol.\u00a084. PMLR; 2018. p. 1214\u20131223."},{"key":"6312_CR19","doi-asserted-by":"crossref","unstructured":"Perez E, Strub F, de\u00a0Vries H, Dumoulin V, Courville AC. FiLM: Visual Reasoning with a General Conditioning Layer. In: AAAI. AAAI Press; 2018. p. 3942\u20133951.","DOI":"10.1609\/aaai.v32i1.11671"},{"issue":"11","key":"6312_CR20","doi-asserted-by":"publisher","first-page":"1023","DOI":"10.1038\/nmeth.4468","volume":"14","author":"E Pasolli","year":"2017","unstructured":"Pasolli E, Schiffer L, Manghi P, Renson A, Obenchain V, Truong DT, et al. Accessible, curated metagenomic data through ExperimentHub. Nat Methods. 2017;14(11):1023\u20134.","journal-title":"Nat Methods"},{"issue":"12","key":"6312_CR21","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1038\/s44220-023-00148-3","volume":"1","author":"TF Bastiaanssen","year":"2023","unstructured":"Bastiaanssen TF, Quinn TP, Loughman A. Bugs as features (part 1): concepts and foundations for the compositional data analysis of the microbiome-gut-brain axis. Nat Mental Health. 2023;1(12):930\u20138.","journal-title":"Nat Mental Health"},{"key":"6312_CR22","doi-asserted-by":"publisher","DOI":"10.3389\/fevo.2021.588292","volume":"9","author":"J Chiquet","year":"2021","unstructured":"Chiquet J, Mariadassou M, Robin S. The Poisson-Lognormal model as a versatile framework for the joint analysis of species abundances. Front Ecol Evol. 2021;9:588292.","journal-title":"Front Ecol Evol"},{"issue":"3","key":"6312_CR23","doi-asserted-by":"publisher","first-page":"2879","DOI":"10.1109\/TPAMI.2022.3185773","volume":"45","author":"C Chadebec","year":"2022","unstructured":"Chadebec C, Thibeau-Sutre E, Burgos N, Allassonni\u00e8re S. Data augmentation in high dimensional low sample size setting using a geometry-based variational autoencoder. IEEE Trans Pattern Anal Mach Intell. 2022;45(3):2879\u201396.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"6312_CR24","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1038\/s41591-019-0406-6","volume":"25","author":"J Wirbel","year":"2019","unstructured":"Wirbel J, Pyl PT, Kartal E, Zych K, Kashani A, Milanese A, et al. Meta-analysis of fecal metagenomes reveals global microbial signatures that are specific for colorectal cancer. Nat Med. 2019;25(4):679\u201389.","journal-title":"Nat Med"},{"key":"6312_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13059-020-02020-4","volume":"21","author":"MA Rubel","year":"2020","unstructured":"Rubel MA, Abbas A, Taylor LJ, Connell A, Tanes C, Bittinger K, et al. Lifestyle and the presence of helminths is associated with gut microbiome composition in Cameroonians. Genome Biol. 2020;21:1\u201332.","journal-title":"Genome Biol"},{"issue":"6","key":"6312_CR26","doi-asserted-by":"publisher","first-page":"968","DOI":"10.1038\/s41591-019-0458-7","volume":"25","author":"S Yachida","year":"2019","unstructured":"Yachida S, Mizutani S, Shiroma H, Shiba S, Nakajima T, Sakamoto T, et al. Metagenomic and metabolomic analyses reveal distinct stage-specific phenotypes of the gut microbiota in colorectal cancer. Nat Med. 2019;25(6):968\u201376.","journal-title":"Nat Med"},{"issue":"1","key":"6312_CR27","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1136\/gutjnl-2015-309800","volume":"66","author":"J Yu","year":"2017","unstructured":"Yu J, Feng Q, Wong SH, Zhang D, Liang QY, Qin Y, et al. Metagenomic analysis of Faecal microbiome as a tool towards targeted non-invasive biomarkers for colorectal cancer. Gut. 2017;66(1):70\u20138.","journal-title":"Gut"},{"issue":"8","key":"6312_CR28","doi-asserted-by":"publisher","first-page":"822","DOI":"10.1038\/nbt.2939","volume":"32","author":"HB Nielsen","year":"2014","unstructured":"Nielsen HB, Almeida M, Juncker AS, Rasmussen S, Li J, Sunagawa S, et al. Identification and assembly of genomes and genetic elements in complex metagenomic samples without using reference genomes. Nat Biotechnol. 2014;32(8):822\u20138.","journal-title":"Nat Biotechnol"},{"key":"6312_CR29","doi-asserted-by":"crossref","unstructured":"Davis J, Goadrich MH. The relationship between Precision-Recall and ROC curves. In: ICML. vol. 148 of ACM International Conference Proceeding Series. ACM; 2006. p. 233\u2013240.","DOI":"10.1145\/1143844.1143874"},{"key":"6312_CR30","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A, Michel V, Thirion B, Grisel O, et al. Scikit-learn: machine learning in python. J Mach Learn Res. 2011;12:2825\u201330.","journal-title":"J Mach Learn Res"},{"issue":"104","key":"6312_CR31","doi-asserted-by":"publisher","first-page":"6969","DOI":"10.21105\/joss.06969","volume":"9","author":"B Batardiere","year":"2024","unstructured":"Batardiere B, Kwon J, Chiquet J. pyPLNmodels: a Python package to analyze multivariate high-dimensional count data. J Open Sour Softw. 2024;9(104):6969.","journal-title":"J Open Sour Softw"},{"issue":"2","key":"6312_CR32","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1111\/j.2517-6161.1982.tb01195.x","volume":"44","author":"J Aitchison","year":"1982","unstructured":"Aitchison J. The statistical analysis of compositional data. J Roy Stat Soc: Ser B (Methodol). 1982;44(2):139\u201360.","journal-title":"J Roy Stat Soc: Ser B (Methodol)"},{"key":"6312_CR33","doi-asserted-by":"publisher","first-page":"1488656","DOI":"10.3389\/fmicb.2024.1488656","volume":"15","author":"H Wu","year":"2025","unstructured":"Wu H, Li Y, Jiang Y, Li X, Wang S, Zhao C, et al. Machine learning prediction of obesity-associated gut microbiota: identifying Bifidobacterium Pseudocatenulatum as a potential therapeutic target. Front Microbiol. 2025;15:1488656.","journal-title":"Front Microbiol"},{"key":"6312_CR34","doi-asserted-by":"crossref","unstructured":"Wu H, Lv B, Zhi L, Shao Y, Liu X, Mitteregger M, et\u00a0al. Microbiome\u2013metabolome dynamics associated with impaired glucose control and responses to lifestyle changes. Nat Med 2025;p. 1\u201310.","DOI":"10.1038\/s41591-025-03642-6"},{"issue":"2","key":"6312_CR35","doi-asserted-by":"publisher","first-page":"giab005","DOI":"10.1093\/gigascience\/giab005","volume":"10","author":"R Rong","year":"2021","unstructured":"Rong R, Jiang S, Xu L, Xiao G, Xie Y, Liu DJ, et al. MB-GAN: microbiome simulation via generative adversarial network. GigaScience. 2021;10(2):giab005.","journal-title":"GigaScience"},{"key":"6312_CR36","unstructured":"Kumar A, Poole B. On Implicit Regularization in $$\\beta $$-VAEs. In: ICML. vol. 119. PMLR; 2020. p. 5480\u20135490."}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06312-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s12859-025-06312-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06312-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T19:03:46Z","timestamp":1767467026000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1186\/s12859-025-06312-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,28]]},"references-count":36,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["6312"],"URL":"https:\/\/doi.org\/10.1186\/s12859-025-06312-z","relation":{},"ISSN":["1471-2105"],"issn-type":[{"type":"electronic","value":"1471-2105"}],"subject":[],"published":{"date-parts":[[2025,11,28]]},"assertion":[{"value":"31 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Microbiome data used in this study originate from the publicly available\n                      curatedMetagenomicData\n                      database, which aggregates datasets approved by the respective institutional review boards. No additional ethics approval was required for our work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"1"}}