{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T19:24:02Z","timestamp":1776453842440,"version":"3.51.2"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T00:00:00Z","timestamp":1605484800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"},{"start":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T00:00:00Z","timestamp":1605484800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["BO 3245\/19-1"],"award-info":[{"award-number":["BO 3245\/19-1"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003495","name":"Hessisches Ministerium f\u00fcr Wissenschaft und Kunst","doi-asserted-by":"publisher","award":["HMWK LOEWE, MOSLA research cluster"],"award-info":[{"award-number":["HMWK LOEWE, MOSLA research cluster"]}],"id":[{"id":"10.13039\/501100003495","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Projekt DEAL"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2020,12]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Background<\/jats:title>\n                    <jats:p>Sequencing of marker genes amplified from environmental samples, known as amplicon sequencing, allows us to resolve some of the hidden diversity and elucidate evolutionary relationships and ecological processes among complex microbial communities. 
The analysis of large numbers of samples at high sequencing depths generated by high throughput sequencing technologies requires efficient, flexible, and reproducible bioinformatics pipelines. Only a few existing workflows can be run in a user-friendly, scalable, and reproducible manner on different computing devices using an efficient workflow management system.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>\n                      We present Natrix, an open-source bioinformatics workflow for preprocessing raw amplicon sequencing data. The workflow contains all analysis steps from quality assessment, read assembly, dereplication, chimera detection, split-sample merging, sequence representative assignment (OTUs or ASVs) to the taxonomic assignment of sequence representatives. The workflow is written using Snakemake, a workflow management engine for developing data analysis workflows. In addition, Conda is used for version control. Thus, Snakemake ensures reproducibility and Conda offers version control of the utilized programs. The encapsulation of rules and their dependencies support hassle-free sharing of rules between workflows and easy adaptation and extension of existing workflows. 
Natrix is freely available on GitHub (\n                      <jats:ext-link xmlns:xlink=\"http:\/\/www.w3.org\/1999\/xlink\" ext-link-type=\"uri\" xlink:href=\"https:\/\/github.com\/MW55\/Natrix\">https:\/\/github.com\/MW55\/Natrix<\/jats:ext-link>\n                      ) or as a Docker container on DockerHub (\n                      <jats:ext-link xmlns:xlink=\"http:\/\/www.w3.org\/1999\/xlink\" ext-link-type=\"uri\" xlink:href=\"https:\/\/hub.docker.com\/r\/mw55\/natrix\">https:\/\/hub.docker.com\/r\/mw55\/natrix<\/jats:ext-link>\n                      ).\n                    <\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Conclusion<\/jats:title>\n                    <jats:p>Natrix is a user-friendly and highly extensible workflow for processing Illumina amplicon data.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1186\/s12859-020-03852-4","type":"journal-article","created":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T07:04:47Z","timestamp":1605510287000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["Natrix: a Snakemake-based workflow for processing, clustering, and taxonomically assigning amplicon sequencing 
reads"],"prefix":"10.1186","volume":"21","author":[{"given":"Marius","family":"Welzel","sequence":"first","affiliation":[]},{"given":"Anja","family":"Lange","sequence":"additional","affiliation":[]},{"given":"Dominik","family":"Heider","sequence":"additional","affiliation":[]},{"given":"Michael","family":"Schwarz","sequence":"additional","affiliation":[]},{"given":"Bernd","family":"Freisleben","sequence":"additional","affiliation":[]},{"given":"Manfred","family":"Jensen","sequence":"additional","affiliation":[]},{"given":"Jens","family":"Boenigk","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0679-6631","authenticated-orcid":false,"given":"Daniela","family":"Beisser","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,16]]},"reference":[{"issue":"3","key":"3852_CR1","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/s0022-2836(05)80360-2","volume":"215","author":"SF Altschul","year":"1990","unstructured":"Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ. Basic local alignment search tool. J Mol Biol. 1990;215(3):403\u201310. https:\/\/doi.org\/10.1016\/s0022-2836(05)80360-2.","journal-title":"J Mol Biol"},{"key":"3852_CR2","unstructured":"Andrews S. A quality control tool for high throughput sequence data. FASTQC (2010)."},{"issue":"1","key":"3852_CR3","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1038\/nmeth.2276","volume":"10","author":"NA Bokulich","year":"2012","unstructured":"Bokulich NA, Subramanian S, Faith JJ, Gevers D, Gordon JI, Knight R, Mills DA, Caporaso JG. Quality-filtering vastly improves diversity estimates from illumina amplicon sequencing. Nat Methods. 2012;10(1):57\u20139. 
https:\/\/doi.org\/10.1038\/nmeth.2276.","journal-title":"Nat Methods"},{"issue":"8","key":"3852_CR4","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1038\/s41587-019-0209-9","volume":"37","author":"E Bolyen","year":"2019","unstructured":"Bolyen E, Rideout JR, Dillon MR, Bokulich NA, Abnet CC, Al-Ghalith GA, Alexander H, Alm EJ, Arumugam M, Asnicar F, et al. Reproducible, interactive, scalable and extensible microbiome data science using qiime 2. Nat Biotechnol. 2019;37(8):852\u20137. https:\/\/doi.org\/10.1038\/s41587-019-0209-9.","journal-title":"Nat Biotechnol"},{"issue":"12","key":"3852_CR5","doi-asserted-by":"publisher","first-page":"2639","DOI":"10.1038\/ismej.2017.119","volume":"11","author":"BJ Callahan","year":"2017","unstructured":"Callahan BJ, McMurdie PJ, Holmes SP. Exact sequence variants should replace operational taxonomic units in marker-gene data analysis. ISME J. 2017;11(12):2639\u201343. https:\/\/doi.org\/10.1038\/ismej.2017.119.","journal-title":"ISME J"},{"issue":"7","key":"3852_CR6","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1038\/nmeth.3869","volume":"13","author":"BJ Callahan","year":"2016","unstructured":"Callahan BJ, McMurdie PJ, Rosen MJ, Han AW, Johnson AJA, Holmes SP. DADA2: high-resolution sample inference from illumina amplicon data. Nat Methods. 2016;13(7):581\u20133. https:\/\/doi.org\/10.1038\/nmeth.3869.","journal-title":"Nat Methods"},{"key":"3852_CR7","doi-asserted-by":"publisher","DOI":"10.1101\/074252","author":"R Edgar","year":"2016","unstructured":"Edgar R. Uchime2: improved chimera prediction for amplicon sequencing. bioRxiv. 2016;. https:\/\/doi.org\/10.1101\/074252.","journal-title":"bioRxiv"},{"issue":"19","key":"3852_CR8","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar RC. Search and clustering orders of magnitude faster than BLAST. Bioinformatics. 2010;26(19):2460\u20131. 
https:\/\/doi.org\/10.1093\/bioinformatics\/btq461.","journal-title":"Bioinformatics"},{"issue":"19","key":"3852_CR9","doi-asserted-by":"publisher","first-page":"3047","DOI":"10.1093\/bioinformatics\/btw354","volume":"32","author":"P Ewels","year":"2016","unstructured":"Ewels P, Magnusson M, Lundin S, K\u00e4ller M. Multiqc: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016;32(19):3047\u20138. https:\/\/doi.org\/10.1093\/bioinformatics\/btw354.","journal-title":"Bioinformatics"},{"issue":"D1","key":"3852_CR10","doi-asserted-by":"publisher","first-page":"136","DOI":"10.1093\/nar\/gkr1178","volume":"40","author":"S Federhen","year":"2011","unstructured":"Federhen S. The NCBI taxonomy database. Nucleic Acids Res. 2011;40(D1):136\u201343. https:\/\/doi.org\/10.1093\/nar\/gkr1178.","journal-title":"Nucleic Acids Res"},{"issue":"23","key":"3852_CR11","doi-asserted-by":"publisher","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","volume":"28","author":"L Fu","year":"2012","unstructured":"Fu L, Niu B, Zhu Z, Wu S, Li W. Cd-hit: accelerated for clustering the next-generation sequencing data. Bioinformatics. 2012;28(23):3150\u20132. https:\/\/doi.org\/10.1093\/bioinformatics\/bts565.","journal-title":"Bioinformatics"},{"issue":"3","key":"3852_CR12","doi-asserted-by":"publisher","first-page":"257","DOI":"10.3354\/ame01853","volume":"80","author":"N Graupner","year":"2017","unstructured":"Graupner N, R\u00f6hl O, Jensen M, Beisser D, Begerow D, Boenigk J. Effects of short-term flooding on aquatic and terrestrial microeukaryotic communities: a mesocosm approach. Aquat Microb Ecol. 2017;80(3):257\u201372. 
https:\/\/doi.org\/10.3354\/ame01853.","journal-title":"Aquat Microb Ecol"},{"issue":"7","key":"3852_CR13","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1038\/s41592-018-0046-7","volume":"15","author":"B Gr\u00fcning","year":"2018","unstructured":"Gr\u00fcning B, Dale R, Sj\u00f6din A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, K\u00f6ster J. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018;15(7):475\u20136. https:\/\/doi.org\/10.1038\/s41592-018-0046-7.","journal-title":"Nat Methods"},{"key":"3852_CR14","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty350","author":"J K\u00f6ster","year":"2018","unstructured":"K\u00f6ster J, Rahmann S. Snakemake-a scalable bioinformatics workflow engine. Bioinformatics. 2018;. https:\/\/doi.org\/10.1093\/bioinformatics\/bty350.","journal-title":"Bioinformatics"},{"key":"3852_CR15","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0141590","author":"A Lange","year":"2015","unstructured":"Lange A, Jost S, Heider D, Bock C, Budeus B, Schilling E, Strittmatter A, Boenigk J, Hoffmann D. Ampliconduo: a split-sample filtering protocol for high-throughput amplicon sequencing of microbial communities. PLoS ONE. 2015;. https:\/\/doi.org\/10.1371\/journal.pone.0141590.","journal-title":"PLoS ONE"},{"key":"3852_CR16","unstructured":"Mahe F. frederic-mahe\/swarm (2016). https:\/\/github.com\/frederic-mahe\/swarm\/wiki\/Fred\u2019s-metabarcoding-pipeline."},{"key":"3852_CR17","doi-asserted-by":"publisher","DOI":"10.7717\/peerj.1420","author":"F Mah\u00e9","year":"2015","unstructured":"Mah\u00e9 F, Rognes T, Quince C, Vargas CD, Dunthorn M. Swarm v2: highly-scalable and high-resolution amplicon clustering. PeerJ. 2015;. 
https:\/\/doi.org\/10.7717\/peerj.1420.","journal-title":"PeerJ"},{"issue":"1","key":"3852_CR18","doi-asserted-by":"publisher","first-page":"10","DOI":"10.14806\/ej.17.1.200","volume":"17","author":"M Martin","year":"2011","unstructured":"Martin M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet J. 2011;17(1):10. https:\/\/doi.org\/10.14806\/ej.17.1.200.","journal-title":"EMBnet J"},{"issue":"1","key":"3852_CR19","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1186\/1471-2105-13-31","volume":"13","author":"AP Masella","year":"2012","unstructured":"Masella AP, Bartram AK, Truszkowski JM, Brown DG, Neufeld JD. Pandaseq: paired-end assembler for illumina sequences. BMC Bioinform. 2012;13(1):31. https:\/\/doi.org\/10.1186\/1471-2105-13-31.","journal-title":"BMC Bioinform"},{"key":"3852_CR20","unstructured":"Oksanen J, Blanchet FG, Friendly M, Kindt R, Legendre P, McGlinn D, Minchin PR, O\u2019Hara RB, Simpson GL, Solymos P, Stevens MHH, Szoecs E, Wagner H. Vegan: Community Ecology Package. (2019). R package version 2.5-6. https:\/\/CRAN.R-project.org\/package=vegan."},{"issue":"5313","key":"3852_CR21","doi-asserted-by":"publisher","first-page":"734","DOI":"10.1126\/science.276.5313.734","volume":"276","author":"NR Pace","year":"1997","unstructured":"Pace NR. A molecular view of microbial diversity and the biosphere. Science. 1997;276(5313):734\u201340. https:\/\/doi.org\/10.1126\/science.276.5313.734.","journal-title":"Science"},{"issue":"21","key":"3852_CR22","doi-asserted-by":"publisher","first-page":"7188","DOI":"10.1093\/nar\/gkm864","volume":"35","author":"E Pruesse","year":"2007","unstructured":"Pruesse E, Quast C, Knittel K, Fuchs BM, Ludwig W, Peplies J, Glockner FO. Silva: a comprehensive online resource for quality checked and aligned ribosomal RNA sequence data compatible with ARB. Nucleic Acids Res. 2007;35(21):7188\u201396. 
https:\/\/doi.org\/10.1093\/nar\/gkm864.","journal-title":"Nucleic Acids Res"},{"key":"3852_CR23","doi-asserted-by":"publisher","unstructured":"Rognes T, Flouri T, Nichols B, Quince C, Mah\u00e9 F. Vsearch: a versatile open source tool for metagenomics; (2016). https:\/\/doi.org\/10.7287\/peerj.preprints.2409v1.","DOI":"10.7287\/peerj.preprints.2409v1"},{"issue":"2","key":"3852_CR24","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1007\/s11557-016-1261-1","volume":"16","author":"O R\u00f6hl","year":"2017","unstructured":"R\u00f6hl O, Per\u0161oh D, Mittelbach M, Elbrecht V, Brachmann A, Nuy J, Boenigk J, Leese F, Begerow D. Distinct sensitivity of fungal freshwater guilds to water quality. Mycol Prog. 2017;16(2):155\u201369. https:\/\/doi.org\/10.1007\/s11557-016-1261-1.","journal-title":"Mycol Prog"},{"issue":"23","key":"3852_CR25","doi-asserted-by":"publisher","first-page":"7537","DOI":"10.1128\/aem.01541-09","volume":"75","author":"PD Schloss","year":"2009","unstructured":"Schloss PD, Westcott SL, Ryabin T, Hall JR, Hartmann M, Hollister EB, Lesniewski RA, Oakley BB, Parks DH, Robinson CJ, et al. Introducing mothur: open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol. 2009;75(23):7537\u201341. https:\/\/doi.org\/10.1128\/aem.01541-09.","journal-title":"Appl Environ Microbiol"},{"issue":"6","key":"3852_CR26","doi-asserted-by":"publisher","first-page":"863","DOI":"10.1093\/bioinformatics\/btr026","volume":"27","author":"R Schmieder","year":"2011","unstructured":"Schmieder R, Edwards RA. Quality control and preprocessing of metagenomic datasets. Bioinformatics. 2011;27(6):863\u20134.","journal-title":"Bioinformatics"},{"issue":"1","key":"3852_CR27","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/s10933-009-9383-y","volume":"44","author":"JA Wiklund","year":"2009","unstructured":"Wiklund JA, Bozinovski N, Hall RI, Wolfe BB. 
Epiphytic diatoms as flood indicators. J Paleolimnol. 2009;44(1):25\u201342. https:\/\/doi.org\/10.1007\/s10933-009-9383-y.","journal-title":"J Paleolimnol"},{"issue":"6","key":"3852_CR28","doi-asserted-by":"publisher","first-page":"1326","DOI":"10.1111\/1755-0998.12922","volume":"18","author":"W Xiong","year":"2018","unstructured":"Xiong W, Zhan A. Testing clustering strategies for metabarcoding-based investigation of community-environment interactions. Mol Ecol Resour. 2018;18(6):1326\u201338. https:\/\/doi.org\/10.1111\/1755-0998.12922.","journal-title":"Mol Ecol Resour"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-020-03852-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s12859-020-03852-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-020-03852-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T07:06:09Z","timestamp":1605510369000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-020-03852-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,16]]},"references-count":28,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["3852"],"URL":"https:\/\/doi.org\/10.1186\/s12859-020-03852-4","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2020.09.23.309864","asserted-by":"object"}]},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,11,16]]},"assertion":[{"value":"29 May 
2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"526"}}