{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T01:39:23Z","timestamp":1775698763619,"version":"3.50.1"},"reference-count":54,"publisher":"Oxford University Press (OUP)","issue":"22","license":[{"start":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T00:00:00Z","timestamp":1663632000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Strategic Focus Area \u2018Personalized Health and Related Technologies","award":["#2021-362"],"award-info":[{"award-number":["#2021-362"]}]},{"name":"Swiss Federal Institutes of Technology"},{"DOI":"10.13039\/501100001711","name":"Swiss National Science Foundation","doi-asserted-by":"publisher","award":["310030_204275"],"award-info":[{"award-number":["310030_204275"]}],"id":[{"id":"10.13039\/501100001711","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Swiss Government Excellence"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,11,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>The volume of public nucleotide sequence data has blossomed over the past two decades and is ripe for re- and meta-analyses to enable novel discoveries. However, reproducible re-use and management of sequence datasets and associated metadata remain critical challenges. We created the open source Python package q2-fondue to enable user-friendly acquisition, re-use and management of public sequence (meta)data while adhering to open data principles.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>q2-fondue allows fully provenance-tracked programmatic access to and management of data from the NCBI Sequence Read Archive (SRA). Unlike other packages allowing download of sequence data from the SRA, q2-fondue enables full data provenance tracking from data download to final visualization, integrates with the QIIME 2 ecosystem, prevents data loss upon space exhaustion and allows download of (meta)data given a publication library. To highlight its manifold capabilities, we present executable demonstrations using publicly available amplicon, whole genome and metagenome datasets.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>q2-fondue is available as an open-source BSD-3-licensed Python package at https:\/\/github.com\/bokulich-lab\/q2-fondue. Usage tutorials are available in the same repository. All Jupyter notebooks used in this article are available under https:\/\/github.com\/bokulich-lab\/q2-fondue-examples.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btac639","type":"journal-article","created":{"date-parts":[[2022,9,19]],"date-time":"2022-09-19T08:25:59Z","timestamp":1663575959000},"page":"5081-5091","source":"Crossref","is-referenced-by-count":15,"title":["Reproducible acquisition, management and meta-analysis of nucleotide sequence (meta)data using q2-fondue"],"prefix":"10.1093","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6285-8852","authenticated-orcid":false,"given":"Michal","family":"Ziemski","sequence":"first","affiliation":[{"name":"Laboratory of Food Systems Biotechnology, Institute of Food, Nutrition, and Health, ETH Z\u00fcrich , Z\u00fcrich 8092, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7506-1583","authenticated-orcid":false,"given":"Anja","family":"Adamov","sequence":"additional","affiliation":[{"name":"Laboratory of Food Systems Biotechnology, Institute of Food, Nutrition, and Health, ETH Z\u00fcrich , Z\u00fcrich 8092, Switzerland"}]},{"given":"Lina","family":"Kim","sequence":"additional","affiliation":[{"name":"Laboratory of Food Systems Biotechnology, Institute of Food, Nutrition, and Health, ETH Z\u00fcrich , Z\u00fcrich 8092, Switzerland"}]},{"given":"Lena","family":"Fl\u00f6rl","sequence":"additional","affiliation":[{"name":"Laboratory of Food Systems Biotechnology, Institute of Food, Nutrition, and Health, ETH Z\u00fcrich , Z\u00fcrich 8092, Switzerland"}]},{"given":"Nicholas A","family":"Bokulich","sequence":"additional","affiliation":[{"name":"Laboratory of Food Systems Biotechnology, Institute of Food, Nutrition, and Health, ETH Z\u00fcrich , Z\u00fcrich 8092, Switzerland"}]}],"member":"286","published-online":{"date-parts":[[2022,9,20]]},"reference":[{"key":"2022112014202397500_btac639-B1","doi-asserted-by":"crossref","first-page":"719","DOI":"10.1016\/j.chom.2019.04.001","article-title":"Redondoviridae, a family of small, circular DNA viruses of the human oro-respiratory tract that are associated with periodontitis and critical illness","volume":"25","author":"Abbas","year":"2019","journal-title":"Cell Host Microbe"},{"key":"2022112014202397500_btac639-B2","doi-asserted-by":"crossref","first-page":"1063","DOI":"10.1101\/gr.157503.113","article-title":"Comparative genomics as a tool to understand evolution and disease","volume":"23","author":"Alf\u00f6ldi","year":"2013","journal-title":"Genome Res"},{"key":"2022112014202397500_btac639-B3","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1126\/science.aaw1280","article-title":"Toward unrestricted use of public genomic data","volume":"363","author":"Amann","year":"2019","journal-title":"Science"},{"key":"2022112014202397500_btac639-B4","doi-asserted-by":"crossref","first-page":"452","DOI":"10.1038\/533452a","article-title":"1,500 Scientists lift the lid on reproducibility","volume":"533","author":"Baker","year":"2016","journal-title":"Nature"},{"key":"2022112014202397500_btac639-B5","article-title":"Building global infrastructure for data sharing and exchange through the research data alliance","volume":"20","author":"Berman","year":"2014","journal-title":"D-Lib Mag"},{"key":"2022112014202397500_btac639-B6","doi-asserted-by":"crossref","first-page":"934","DOI":"10.21105\/joss.00934","article-title":"q2-sample-classifier: machine-learning tools for microbiome classification and regression","volume":"3","author":"Bokulich","year":"2018","journal-title":"J. Open Source Softw"},{"key":"2022112014202397500_btac639-B7","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1186\/s40168-018-0470-z","article-title":"Optimizing taxonomic classification of marker-gene amplicon sequences with QIIME 2\u2019s q2-feature-classifier plugin","volume":"6","author":"Bokulich","year":"2018","journal-title":"Microbiome"},{"key":"2022112014202397500_btac639-B8","volume-title":"Nbokulich\/q2-Coordinates: 2018.11","author":"Bokulich","year":"2018"},{"key":"2022112014202397500_btac639-B9","doi-asserted-by":"crossref","first-page":"852","DOI":"10.1038\/s41587-019-0209-9","article-title":"Reproducible, interactive, scalable and extensible microbiome data science using QIIME 2","volume":"37","author":"Bolyen","year":"2019","journal-title":"Nat. Biotechnol"},{"key":"2022112014202397500_btac639-B10","doi-asserted-by":"crossref","first-page":"4511","DOI":"10.1093\/bioinformatics\/btz385","article-title":"Entrezpy: a python library to dynamically interact with the NCBI entrez databases","volume":"35","author":"Buchmann","year":"2019","journal-title":"Bioinformatics (Oxford, England)"},{"key":"2022112014202397500_btac639-B11","doi-asserted-by":"crossref","first-page":"581","DOI":"10.1038\/nmeth.3869","article-title":"DADA2: high-resolution sample inference from illumina amplicon data","volume":"13","author":"Callahan","year":"2016","journal-title":"Nat. Methods"},{"key":"2022112014202397500_btac639-B12","doi-asserted-by":"crossref","first-page":"532","DOI":"10.12688\/f1000research.18676.1","article-title":"Pysradb: a python package to query next-generation sequencing metadata and data from NCBI sequence read archive","volume":"8","author":"Choudhary","year":"2019","journal-title":"F1000Research"},{"key":"2022112014202397500_btac639-B13","doi-asserted-by":"crossref","first-page":"40466","DOI":"10.1038\/srep40466","article-title":"Growth and morbidity of Gambian infants are influenced by maternal milk oligosaccharides and infant gut microbiota","volume":"7","author":"Davis","year":"2017","journal-title":"Sci. Rep"},{"key":"2022112014202397500_btac639-B14","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1038\/nature25753","article-title":"Meta-analysis and the science of research synthesis","volume":"555","author":"Gurevitch","year":"2018","journal-title":"Nature"},{"key":"2022112014202397500_btac639-B15","doi-asserted-by":"crossref","first-page":"4121","DOI":"10.1093\/bioinformatics\/bty407","article-title":"Nextstrain: real-time tracking of pathogen evolution","volume":"34","author":"Hadfield","year":"2018","journal-title":"Bioinformatics"},{"key":"2022112014202397500_btac639-B16","author":"Halko","year":"2011"},{"key":"2022112014202397500_btac639-B17","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1093\/bib\/bbs078","article-title":"Comparability and reproducibility of biomedical data","volume":"14","author":"Huang","year":"2013","journal-title":"Brief. Bioinformatics"},{"key":"2022112014202397500_btac639-B18","volume-title":"Urschrei\/Pyzotero: Zenodo Release","author":"H\u00fcgel","year":"2019"},{"key":"2022112014202397500_btac639-B19","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1109\/MCSE.2007.55","article-title":"Matplotlib: a 2D graphics environment","volume":"9","author":"Hunter","year":"2007","journal-title":"Comput. Sci. Eng"},{"key":"2022112014202397500_btac639-B20","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1016\/j.jclinepi.2004.10.019","article-title":"Early extreme contradictory estimates may appear in published research: the Proteus phenomenon in molecular genetics research and randomized trials","volume":"58","author":"Ioannidis","year":"2005","journal-title":"J. Clin. Epidemiol"},{"key":"2022112014202397500_btac639-B21","volume-title":"Entrez Programming Utilities Help","author":"Kans","year":"2013"},{"key":"2022112014202397500_btac639-B22","doi-asserted-by":"crossref","first-page":"D387","DOI":"10.1093\/nar\/gkab1053","article-title":"The sequence read archive: a decade more of explosive growth","volume":"50","author":"Katz","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2022112014202397500_btac639-B23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/gigascience\/giy077","article-title":"Experimenting with reproducibility: a case study of robustness in bioinformatics","volume":"7","author":"Kim","year":"2018","journal-title":"GigaScience"},{"key":"2022112014202397500_btac639-B24","doi-asserted-by":"crossref","first-page":"D54","DOI":"10.1093\/nar\/gkr854","article-title":"The sequence read archive: explosive growth of sequencing data","volume":"40","author":"Kodama","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2022112014202397500_btac639-B25","doi-asserted-by":"crossref","first-page":"D28","DOI":"10.1093\/nar\/gkq967","article-title":"The European nucleotide archive","volume":"39","author":"Leinonen","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"2022112014202397500_btac639-B26","doi-asserted-by":"crossref","first-page":"D19","DOI":"10.1093\/nar\/gkq1019","article-title":"The sequence read archive","volume":"39","author":"Leinonen","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"2022112014202397500_btac639-B27","doi-asserted-by":"crossref","first-page":"40932","DOI":"10.1038\/srep40932","article-title":"The fecal microbial community of breast-fed infants from Armenia and Georgia","volume":"7","author":"Lewis","year":"2017","journal-title":"Sci. Rep"},{"key":"2022112014202397500_btac639-B28","doi-asserted-by":"crossref","first-page":"e00055","DOI":"10.1128\/mSystems.00055-18","article-title":"Phylogenetically novel uncultured microbial cells dominate earth microbiomes","volume":"3","author":"Lloyd","year":"2018","journal-title":"MSystems"},{"key":"2022112014202397500_btac639-B29","doi-asserted-by":"crossref","first-page":"10","DOI":"10.14806\/ej.17.1.200","article-title":"Cutadapt removes adapter sequences from high-throughput sequencing reads","volume":"17","author":"Martin","year":"2011","journal-title":"EMBnet. J"},{"key":"2022112014202397500_btac639-B30","doi-asserted-by":"crossref","first-page":"D25","DOI":"10.1093\/nar\/gkw1001","article-title":"DNA data bank of Japan","volume":"45","author":"Mashima","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2022112014202397500_btac639-B31","doi-asserted-by":"crossref","first-page":"1238","DOI":"10.1093\/ajcn\/nqy249","article-title":"Anemia in infancy is associated with alterations in systemic metabolism and microbial structure and function in a sex-specific manner: an observational study","volume":"108","author":"McClorry","year":"2018","journal-title":"Am. J. Clin. Nutr"},{"key":"2022112014202397500_btac639-B32","first-page":"56","author":"McKinney","year":"2010"},{"key":"2022112014202397500_btac639-B33","doi-asserted-by":"crossref","first-page":"1024","DOI":"10.1126\/science.aad7048","article-title":"Liberating field science samples and data","volume":"351","author":"McNutt","year":"2016","journal-title":"Science"},{"key":"2022112014202397500_btac639-B34","doi-asserted-by":"crossref","first-page":"624","DOI":"10.1038\/nrg.2017.51","article-title":"Dissecting evolution and disease using comparative vertebrate genomics","volume":"18","author":"Meadows","year":"2017","journal-title":"Nat. Rev. Genet"},{"key":"2022112014202397500_btac639-B35","doi-asserted-by":"crossref","first-page":"386","DOI":"10.1186\/1471-2105-9-386","article-title":"The metagenomics RAST server\u2014a public resource for the automatic phylogenetic and functional analysis of metagenomes","volume":"9","author":"Meyer","year":"2008","journal-title":"BMC Bioinformatics"},{"key":"2022112014202397500_btac639-B36","first-page":"D570","article-title":"MGnify: the microbiome analysis resource in 2020","volume":"48","author":"Mitchell","year":"2020","journal-title":"Nucleic Acids Res"},{"key":"2022112014202397500_btac639-B37","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1186\/s13059-016-0997-x","article-title":"Mash: fast genome and metagenome distance estimation using MinHash","volume":"17","author":"Ondov","year":"2016","journal-title":"Genome Biol"},{"key":"2022112014202397500_btac639-B38","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1146\/annurev-genom-091212-153520","article-title":"The power of meta-analysis in genome wide association studies","volume":"14","author":"Panagiotou","year":"2013","journal-title":"Annu. Rev. Genomics Hum. Genet"},{"key":"2022112014202397500_btac639-B39","doi-asserted-by":"crossref","first-page":"1533","DOI":"10.1038\/s41564-017-0012-7","article-title":"Recovery of nearly 8,000 metagenome-assembled genomes substantially expands the tree of life","volume":"2","author":"Parks","year":"2017","journal-title":"Nat. Microbiol"},{"key":"2022112014202397500_btac639-B40","first-page":"2826","author":"Pedregosa","year":"2011"},{"key":"2022112014202397500_btac639-B41","volume-title":"Pandas-Dev\/Pandas: Pandas 1.0.3","author":"Reback","year":"2020"},{"key":"2022112014202397500_btac639-B42","doi-asserted-by":"crossref","first-page":"703","DOI":"10.1126\/science.1197962","article-title":"Challenges and opportunities of open data in ecology","volume":"331","author":"Reichman","year":"2011","journal-title":"Science"},{"key":"2022112014202397500_btac639-B43","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1016\/j.jclinepi.2015.09.004","article-title":"Field-wide Meta-analyses of observational associations can map selective availability of risk factors and the impact of model specifications","volume":"71","author":"Serghiou","year":"2016","journal-title":"J. Clin. Epidemiol"},{"key":"2022112014202397500_btac639-B44","doi-asserted-by":"crossref","first-page":"e1002195","DOI":"10.1371\/journal.pbio.1002195","article-title":"Big data: astronomical or genomical?","volume":"13","author":"Stephens","year":"2015","journal-title":"PLoS Biol"},{"key":"2022112014202397500_btac639-B45","doi-asserted-by":"crossref","first-page":"150023","DOI":"10.1038\/sdata.2015.23","article-title":"Open science resources for the discovery and analysis of Tara oceans data","volume":"2","author":"Pesant","year":"2015","journal-title":"Sci. Data"},{"key":"2022112014202397500_btac639-B46","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1038\/s41581-019-0188-6","volume":"15","year":"2019","journal-title":"Nat. Rev. Nephrol"},{"key":"2022112014202397500_btac639-B47","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1038\/nature24621","article-title":"A communal catalogue reveals earth\u2019s multiscale microbial diversity","volume":"551","author":"Thompson","year":"2017","journal-title":"Nature"},{"key":"2022112014202397500_btac639-B48","doi-asserted-by":"crossref","first-page":"1351","DOI":"10.1136\/bmj.309.6965.1351","article-title":"Why sources of heterogeneity in meta-analysis should be investigated","volume":"309","author":"Thompson","year":"1994","journal-title":"BMJ (Clinical Research Ed.)"},{"key":"2022112014202397500_btac639-B49","doi-asserted-by":"crossref","first-page":"3021","DOI":"10.21105\/joss.03021","article-title":"Seaborn: statistical data visualization","volume":"6","author":"Waskom","year":"2021","journal-title":"J. Open Source Softw"},{"key":"2022112014202397500_btac639-B50","doi-asserted-by":"crossref","first-page":"160018","DOI":"10.1038\/sdata.2016.18","article-title":"The FAIR guiding principles for scientific data management and stewardship","volume":"3","author":"Wilkinson","year":"2016","journal-title":"Sci. Data"},{"key":"2022112014202397500_btac639-B51","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1038\/nbt.1823","article-title":"Minimum information about a marker gene sequence (MIMARKS) and minimum information about any (x) sequence (MIxS) specifications","volume":"29","author":"Yilmaz","year":"2011","journal-title":"Nat. Biotechnol"},{"key":"2022112014202397500_btac639-B52","article-title":"iMicrobe: tools and data-driven discovery platform for the microbiome sciences","volume":"8","author":"Youens-Clark","year":"2019","journal-title":"GigaScience"},{"key":"2022112014202397500_btac639-B53","doi-asserted-by":"crossref","first-page":"228","DOI":"10.1038\/s41396-020-00777-x","article-title":"A network approach to elucidate and prioritize microbial dark matter in microbial communities","volume":"15","author":"Zamkovaya","year":"2021","journal-title":"ISME J"},{"key":"2022112014202397500_btac639-B54","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1186\/1471-2105-14-19","article-title":"SRAdb: query and use public next-generation sequencing data from within R","volume":"14","author":"Zhu","year":"2013","journal-title":"BMC Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btac639\/46340009\/btac639.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/38\/22\/5081\/47153924\/btac639.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/38\/22\/5081\/47153924\/btac639.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,20]],"date-time":"2022-11-20T09:20:56Z","timestamp":1668936056000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/38\/22\/5081\/6706785"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2022,9,20]]},"references-count":54,"journal-issue":{"issue":"22","published-online":{"date-parts":[[2022,9,20]]},"published-print":{"date-parts":[[2022,11,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btac639","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2022.03.22.485322","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2022,11,15]]},"published":{"date-parts":[[2022,9,20]]}}}