{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T18:29:56Z","timestamp":1768588196371,"version":"3.49.0"},"reference-count":25,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T00:00:00Z","timestamp":1726704000000},"content-version":"vor","delay-in-days":18,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"European Molecular Biology Laboratory, European Bioinformatics Institute"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Metagenome-Assembled Genomes (MAGs) or Single-cell Amplified Genomes (SAGs) are often incomplete, with sequences missing due to errors in assembly or low coverage. This presents a particular challenge for the identification of true gene frequencies within a microbial population, as core genes missing in only a few assemblies will be mischaracterized by current pangenome approaches.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Here, we present CELEBRIMBOR, a Snakemake pangenome analysis pipeline which uses a measure of genome completeness to automatically adjust the frequency threshold at which core genes are identified, enabling accurate core gene identification in MAGs and SAGs.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>CELEBRIMBOR is published under open source Apache 2.0 licence at https:\/\/github.com\/bacpop\/CELEBRIMBOR and is available as a Docker container from this repository. Supplementary material is available in the online version of the article.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae542","type":"journal-article","created":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T17:35:46Z","timestamp":1726767346000},"source":"Crossref","is-referenced-by-count":2,"title":["CELEBRIMBOR: core and accessory genes from metagenomes"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2683-0849","authenticated-orcid":false,"given":"Joel","family":"Hellewell","sequence":"first","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]}]},{"given":"Samuel T","family":"Horsfield","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]}]},{"given":"Johanna","family":"von Wachsmann","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7537-8871","authenticated-orcid":false,"given":"Tatiana A","family":"Gurbich","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8626-2148","authenticated-orcid":false,"given":"Robert D","family":"Finn","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8466-7547","authenticated-orcid":false,"given":"Zamin","family":"Iqbal","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]},{"name":"University of Bath Milner Centre for Evolution, , Bath BA2 7AZ,","place":["United Kingdom"]}]},{"given":"Leah W","family":"Roberts","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]},{"name":"Centre for Immunology and Infection Control, Queensland University of Technology , Brisbane, QLD 4000,","place":["Australia"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5360-1254","authenticated-orcid":false,"given":"John A","family":"Lees","sequence":"additional","affiliation":[{"name":"European Bioinformatics Institute, European Molecular Biology Laboratory , Wellcome Genome Campus, Hinxton , Cambridge CB10 1SD,","place":["United Kingdom"]}]}],"member":"286","published-online":{"date-parts":[[2024,9,19]]},"reference":[{"key":"2024092505104750100_btae542-B1","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1093\/gbe\/evs016","article-title":"The infinitely many genes model for the distributed genome of bacteria","volume":"4","author":"Baumdicker","year":"2012","journal-title":"Genome Biol Evol"},{"key":"2024092505104750100_btae542-B2","doi-asserted-by":"crossref","first-page":"lqac060","DOI":"10.1093\/nargab\/lqac060","article-title":"mOTUpan: a robust Bayesian approach to leverage metagenome-assembled genomes for core-genome estimation","volume":"4","author":"Buck","year":"2022","journal-title":"NAR Genom Bioinform"},{"key":"2024092505104750100_btae542-B3","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1101\/gr.258640.119","article-title":"Accurate and complete genomes from metagenomes","volume":"30","author":"Chen","year":"2020","journal-title":"Genome Res"},{"key":"2024092505104750100_btae542-B4","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1186\/s13059-021-02473-1","article-title":"Pandora: nucleotide-resolution bacterial pan-genomics with reference graphs","volume":"22","author":"Colquhoun","year":"2021","journal-title":"Genome Biol"},{"key":"2024092505104750100_btae542-B5","doi-asserted-by":"crossref","first-page":"e1007732","DOI":"10.1371\/journal.pcbi.1007732","article-title":"PPanGGOLiN: depicting microbial diversity via a partitioned pangenome graph","volume":"16","author":"Gautreau","year":"2020","journal-title":"PLoS Comput Biol"},{"key":"2024092505104750100_btae542-B6","doi-asserted-by":"crossref","first-page":"168016","DOI":"10.1016\/j.jmb.2023.168016","article-title":"MGnify genomes: a resource for biome-specific microbial genome catalogues","volume":"435","author":"Gurbich","year":"2023","journal-title":"J Mol Biol"},{"key":"2024092505104750100_btae542-B7","doi-asserted-by":"crossref","first-page":"593","DOI":"10.1093\/bioinformatics\/btr708","article-title":"ART: a next-generation sequencing read simulator","volume":"28","author":"Huang","year":"2012","journal-title":"Bioinformatics"},{"key":"2024092505104750100_btae542-B8","doi-asserted-by":"crossref","first-page":"1437","DOI":"10.1101\/gr.216606.116","article-title":"Systematic longitudinal survey of invasive Escherichia coli in England demonstrates a stable population structure only transiently disturbed by the emergence of ST131","volume":"27","author":"Kallonen","year":"2017","journal-title":"Genome Res"},{"key":"2024092505104750100_btae542-B9","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1186\/s12859-020-03585-4","article-title":"ATLAS: a snakemake workflow for assembly, annotation, and genomic binning of metagenome sequence data","volume":"21","author":"Kieser","year":"2020","journal-title":"BMC Bioinformatics"},{"key":"2024092505104750100_btae542-B10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/bib\/bbac413","article-title":"Critical assessment of pan-genomic analysis of metagenome-assembled genomes","volume":"23","author":"Li","year":"2022","journal-title":"Brief Bioinform"},{"key":"2024092505104750100_btae542-B11","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1093\/gbe\/evt002","article-title":"Gene frequency distributions reject a neutral model of genome evolution","volume":"5","author":"Lobkovsky","year":"2013","journal-title":"Genome Biol Evol"},{"key":"2024092505104750100_btae542-B12","doi-asserted-by":"crossref","first-page":"824","DOI":"10.1101\/gr.213959.116","article-title":"metaSPAdes: a new versatile metagenomic assembler","volume":"27","author":"Nurk","year":"2017","journal-title":"Genome Res"},{"key":"2024092505104750100_btae542-B13","first-page":"e000083","article-title":"Robust high-throughput prokaryote de novo assembly and improvement pipeline for Illumina data","volume":"2","author":"Page","year":"2016","journal-title":"Microb Genom"},{"key":"2024092505104750100_btae542-B14","doi-asserted-by":"crossref","first-page":"996","DOI":"10.1038\/nbt.4229","article-title":"A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life","volume":"36","author":"Parks","year":"2018","journal-title":"Nat Biotechnol"},{"key":"2024092505104750100_btae542-B15","doi-asserted-by":"crossref","first-page":"1043","DOI":"10.1101\/gr.186072.114","article-title":"CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes","volume":"25","author":"Parks","year":"2015","journal-title":"Genome Res"},{"key":"2024092505104750100_btae542-B16","doi-asserted-by":"crossref","first-page":"D753","DOI":"10.1093\/nar\/gkac1080","article-title":"MGnify: the microbiome sequence data analysis resource in 2023","volume":"51","author":"Richardson","year":"2023","journal-title":"Nucleic Acids Res"},{"key":"2024092505104750100_btae542-B17","doi-asserted-by":"crossref","first-page":"D777","DOI":"10.1093\/nar\/gkad943","article-title":"SPIRE: a Searchable, Planetary-scale mIcrobiome REsource","volume":"52","author":"Schmidt","year":"2024","journal-title":"Nucleic Acids Res"},{"key":"2024092505104750100_btae542-B18","article-title":"Bakta: rapid and standardized annotation of bacterial genomes via alignment-free sequence identification","volume":"7","author":"Schwengers","year":"2021","journal-title":"Microb Genom"},{"key":"2024092505104750100_btae542-B19","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1186\/1471-2164-10-385","article-title":"Microbial comparative pan-genomics using binomial mixture models","volume":"10","author":"Snipen","year":"2009","journal-title":"BMC Genomics"},{"key":"2024092505104750100_btae542-B20","doi-asserted-by":"crossref","first-page":"1026","DOI":"10.1038\/nbt.3988","article-title":"MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets","volume":"35","author":"Steinegger","year":"2017","journal-title":"Nat Biotechnol"},{"key":"2024092505104750100_btae542-B21","doi-asserted-by":"crossref","first-page":"1522","DOI":"10.12688\/f1000research.128091.1","article-title":"SnakeMAGs: a simple, efficient, flexible and scalable workflow to reconstruct prokaryotic genomes from metagenomes","volume":"11","author":"Tadrent","year":"2022","journal-title":"F1000Res"},{"key":"2024092505104750100_btae542-B22","article-title":"Challenges in prokaryote pangenomics","volume":"9","author":"Tonkin-Hill","year":"2023","journal-title":"Microb Genom"},{"key":"2024092505104750100_btae542-B23","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1186\/s13059-020-02090-4","article-title":"Producing polished prokaryotic pangenomes with the Panaroo pipeline","volume":"21","author":"Tonkin-Hill","year":"2020","journal-title":"Genome Biol"},{"key":"2024092505104750100_btae542-B24","doi-asserted-by":"crossref","first-page":"e1003788","DOI":"10.1371\/journal.pcbi.1003788","article-title":"Defining the estimated core genome of bacterial populations using a Bayesian decision model","volume":"10","author":"van Tonder","year":"2014","journal-title":"PLoS Comput Biol"},{"key":"2024092505104750100_btae542-B25","doi-asserted-by":"crossref","first-page":"1667","DOI":"10.1101\/gr.260828.120","article-title":"Accurate reconstruction of bacterial pan- and core genomes with PEPPAN","volume":"30","author":"Zhou","year":"2020","journal-title":"Genome Res"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae542\/59204011\/btae542.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae542\/59328449\/btae542.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae542\/59328449\/btae542.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T13:30:05Z","timestamp":1727271005000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae542\/7762100"}},"subtitle":[],"editor":[{"given":"Can","family":"Alkan","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":25,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,9,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae542","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,9]]},"published":{"date-parts":[[2024,9]]},"article-number":"btae542"}}