{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T21:04:32Z","timestamp":1773954272196,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,2,15]],"date-time":"2021-02-15T00:00:00Z","timestamp":1613347200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"},{"start":{"date-parts":[[2021,2,15]],"date-time":"2021-02-15T00:00:00Z","timestamp":1613347200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["T32GM070449"],"award-info":[{"award-number":["T32GM070449"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"crossref","award":["1R01AI148259 01"],"award-info":[{"award-number":["1R01AI148259 01"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2021,12]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Background<\/jats:title>\n                    <jats:p>The quantity of genomic data is expanding at an increasing rate. Tools for phylogenetic analysis which scale to the quantity of available data are required. To address this need, we present cognac, a user-friendly software package to rapidly generate concatenated gene alignments for phylogenetic analysis.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>\n                      We illustrate that cognac is able to rapidly identify phylogenetic marker genes using a data driven approach and efficiently generate concatenated gene alignments for very large genomic datasets. To benchmark our tool, we generated core gene alignments for eight unique genera of bacteria, including a dataset of over 11,000 genomes from the genus\n                      <jats:italic>Escherichia<\/jats:italic>\n                      producing an alignment with 1353 genes, which was constructed in less than 17\u00a0h.\n                    <\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Conclusions<\/jats:title>\n                    <jats:p>\n                      We demonstrate that cognac presents an efficient method for generating concatenated gene alignments for phylogenetic analysis. We have released cognac as an R package (\n                      <jats:ext-link xmlns:xlink=\"http:\/\/www.w3.org\/1999\/xlink\" ext-link-type=\"uri\" xlink:href=\"https:\/\/github.com\/rdcrawford\/cognac\">https:\/\/github.com\/rdcrawford\/cognac<\/jats:ext-link>\n                      ) with customizable parameters for adaptation to diverse applications.\n                    <\/jats:p>\n                  <\/jats:sec>","DOI":"10.1186\/s12859-021-03981-4","type":"journal-article","created":{"date-parts":[[2021,2,17]],"date-time":"2021-02-17T09:23:12Z","timestamp":1613553792000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["cognac: rapid generation of concatenated gene alignments for phylogenetic inference from large, bacterial whole genome sequencing datasets"],"prefix":"10.1186","volume":"22","author":[{"given":"Ryan D.","family":"Crawford","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8409-278X","authenticated-orcid":false,"given":"Evan S.","family":"Snitkin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,15]]},"reference":[{"key":"3981_CR1","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1038\/nrg3186","volume":"13","author":"Z Yang","year":"2012","unstructured":"Yang Z, Rannala B. Molecular phylogenetics: principles and practice. Nat Rev Genet. 2012;13:303\u201314.","journal-title":"Nat Rev Genet"},{"key":"3981_CR2","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1038\/304412a0","volume":"304","author":"M Kreitman","year":"1983","unstructured":"Kreitman M. Nucleotide polymorphism at the alcohol dehydrogenase locus of Drosophila melanogaster. Nature. 1983;304:412\u20137.","journal-title":"Nature"},{"key":"3981_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/j.1558-5646.2008.00549.x","volume":"63","author":"SV Edwards","year":"2009","unstructured":"Edwards SV. Is a new and general theory of molecular systematics emerging? Evolution. 2009;63:1\u201319.","journal-title":"Evolution"},{"key":"3981_CR4","doi-asserted-by":"publisher","first-page":"4023","DOI":"10.1098\/rstb.2008.0144","volume":"363","author":"N Galtier","year":"2008","unstructured":"Galtier N, Daubin V. Dealing with incongruence in phylogenomic analyses. Philos Trans R Soc B Biol Sci. 2008;363:4023\u20139.","journal-title":"Philos. Trans. R. Soc. B Biol. Sci."},{"key":"3981_CR5","doi-asserted-by":"publisher","first-page":"1933","DOI":"10.1126\/science.1116759","volume":"310","author":"A Rokas","year":"2005","unstructured":"Rokas A. Animal evolution and the molecular signature of radiations compressed in time. Science. 2005;310:1933\u20138.","journal-title":"Science"},{"key":"3981_CR6","doi-asserted-by":"publisher","first-page":"1283","DOI":"10.1126\/science.1123061","volume":"311","author":"FD Ciccarelli","year":"2006","unstructured":"Ciccarelli FD. Toward automatic reconstruction of a highly resolved tree of life. Science. 2006;311:1283\u20137.","journal-title":"Science"},{"key":"3981_CR7","doi-asserted-by":"publisher","first-page":"1246","DOI":"10.1093\/molbev\/msi111","volume":"22","author":"H Philippe","year":"2005","unstructured":"Philippe H, Lartillot N, Brinkmann H. Multigene analyses of bilaterian animals corroborate the monophyly of Ecdysozoa, Lophotrochozoa, and Protostomia. Mol Biol Evol. 2005;22:1246\u201353.","journal-title":"Mol Biol Evol"},{"key":"3981_CR8","doi-asserted-by":"publisher","first-page":"5477","DOI":"10.1038\/s41467-019-13443-4","volume":"10","author":"Q Zhu","year":"2019","unstructured":"Zhu Q, et al. Phylogenomics of 10,575 genomes reveals evolutionary proximity between domains Bacteria and Archaea. Nat Commun. 2019;10:5477.","journal-title":"Nat Commun"},{"key":"3981_CR9","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1093\/sysbio\/43.4.467","volume":"43","author":"RG Olmstead","year":"1994","unstructured":"Olmstead RG, Sweere JA. Combining data in phylogenetic systematics: an empirical approach using three molecular data sets in the solanaceae. Syst Biol. 1994;43:15.","journal-title":"Syst Biol"},{"key":"3981_CR10","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1080\/10635150801910436","volume":"57","author":"JW Leigh","year":"2008","unstructured":"Leigh JW, Susko E, Baumgartner M, Roger AJ. Testing congruence in phylogenomic analysis. Syst Biol. 2008;57:104\u201315.","journal-title":"Syst Biol"},{"key":"3981_CR11","doi-asserted-by":"crossref","unstructured":"Tonini J, Moore A, Stern D, Shcheglovitova M, Ort\u00ed G. Concatenation and species tree methods exhibit statistically indistinguishable accuracy under a range of simulated conditions. PLoS Curr. 7 (2015).","DOI":"10.1371\/currents.tol.34260cc27551a527b124ec5f6334b6be"},{"key":"3981_CR12","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1002\/jez.b.21026","volume":"304B","author":"SR Gadagkar","year":"2005","unstructured":"Gadagkar SR, Rosenberg MS, Kumar S. Inferring species phylogenies from multiple genes: Concatenated sequence tree versus consensus gene tree. J Exp Zoolog B Mol Dev Evol. 2005;304B:64\u201374.","journal-title":"J. Exp. Zoolog. B Mol. Dev. Evol."},{"key":"3981_CR13","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/j.micres.2010.02.003","volume":"166","author":"J Rajendhran","year":"2011","unstructured":"Rajendhran J, Gunasekaran P. Microbial phylogeny and diversity: small subunit ribosomal RNA sequence analysis and beyond. Microbiol Res. 2011;166:99\u2013110.","journal-title":"Microbiol Res"},{"key":"3981_CR14","doi-asserted-by":"publisher","first-page":"3140","DOI":"10.1073\/pnas.95.6.3140","volume":"95","author":"MCJ Maiden","year":"1998","unstructured":"Maiden MCJ, et al. Multilocus sequence typing: a portable approach to the identification of clones within populations of pathogenic microorganisms. Proc Natl Acad Sci U S A. 1998;95:3140\u20135.","journal-title":"Proc Natl Acad Sci U S A"},{"key":"3981_CR15","doi-asserted-by":"publisher","first-page":"2304","DOI":"10.1038\/ncomms3304","volume":"4","author":"N Segata","year":"2013","unstructured":"Segata N, B\u00f6rnigen D, Morgan XC, Huttenhower C. PhyloPhlAn is a new method for improved phylogenetic and taxonomic placement of microbes. Nat Commun. 2013;4:2304.","journal-title":"Nat Commun"},{"key":"3981_CR16","doi-asserted-by":"publisher","first-page":"3691","DOI":"10.1093\/bioinformatics\/btv421","volume":"31","author":"AJ Page","year":"2015","unstructured":"Page AJ, et al. Roary: rapid large-scale prokaryote pan genome analysis. Bioinformatics. 2015;31:3691\u20133.","journal-title":"Bioinformatics"},{"key":"3981_CR17","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1186\/1471-2164-9-75","volume":"9","author":"RK Aziz","year":"2008","unstructured":"Aziz RK, et al. The RAST server: rapid annotations using subsystems technology. BMC Genomics. 2008;9:75.","journal-title":"BMC Genomics"},{"key":"3981_CR18","doi-asserted-by":"publisher","first-page":"2068","DOI":"10.1093\/bioinformatics\/btu153","volume":"30","author":"T Seemann","year":"2014","unstructured":"Seemann T. Prokka: rapid prokaryotic genome annotation. Bioinforma Oxf Engl. 2014;30:2068\u20139.","journal-title":"Bioinforma Oxf Engl"},{"key":"3981_CR19","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1186\/1471-2105-11-119","volume":"11","author":"D Hyatt","year":"2010","unstructured":"Hyatt D, et al. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010;11:119.","journal-title":"BMC Bioinformatics"},{"key":"3981_CR20","doi-asserted-by":"publisher","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","volume":"28","author":"L Fu","year":"2012","unstructured":"Fu L, Niu B, Zhu Z, Wu S, Li W. CD-HIT: accelerated for clustering the next-generation sequencing data. Bioinformatics. 2012;28:3150\u20132.","journal-title":"Bioinformatics"},{"key":"3981_CR21","doi-asserted-by":"publisher","first-page":"772","DOI":"10.1093\/molbev\/mst010","volume":"30","author":"K Katoh","year":"2013","unstructured":"Katoh K, Standley DM. MAFFT multiple sequence alignment software version 7: improvements in performance and usability. Mol Biol Evol. 2013;30:772\u201380.","journal-title":"Mol Biol Evol"},{"key":"3981_CR22","first-page":"1","volume":"40","author":"D Eddelbuettel","year":"2011","unstructured":"Eddelbuettel D, Francois R. Rcpp: seamless R and C++ integration. J Stat Softw. 2011;40:1\u201318.","journal-title":"J Stat Softw"},{"key":"3981_CR23","doi-asserted-by":"publisher","first-page":"955","DOI":"10.1007\/978-0-387-09766-4_51","volume-title":"Encyclopedia of Parallel Computing","author":"AD Robison","year":"2011","unstructured":"Robison AD. Intel\u00ae Threading Building Blocks (TBB). In: Padua D, editor. Encyclopedia of Parallel Computing. New York: Springer; 2011. p. 955\u201364. https:\/\/doi.org\/10.1007\/978-0-387-09766-4_51."},{"key":"3981_CR24","unstructured":"Bengtsson H, R Core Team. future.apply: Apply Function to Elements in Parallel using Futures. 2020."},{"key":"3981_CR25","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1093\/bioinformatics\/btg412","volume":"20","author":"E Paradis","year":"2004","unstructured":"Paradis E, Claude J, Strimmer K. APE: analyses of phylogenetics and evolution in R language. Bioinformatics. 2004;20:289\u201390.","journal-title":"Bioinformatics"},{"key":"3981_CR26","doi-asserted-by":"crossref","unstructured":"Dettman JR, Sztepanacz JL, Kassen R. The properties of spontaneous mutations in the opportunistic pathogen Pseudomonas aeruginosa. BMC Genomics 2016;17.","DOI":"10.1186\/s12864-015-2244-3"},{"key":"3981_CR27","doi-asserted-by":"publisher","first-page":"1170","DOI":"10.1038\/s41564-020-0746-5","volume":"5","author":"NT Porter","year":"2020","unstructured":"Porter NT, et al. Phase-variable capsular polysaccharides and lipoproteins modify bacteriophage susceptibility in Bacteroides thetaiotaomicron. Nat Microbiol. 2020;5:1170\u201381.","journal-title":"Nat Microbiol"},{"key":"3981_CR28","doi-asserted-by":"publisher","first-page":"1160","DOI":"10.1093\/bib\/bbx108","volume":"20","author":"K Katoh","year":"2019","unstructured":"Katoh K, Rozewicki J, Yamada KD. MAFFT online service: multiple sequence alignment, interactive sequence choice and visualization. Brief Bioinform. 2019;20:1160\u20136.","journal-title":"Brief Bioinform"},{"key":"3981_CR29","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1089\/cmb.1994.1.337","volume":"1","author":"L Wang","year":"1994","unstructured":"Wang L, Jiang T. On the complexity of multiple sequence alignment. J Comput Biol. 1994;1:337\u201348.","journal-title":"J Comput Biol"},{"key":"3981_CR30","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3783-7","volume-title":"Principles and methods of sequence. Analysis sequence - evolution - function: computational approaches in comparative genomics","author":"EV Koonin","year":"2003","unstructured":"Koonin EV, Galperin MY. Principles and methods of sequence. Analysis sequence - evolution - function: computational approaches in comparative genomics. Dordrecht: Kluwer Academic; 2003."},{"key":"3981_CR31","doi-asserted-by":"publisher","first-page":"007","DOI":"10.1093\/gigascience\/giaa007","volume":"9","author":"SJ Bush","year":"2020","unstructured":"Bush SJ, et al. Genomic diversity affects the accuracy of bacterial single-nucleotide polymorphism\u2013calling pipelines. GigaScience. 2020;9:007.","journal-title":"GigaScience"},{"key":"3981_CR32","doi-asserted-by":"publisher","first-page":"304","DOI":"10.1101\/gr.241455.118","volume":"29","author":"JA Lees","year":"2019","unstructured":"Lees JA, et al. Fast and flexible bacterial genomic epidemiology with PopPUNK. Genome Res. 2019;29:304\u201316.","journal-title":"Genome Res"},{"key":"3981_CR33","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1186\/s13059-016-0997-x","volume":"17","author":"BD Ondov","year":"2016","unstructured":"Ondov BD, et al. Mash: fast genome and metagenome distance estimation using MinHash. Genome Biol. 2016;17:132.","journal-title":"Genome Biol"},{"key":"3981_CR34","doi-asserted-by":"publisher","first-page":"4286","DOI":"10.1128\/IAI.00207-11","volume":"79","author":"JJ Gillespie","year":"2011","unstructured":"Gillespie JJ, et al. PATRIC: the comprehensive bacterial bioinformatics resource with a focus on human pathogenic species. Infect Immun. 2011;79:4286\u201398.","journal-title":"Infect Immun"},{"key":"3981_CR35","doi-asserted-by":"publisher","first-page":"1043","DOI":"10.1101\/gr.186072.114","volume":"25","author":"DH Parks","year":"2015","unstructured":"Parks DH, Imelfort M, Skennerton CT, Hugenholtz P, Tyson GW. CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes. Genome Res. 2015;25:1043\u201355.","journal-title":"Genome Res"},{"key":"3981_CR36","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1186\/s12859-019-3068-y","volume":"20","author":"B Parrello","year":"2019","unstructured":"Parrello B, et al. A machine learning-based service for estimating quality of genomes using PATRIC. BMC Bioinformatics. 2019;20:486.","journal-title":"BMC Bioinformatics"},{"key":"3981_CR37","doi-asserted-by":"publisher","first-page":"1153","DOI":"10.1093\/cid\/ciu1173","volume":"60","author":"MK Hayden","year":"2015","unstructured":"Hayden MK, et al. Prevention of colonization and infection by Klebsiella pneumoniae carbapenemase-producing enterobacteriaceae in long-term acute-care hospitals. Clin Infect Dis Off Publ Infect Dis Soc Am. 2015;60:1153\u201361.","journal-title":"Clin Infect Dis Off Publ Infect Dis Soc Am"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-021-03981-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s12859-021-03981-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-021-03981-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,17]],"date-time":"2021-02-17T09:24:08Z","timestamp":1613553848000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-021-03981-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,15]]},"references-count":37,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["3981"],"URL":"https:\/\/doi.org\/10.1186\/s12859-021-03981-4","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2020.10.15.340901","asserted-by":"object"}]},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,2,15]]},"assertion":[{"value":"9 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 February 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"70"}}