{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T08:07:53Z","timestamp":1772611673937,"version":"3.50.1"},"reference-count":19,"publisher":"Oxford University Press (OUP)","issue":"9","funder":[{"name":"United States Department of Agriculture, Agricultural Research Service","award":["5030-21000-071-000D"],"award-info":[{"award-number":["5030-21000-071-000D"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>Identification of allelic or corresponding genes (pan-genes) within a species or genus is important for discovery of biologically significant genetic conservation and variation. Similarly, identification of orthologs (gene families) across wider evolutionary distances is important for understanding the genetic basis for similar or differing traits. Especially in plants, several complications make identification of pan-genes and gene families challenging, including whole-genome duplications, evolutionary rate differences among lineages, and varying qualities of assemblies and annotations. Here, we document and distribute a set of workflows that we have used to address these problems.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Pandagma is a set of configurable workflows for identifying and comparing pan-gene sets and gene families for annotation sets from eukaryotic genomes, using a combination of homology, synteny, and expected rates of synonymous change in coding sequence.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>The Pandagma workflows, example configurations, implementation details, and scripts for retrieving public datasets, are available at https:\/\/github.com\/legumeinfo\/pandagma<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae526","type":"journal-article","created":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T21:14:05Z","timestamp":1724534045000},"source":"Crossref","is-referenced-by-count":5,"title":["Pandagma: a tool for identifying pan-gene sets and gene families at desired evolutionary depths and accommodating whole-genome duplications"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2777-8034","authenticated-orcid":false,"given":"Steven B","family":"Cannon","sequence":"first","affiliation":[{"name":"USDA\u2014Agricultural Research Service, Corn Insects and Crop Genetics Research Unit, 819 Wallace Rd. , Ames, IA 50011,","place":["United States"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8182-8763","authenticated-orcid":false,"given":"Hyun-Oh","family":"Lee","sequence":"additional","affiliation":[{"name":"USDA\u2014Agricultural Research Service, Corn Insects and Crop Genetics Research Unit, 819 Wallace Rd. ORISE Fellow, , Ames, IA 50011,","place":["United States"]}]},{"given":"Nathan T","family":"Weeks","sequence":"additional","affiliation":[{"name":"USDA\u2014Agricultural Research Service, Corn Insects and Crop Genetics Research Unit, 819 Wallace Rd. , Ames, IA 50011,","place":["United States"]}]},{"given":"Joel","family":"Berendzen","sequence":"additional","affiliation":[{"name":"GenerisBio , 4327 Lost Feather Ln, Santa Fe, NM 87507,","place":["United States"]}]}],"member":"286","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"2024092712594685600_btae526-B1","doi-asserted-by":"crossref","first-page":"bbaa198","DOI":"10.1093\/bib\/bbaa198","article-title":"Challenges in gene-oriented approaches for pangenome content discovery","volume":"22","author":"Bonnici","year":"2021","journal-title":"Brief Bioinform"},{"key":"2024092712594685600_btae526-B2","doi-asserted-by":"crossref","first-page":"1070","DOI":"10.1111\/pbi.12454","article-title":"Genomics of crop wild relatives: expanding the gene Pool for crop improvement","volume":"14","author":"Brozynska","year":"2016","journal-title":"Plant Biotechnol J"},{"key":"2024092712594685600_btae526-B3","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1186\/s13059-023-03071-z","article-title":"GET_PANGENES: calling pangenes from plant genome alignments confirms presence-absence variation","volume":"24","author":"Contreras-Moreira","year":"2023","journal-title":"Genome Biol"},{"key":"2024092712594685600_btae526-B4","doi-asserted-by":"crossref","first-page":"6991","DOI":"10.1093\/jxb\/erv432","article-title":"Whole genome duplications in plants: an overview from Arabidopsis","volume":"66","author":"Del Pozo","year":"2015","journal-title":"J Exp Bot"},{"key":"2024092712594685600_btae526-B5","doi-asserted-by":"crossref","first-page":"33964","DOI":"10.1038\/srep33964","article-title":"FAMSA: fast and accurate multiple sequence alignment of huge protein families","volume":"6","author":"Deorowicz","year":"2016","journal-title":"Sci Rep"},{"key":"2024092712594685600_btae526-B6","doi-asserted-by":"crossref","first-page":"e1002195","DOI":"10.1371\/journal.pcbi.1002195","article-title":"Accelerated profile HMM searches","volume":"7","author":"Eddy","year":"2011","journal-title":"PLoS Comput Biol"},{"key":"2024092712594685600_btae526-B7","doi-asserted-by":"crossref","first-page":"238","DOI":"10.1186\/s13059-019-1832-y","article-title":"OrthoFinder: phylogenetic orthology inference for comparative genomics","volume":"20","author":"Emms","year":"2019","journal-title":"Genome Biol"},{"key":"2024092712594685600_btae526-B8","doi-asserted-by":"crossref","first-page":"e1000703","DOI":"10.1371\/journal.pcbi.1000703","article-title":"Getting started in gene orthology and functional analysis","volume":"6","author":"Fang","year":"2010","journal-title":"PLoS Comput Biol"},{"key":"2024092712594685600_btae526-B9","doi-asserted-by":"crossref","first-page":"3643","DOI":"10.1093\/bioinformatics\/bth397","article-title":"DAGchainer: a tool for mining segmental genome duplications and synteny","volume":"20","author":"Haas","year":"2004","journal-title":"Bioinformatics"},{"key":"2024092712594685600_btae526-B10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.pbi.2018.11.001","article-title":"Plant genome sequences: past, present, future","volume":"48","author":"Kersey","year":"2019","journal-title":"Curr Opin Plant Biol"},{"key":"2024092712594685600_btae526-B11","doi-asserted-by":"crossref","first-page":"e78526","DOI":"10.7554\/eLife.78526","article-title":"GENESPACE tracks regions of interest and gene copy number variation across multiple genomes","volume":"11","author":"Lovell","year":"2022","journal-title":"Elife"},{"key":"2024092712594685600_btae526-B12","doi-asserted-by":"crossref","first-page":"e9490","DOI":"10.1371\/journal.pone.0009490","article-title":"FastTree 2\u2013approximately maximum-likelihood trees for large alignments","volume":"5","author":"Price","year":"2010","journal-title":"PLoS One"},{"key":"2024092712594685600_btae526-B13","doi-asserted-by":"crossref","first-page":"276","DOI":"10.1016\/S0168-9525(00)02024-2","article-title":"EMBOSS: the European molecular biology open software suite","volume":"16","author":"Rice","year":"2000","journal-title":"Trends Genet"},{"key":"2024092712594685600_btae526-B14","doi-asserted-by":"crossref","first-page":"16115","DOI":"10.1038\/nplants.2016.115","article-title":"Whole-genome duplication as a key factor in crop domestication","volume":"2","author":"Salman-Minkov","year":"2016","journal-title":"Nat Plants"},{"key":"2024092712594685600_btae526-B15","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1016\/j.pbi.2012.03.011","article-title":"Ancient whole genome duplications, novelty and diversification: The WGD radiation lag-time model","volume":"15","author":"Schranz","year":"2012","journal-title":"Curr Opin Plant Biol"},{"key":"2024092712594685600_btae526-B16","doi-asserted-by":"crossref","first-page":"1026","DOI":"10.1038\/nbt.3988","article-title":"MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets","volume":"35","author":"Steinegger","year":"2017","journal-title":"Nat Biotechnol"},{"key":"2024092712594685600_btae526-B17","doi-asserted-by":"crossref","first-page":"13950","DOI":"10.1073\/pnas.0506758102","article-title":"Genome analysis of multiple pathogenic isolates of Streptococcus agalactiae: implications for the microbial \u201cpan-genome","volume":"102","author":"Tettelin","year":"2005","journal-title":"Proc Natl Acad Sci USA"},{"key":"2024092712594685600_btae526-B18","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1137\/040608635","article-title":"Graph clustering via a discrete uncoupling process","volume":"30","author":"Van Dongen","year":"2008","journal-title":"SIAM J Matrix Anal Appl"},{"key":"2024092712594685600_btae526-B19","doi-asserted-by":"crossref","first-page":"1586","DOI":"10.1093\/molbev\/msm088","article-title":"PAML 4: phylogenetic analysis by maximum likelihood","volume":"24","author":"Yang","year":"2007","journal-title":"Mol Biol Evol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae526\/58913822\/btae526.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae526\/59371229\/btae526.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/9\/btae526\/59371229\/btae526.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,27]],"date-time":"2024-09-27T13:00:02Z","timestamp":1727442002000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae526\/7740678"}},"subtitle":[],"editor":[{"given":"Russell","family":"Schwartz","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":19,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,9,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae526","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,9]]},"published":{"date-parts":[[2024,8,24]]},"article-number":"btae526"}}