{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T12:45:06Z","timestamp":1771073106073,"version":"3.50.1"},"reference-count":32,"publisher":"Oxford University Press (OUP)","issue":"4","license":[{"start":{"date-parts":[[2022,7,15]],"date-time":"2022-07-15T00:00:00Z","timestamp":1657843200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["32171821"],"award-info":[{"award-number":["32171821"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["31770639"],"award-info":[{"award-number":["31770639"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2662018PY071"],"award-info":[{"award-number":["2662018PY071"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2662020YLPY026"],"award-info":[{"award-number":["2662020YLPY026"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Projects of Shennongjia Academy of Forestry","award":["SAF202107"],"award-info":[{"award-number":["SAF202107"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>Protein phylogenetic analysis focuses on the evolutionary relationships among related protein sequences and can help researchers infer protein functions and developmental trajectories. With the advent of the big data era, the existing protein phylogenetic methods, including distance matrix and character-based methods, are facing challenges in both running time and application scope. Here, we developed an R package that we call CProtMEDIAS that is useful for protein phylogenetic analysis. In contrast to existing phylogenetic analysis methods, CProtMEDIAS utilizes dimensionality reduction algorithms to digitize multiple sequence alignments and quickly conduct phylogenetic analysis with a large number of amino acid sequences from similarly distant protein families and species. We used CProtMEDIAS to perform a dimensionality reduction, clustering, pseudotime, specific residue and evolutionary trajectory analysis of the plant homeobox superfamily. We found that CProtMEDIAS delivers consistent clustering, fast running and elegant presentation and thus provides powerful new tools and methods for protein clustering and evolutionary analysis.<\/jats:p>","DOI":"10.1093\/bib\/bbac276","type":"journal-article","created":{"date-parts":[[2022,7,14]],"date-time":"2022-07-14T22:06:51Z","timestamp":1657836411000},"source":"Crossref","is-referenced-by-count":4,"title":["CProtMEDIAS: clustering of amino acid sequences encoded by gene families by MErging and DIgitizing Aligned Sequences"],"prefix":"10.1093","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5265-6069","authenticated-orcid":false,"given":"Zhe","family":"Zhang","sequence":"first","affiliation":[{"name":"Key Laboratory of Horticultural Plant Biology of Ministry of Education, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"College of Horticulture and Forestry Sciences, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"Poplar Research Center, Huazhong Agricultural University , Wuhan 430070, China"}]},{"given":"Miaomiao","family":"Zhu","sequence":"additional","affiliation":[{"name":"Department of Physiology, School of Basic Medicine, Tongji Medical College, Huazhong University of Science and Technology , Wuhan 430030, China"}]},{"given":"Qi","family":"Xie","sequence":"additional","affiliation":[{"name":"Key Laboratory of Horticultural Plant Biology of Ministry of Education, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"College of Horticulture and Forestry Sciences, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"Poplar Research Center, Huazhong Agricultural University , Wuhan 430070, China"}]},{"given":"Robert M","family":"Larkin","sequence":"additional","affiliation":[{"name":"Key Laboratory of Horticultural Plant Biology of Ministry of Education, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"College of Horticulture and Forestry Sciences, Huazhong Agricultural University , Wuhan 430070, China"}]},{"given":"Xueping","family":"Shi","sequence":"additional","affiliation":[{"name":"Key Laboratory of Horticultural Plant Biology of Ministry of Education, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"College of Horticulture and Forestry Sciences, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"Poplar Research Center, Huazhong Agricultural University , Wuhan 430070, China"}]},{"given":"Bo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Key Laboratory of Horticultural Plant Biology of Ministry of Education, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"College of Horticulture and Forestry Sciences, Huazhong Agricultural University , Wuhan 430070, China"},{"name":"Poplar Research Center, Huazhong Agricultural University , Wuhan 430070, China"}]}],"member":"286","published-online":{"date-parts":[[2022,7,15]]},"reference":[{"key":"2022071906210299800_ref1","first-page":"1409","article-title":"A statistical method for evaluating systematic relationships","volume":"38","author":"Sokal","year":"1958","journal-title":"Univ Kansas Sci Bull"},{"key":"2022071906210299800_ref2","first-page":"406","article-title":"The neighbor-joining method: a new method for reconstructing phylogenetic trees","volume":"4","author":"Saitou","year":"1987","journal-title":"Mol Biol Evol"},{"key":"2022071906210299800_ref3","first-page":"945","article-title":"A simple method for estimating and testing minimum-evolution trees","volume":"9","author":"Rzhetsky","year":"1992","journal-title":"Mol Biol Evol"},{"key":"2022071906210299800_ref4","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/s00285-011-0458-9","article-title":"Non-hereditary maximum parsimony trees","volume":"65","author":"Fischer","year":"2012","journal-title":"J Math Biol"},{"key":"2022071906210299800_ref5","doi-asserted-by":"crossref","first-page":"368","DOI":"10.1007\/BF01734359","article-title":"Evolutionary trees from DNA sequences: a maximum likelihood approach","volume":"17","author":"Felsenstein","year":"1981","journal-title":"J Mol Evol"},{"key":"2022071906210299800_ref6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.0006-341X.1999.00001.x","article-title":"Bayesian phylogenetic inference via Markov chain Monte Carlo methods","volume":"55","author":"Mau","year":"1999","journal-title":"Biometrics"},{"key":"2022071906210299800_ref7","doi-asserted-by":"crossref","first-page":"792","DOI":"10.1093\/molbev\/msi066","article-title":"Comparison of the accuracies of several phylogenetic methods using protein and DNA sequences","volume":"22","author":"Hall","year":"2005","journal-title":"Mol Biol Evol"},{"key":"2022071906210299800_ref8","doi-asserted-by":"crossref","first-page":"1189","DOI":"10.1093\/bioinformatics\/btp033","article-title":"Jalview version 2--a multiple sequence alignment editor and analysis workbench","volume":"25","author":"Waterhouse","year":"2009","journal-title":"Bioinformatics"},{"key":"2022071906210299800_ref9","doi-asserted-by":"crossref","first-page":"861","DOI":"10.21105\/joss.00861","article-title":"UMAP: uniform manifold approximation and projection","volume":"3","author":"McInnes","year":"2018","journal-title":"J Open Source Softw"},{"key":"2022071906210299800_ref10","doi-asserted-by":"crossref","first-page":"765","DOI":"10.1145\/2783258.2783309","article-title":"Dimensionality reduction via graph structure learning","author":"Mao","year":"2015","journal-title":"Proceedings of the 21th ACM SIGKDD international conference on knowledge discovery and data mining"},{"key":"2022071906210299800_ref11","article-title":"An introduction to nonlinear dimensionality reduction by maximum variance unfolding","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"Weinberger","year":"2006"},{"key":"2022071906210299800_ref12","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1038\/nature10983","article-title":"The genomic and transcriptomic architecture of 2,000 breast tumours reveals novel subgroups","volume":"486","author":"Curtis","year":"2012","journal-title":"Nature"},{"key":"2022071906210299800_ref13","doi-asserted-by":"crossref","first-page":"471","DOI":"10.1140\/epjb\/e2013-40829-0","article-title":"A smart local moving algorithm for large-scale modularity-based community detection","volume":"86","author":"Waltman","year":"2013","journal-title":"Euro Phys J B"},{"key":"2022071906210299800_ref14","doi-asserted-by":"crossref","first-page":"D1040","DOI":"10.1093\/nar\/gkw982","article-title":"PlantTFDB 4.0: toward a central hub for transcription factors and regulatory interactions in plants","volume":"45","author":"Jin","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2022071906210299800_ref15","doi-asserted-by":"crossref","first-page":"D412","DOI":"10.1093\/nar\/gkaa913","article-title":"Pfam: the protein families database in 2021","volume":"49","author":"Mistry","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2022071906210299800_ref16","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/BF01908075","article-title":"Comparing partitions","volume":"2","author":"Hubert","year":"1985","journal-title":"J Classific"},{"key":"2022071906210299800_ref17","first-page":"275","article-title":"The rapid generation of mutation data matrices from protein sequences","volume":"8","author":"Jones","year":"1992","journal-title":"Comput Appl Biosci"},{"key":"2022071906210299800_ref18","first-page":"353","volume-title":"Atlas of Protein Sequences","author":"Schwarz","year":"1979"},{"key":"2022071906210299800_ref19","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/B978-1-4832-2734-4.50017-6","volume-title":"Evolving Genes and Proteins","author":"Zuckerkandl","year":"1965"},{"key":"2022071906210299800_ref20","first-page":"269","article-title":"Estimation of evolutionary distance between nucleotide sequences","volume":"1","author":"Tajima","year":"1984","journal-title":"Mol Biol Evol"},{"key":"2022071906210299800_ref21","first-page":"W5","article-title":"MAFFT-DASH: integrated protein sequence and structural alignment","volume":"47","author":"Rozewicki","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2022071906210299800_ref22","doi-asserted-by":"crossref","first-page":"6097","DOI":"10.1093\/nar\/18.20.6097","article-title":"Sequence logos: a new way to display consensus sequences","volume":"18","author":"Schneider","year":"1990","journal-title":"Nucleic Acids Res"},{"key":"2022071906210299800_ref23","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1038\/nbt.4096","article-title":"Integrating single-cell transcriptomic data across different conditions, technologies, and species","volume":"36","author":"Butler","year":"2018","journal-title":"Nat Biotechnol"},{"key":"2022071906210299800_ref24","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1038\/nbt.2859","article-title":"The dynamics and regulators of cell fate decisions are revealed by pseudotemporal ordering of single cells","volume":"32","author":"Trapnell","year":"2014","journal-title":"Nat Biotechnol"},{"key":"2022071906210299800_ref25","doi-asserted-by":"crossref","first-page":"1547","DOI":"10.1093\/molbev\/msy096","article-title":"MEGA X: molecular evolutionary genetics analysis across computing platforms","volume":"35","author":"Kumar","year":"2018","journal-title":"Mol Biol Evol"},{"key":"2022071906210299800_ref26","doi-asserted-by":"crossref","first-page":"592","DOI":"10.1093\/bioinformatics\/btq706","article-title":"Phangorn: phylogenetic analysis in R","volume":"27","author":"Schliep","year":"2011","journal-title":"Bioinformatics"},{"key":"2022071906210299800_ref27","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1093\/biomet\/53.3-4.325","article-title":"Some distance properties of latent root and vector methods used in multivariate analysis","volume":"53","author":"Gower","year":"1966","journal-title":"Biometrika"},{"key":"2022071906210299800_ref28","doi-asserted-by":"crossref","first-page":"2077","DOI":"10.1093\/bioinformatics\/bty058","article-title":"scEpath: energy landscape-based inference of transition probabilities and cellular trajectories from single-cell transcriptomic data","volume":"34","author":"Jin","year":"2018","journal-title":"Bioinformatics"},{"key":"2022071906210299800_ref29","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1016\/j.tplants.2007.08.003","article-title":"The true story of the HD-zip family","volume":"12","author":"Ariel","year":"2007","journal-title":"Trends Plant Sci"},{"key":"2022071906210299800_ref30","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1016\/j.crvi.2010.01.015","article-title":"Plant development: a TALE story","volume":"333","author":"Hamant","year":"2010","journal-title":"C R Biol"},{"key":"2022071906210299800_ref31","doi-asserted-by":"crossref","first-page":"248","DOI":"10.1186\/gb-2009-10-12-248","article-title":"The WUS homeobox-containing (WOX) protein family","volume":"10","author":"Graaff","year":"2009","journal-title":"Genome Biol"},{"key":"2022071906210299800_ref32","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","article-title":"Basic local alignment search tool","volume":"215","author":"Altschul","year":"1990","journal-title":"J Mol Biol"}],"container-title":["Briefings in Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/23\/4\/bbac276\/45016364\/bbac276.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/23\/4\/bbac276\/45016364\/bbac276.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T06:24:33Z","timestamp":1658211873000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bib\/article\/doi\/10.1093\/bib\/bbac276\/6644722"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,15]]},"references-count":32,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,7,18]]}},"URL":"https:\/\/doi.org\/10.1093\/bib\/bbac276","relation":{},"ISSN":["1467-5463","1477-4054"],"issn-type":[{"value":"1467-5463","type":"print"},{"value":"1477-4054","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2022,7,18]]},"published":{"date-parts":[[2022,7,15]]},"article-number":"bbac276"}}