{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T19:59:23Z","timestamp":1760731163375,"version":"3.37.3"},"reference-count":35,"publisher":"Oxford University Press (OUP)","issue":"15","license":[{"start":{"date-parts":[[2018,12,24]],"date-time":"2018-12-24T00:00:00Z","timestamp":1545609600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"publisher","award":["802\/16"],"award-info":[{"award-number":["802\/16"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,8,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Ancestral sequence reconstruction (ASR) is widely used to understand protein evolution, structure and function. Current ASR methodologies do not fully consider differences in evolutionary constraints among positions imposed by the three-dimensional (3D) structure of the protein. Here, we developed an ASR algorithm that allows different protein sites to evolve according to different mixtures of replacement matrices. We show that assigning replacement matrices to protein positions based on their solvent accessibility leads to ASR with higher log-likelihoods compared to na\u00efve models that assume a single replacement matrix for all sites. Improved ASR log-likelihoods are also demonstrated when solvent accessibility is predicted from protein sequences rather than inferred from a known 3D structure. Finally, we show that using such structure-aware mixture models results in substantial differences in the inferred ancestral sequences.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>http:\/\/fastml.tau.ac.il.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/bty1031","type":"journal-article","created":{"date-parts":[[2018,12,16]],"date-time":"2018-12-16T20:06:32Z","timestamp":1544990792000},"page":"2562-2568","source":"Crossref","is-referenced-by-count":20,"title":["Ancestral sequence reconstruction: accounting for structural information by averaging over replacement matrices"],"prefix":"10.1093","volume":"35","author":[{"given":"Asher","family":"Moshe","sequence":"first","affiliation":[{"name":"Department of Cell Research and Immunology, School of Molecular Cell Biology and Biotechnology, George S. Wise Faculty of Life Sciences, Tel Aviv University, Tel Aviv, Israel"}]},{"given":"Tal","family":"Pupko","sequence":"additional","affiliation":[{"name":"Department of Cell Research and Immunology, School of Molecular Cell Biology and Biotechnology, George S. Wise Faculty of Life Sciences, Tel Aviv University, Tel Aviv, Israel"}]}],"member":"286","published-online":{"date-parts":[[2018,12,24]]},"reference":[{"key":"2023062713155270700_bty1031-B1","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1007\/BF02498640","article-title":"Model of amino acid substitution in proteins encoded by mitochondrial DNA","volume":"42","author":"Adachi","year":"1996","journal-title":"J. Mol. Evol"},{"key":"2023062713155270700_bty1031-B2","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1007\/s002399910038","article-title":"Plastid genome phylogeny and a model of amino acid substitution for proteins encoded by chloroplast DNA","volume":"50","author":"Adachi","year":"2000","journal-title":"J. Mol. Evol"},{"key":"2023062713155270700_bty1031-B3","doi-asserted-by":"crossref","first-page":"753","DOI":"10.1002\/prot.20176","article-title":"Accurate prediction of solvent accessibility using neural networks\u2013based regression","volume":"56","author":"Adamczak","year":"2004","journal-title":"Prot. Struct. Funct. Bioinform"},{"key":"2023062713155270700_bty1031-B5","doi-asserted-by":"crossref","first-page":"W580","DOI":"10.1093\/nar\/gks498","article-title":"FastML: a web server for probabilistic reconstruction of ancestral sequences","volume":"40","author":"Ashkenazy","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2023062713155270700_bty1031-B6","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1002\/prot.25429","article-title":"A new parameter-rich structure-aware mechanistic model for amino acid substitution during evolution","volume":"86","author":"Chi","year":"2018","journal-title":"Prot. Struct. Funct. Bioinform"},{"key":"2023062713155270700_bty1031-B7","first-page":"345","volume-title":"Atlas of Protein Sequence and Structure","author":"Dayhoff","year":"1978"},{"key":"2023062713155270700_bty1031-B8","doi-asserted-by":"crossref","first-page":"368","DOI":"10.1007\/BF01734359","article-title":"Evolutionary trees from DNA sequences: a maximum likelihood approach","volume":"17","author":"Felsenstein","year":"1981","journal-title":"J. Mol. Evol"},{"key":"2023062713155270700_bty1031-B9","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1007\/BF00487738","article-title":"A method for estimating the number of invariant amino acid coding positions in a gene using cytochrome c as a model case","volume":"1","author":"Fitch","year":"1967","journal-title":"Biochem. Genet"},{"key":"2023062713155270700_bty1031-B10","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1089\/10665270252935494","article-title":"A structural EM algorithm for phylogenetic inference","volume":"9","author":"Friedman","year":"2002","journal-title":"J. Comput. Biol. J. Comput. Mol. Cell Biol"},{"key":"2023062713155270700_bty1031-B11","doi-asserted-by":"crossref","first-page":"445","DOI":"10.1093\/genetics\/149.1.445","article-title":"Assessing the impact of secondary structure and solvent accessibility on protein evolution","volume":"149","author":"Goldman","year":"1998","journal-title":"Genetics"},{"key":"2023062713155270700_bty1031-B12","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1093\/sysbio\/syq010","article-title":"New algorithms and methods to estimate maximum-likelihood phylogenies: assessing the performance of PhyML 3.0","volume":"59","author":"Guindon","year":"2010","journal-title":"Syst. Biol"},{"key":"2023062713155270700_bty1031-B13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1042\/BCJ20160507","article-title":"Exploring the past and the future of protein evolution with ancestral sequence reconstruction: the \u2018retro\u2019 approach to protein engineering","volume":"474","author":"Gumulya","year":"2017","journal-title":"Biochem. J"},{"key":"2023062713155270700_bty1031-B14","doi-asserted-by":"crossref","first-page":"275","DOI":"10.1093\/bioinformatics\/8.3.275","article-title":"The rapid generation of mutation data matrices from protein sequences","volume":"8","author":"Jones","year":"1992","journal-title":"Bioinformatics"},{"key":"2023062713155270700_bty1031-B15","first-page":"132.","article-title":"Evolution of protein molecules","volume":"3","author":"Jukes","year":"1969","journal-title":"Mammalian Prot. Metab"},{"key":"2023062713155270700_bty1031-B16","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1093\/molbev\/mss080","article-title":"Protein conformational diversity modulates sequence divergence","volume":"30","author":"Juritz","year":"2013","journal-title":"Mol. Biol. Evol"},{"key":"2023062713155270700_bty1031-B17","doi-asserted-by":"crossref","first-page":"2577","DOI":"10.1002\/bip.360221211","article-title":"Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features","volume":"22","author":"Kabsch","year":"1983","journal-title":"Biopolymers"},{"key":"2023062713155270700_bty1031-B18","doi-asserted-by":"crossref","first-page":"641","DOI":"10.1093\/protein\/8.7.641","article-title":"Context-dependent optimal substitution matrices","volume":"8","author":"Koshi","year":"1995","journal-title":"Prot. Eng. Des. Sel"},{"key":"2023062713155270700_bty1031-B19","doi-asserted-by":"crossref","first-page":"2286","DOI":"10.1093\/bioinformatics\/btp368","article-title":"PhyloBayes 3: a Bayesian software package for phylogenetic reconstruction and molecular dating","volume":"25","author":"Lartillot","year":"2009","journal-title":"Bioinformatics"},{"key":"2023062713155270700_bty1031-B20","doi-asserted-by":"crossref","first-page":"1307","DOI":"10.1093\/molbev\/msn067","article-title":"An improved general amino acid replacement matrix","volume":"25","author":"Le","year":"2008","journal-title":"Mol. Biol. Evol"},{"key":"2023062713155270700_bty1031-B21","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1093\/sysbio\/syq002","article-title":"Accounting for solvent accessibility and secondary structure in protein phylogenetics is clearly beneficial","volume":"59","author":"Le","year":"2010","journal-title":"Syst. Biol"},{"key":"2023062713155270700_bty1031-B22","doi-asserted-by":"crossref","DOI":"10.1093\/acprof:oso\/9780199299188.001.0001","volume-title":"Ancestral Sequence Reconstruction","author":"Liberles","year":"2007"},{"key":"2023062713155270700_bty1031-B23","doi-asserted-by":"crossref","first-page":"539","DOI":"10.1007\/978-1-4939-1292-6_44","article-title":"Tracing ancestral specificity of lectins: ancestral sequence reconstruction method as a new approach in protein engineering","volume":"1200","author":"Ogawa","year":"2014","journal-title":"Methods Mol. Biol"},{"key":"2023062713155270700_bty1031-B24","first-page":"43","article-title":"Probabilistic models and their impact on the accuracy of reconstructed ancestral protein sequences","volume":"4","author":"Pupko","year":"2008","journal-title":"Ances. Seq. Reconst"},{"key":"2023062713155270700_bty1031-B25","doi-asserted-by":"crossref","first-page":"1116","DOI":"10.1093\/bioinformatics\/18.8.1116","article-title":"A branch-and-bound algorithm for the inference of ancestral amino-acid sequences when the replacement rate varies among sites: application to the evolution of five gene families","volume":"18","author":"Pupko","year":"2002","journal-title":"Bioinformatics"},{"key":"2023062713155270700_bty1031-B26","doi-asserted-by":"crossref","first-page":"890","DOI":"10.1093\/oxfordjournals.molbev.a026369","article-title":"A fast algorithm for joint reconstruction of ancestral amino acid sequences","volume":"17","author":"Pupko","year":"2000","journal-title":"Mol. Biol. Evol"},{"key":"2023062713155270700_bty1031-B27","doi-asserted-by":"crossref","first-page":"12847.","DOI":"10.1038\/ncomms12847","article-title":"An experimental phylogeny to benchmark ancestral sequence reconstruction","volume":"7","author":"Randall","year":"2016","journal-title":"Nat. Commun"},{"key":"2023062713155270700_bty1031-B28","doi-asserted-by":"crossref","first-page":"14522","DOI":"10.1021\/bi035097r","article-title":"Dimerization in aminergic G-protein-coupled receptors: application of a hidden-site class model of evolution","volume":"42","author":"Soyer","year":"2003","journal-title":"Biochemistry"},{"key":"2023062713155270700_bty1031-B29","doi-asserted-by":"crossref","first-page":"2725","DOI":"10.1093\/molbev\/mst197","article-title":"MEGA6: molecular evolutionary genetics analysis version 6.0","volume":"30","author":"Tamura","year":"2013","journal-title":"Mol. Biol. Evol"},{"key":"2023062713155270700_bty1031-B30","doi-asserted-by":"crossref","first-page":"e80635.","DOI":"10.1371\/journal.pone.0080635","article-title":"Maximum allowed solvent accessibilites of residues in proteins","volume":"8","author":"Tien","year":"2013","journal-title":"PLoS One"},{"key":"2023062713155270700_bty1031-B31","doi-asserted-by":"crossref","first-page":"D364","DOI":"10.1093\/nar\/gku1028","article-title":"A series of PDB-related databanks for everyday needs","volume":"43","author":"Touw","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023062713155270700_bty1031-B32","doi-asserted-by":"crossref","first-page":"1089","DOI":"10.1126\/science.172.3988.1089","article-title":"Fitting discrete probability distributions to evolutionary events","volume":"172","author":"Uzzell","year":"1971","journal-title":"Science"},{"key":"2023062713155270700_bty1031-B34","doi-asserted-by":"crossref","first-page":"691","DOI":"10.1093\/oxfordjournals.molbev.a003851","article-title":"A general empirical model of protein evolution derived from multiple protein families using a maximum-likelihood approach","volume":"18","author":"Whelan","year":"2001","journal-title":"Mol. Biol. Evol"},{"key":"2023062713155270700_bty1031-B35","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1016\/0169-5347(96)10041-0","article-title":"Among-site rate variation and its impact on phylogenetic analyses","volume":"11","author":"Yang","year":"1996","journal-title":"Trends Ecol. Evol"},{"key":"2023062713155270700_bty1031-B36","doi-asserted-by":"crossref","first-page":"1641","DOI":"10.1093\/genetics\/141.4.1641","article-title":"A new method of inference of ancestral nucleotide and amino acid sequences","volume":"141","author":"Yang","year":"1995","journal-title":"Genetics"},{"key":"2023062713155270700_bty1031-B37","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1016\/j.csbj.2017.05.002","article-title":"Resurrecting the dead (molecules)","volume":"15","author":"Zaucha","year":"2017","journal-title":"Comput. Struct. Biotechnol. J"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/15\/2562\/50722580\/bioinformatics_35_15_2562.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/15\/2562\/50722580\/bioinformatics_35_15_2562.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,27]],"date-time":"2023-06-27T13:17:35Z","timestamp":1687871855000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/35\/15\/2562\/5258106"}},"subtitle":[],"editor":[{"given":"Russell","family":"Schwartz","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2018,12,24]]},"references-count":35,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2019,8,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/bty1031","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"type":"print","value":"1367-4803"},{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2019,8,1]]},"published":{"date-parts":[[2018,12,24]]}}}