{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:06:31Z","timestamp":1773277591862,"version":"3.50.1"},"reference-count":122,"publisher":"Public Library of Science (PLoS)","issue":"7","license":[{"start":{"date-parts":[[2010,7,15]],"date-time":"2010-07-15T00:00:00Z","timestamp":1279152000000},"content-version":"unspecified","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/publicdomain\/zero\/1.0\/"}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1000852","type":"journal-article","created":{"date-parts":[[2010,7,15]],"date-time":"2010-07-15T21:16:52Z","timestamp":1279228612000},"page":"e1000852","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":61,"title":["The Construction and Use of Log-Odds Substitution Scores for Multiple Sequence Alignment"],"prefix":"10.1371","volume":"6","author":[{"given":"Stephen F.","family":"Altschul","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John C.","family":"Wootton","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Elena","family":"Zaslavsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi-Kuo","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"340","published-online":{"date-parts":[[2010,7,15]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1016\/0022-2836(81)90087-5","article-title":"Identification of common molecular subsequences.","volume":"147","author":"TF Smith","year":"1981","journal-title":"J Mol Biol"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"501","DOI":"10.1007\/BF02459499","article-title":"Pattern recognition in genetic sequences by mismatch density.","volume":"46","author":"PH Sellers","year":"1984","journal-title":"Bull Math Biol"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"2444","DOI":"10.1073\/pnas.85.8.2444","article-title":"Improved tools for biological sequence comparison.","volume":"85","author":"WR Pearson","year":"1988","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","article-title":"Gapped BLAST and PSI-BLAST: a new generation of protein database search programs.","volume":"25","author":"SF Altschul","year":"1997","journal-title":"Nucleic Acids Res"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"D205","DOI":"10.1093\/nar\/gkn845","article-title":"CDD: specific functional annotation with the Conserved Domain Database.","volume":"37","author":"A Marchler-Bauer","year":"2009","journal-title":"Nucleic Acids Res"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"4678","DOI":"10.1093\/nar\/gkm414","article-title":"The identification of complete domains within protein sequences using accurate e-values for semi-global alignment.","volume":"35","author":"MG Kann","year":"2007","journal-title":"Nucleic Acids Res"},{"key":"ref7","first-page":"345","article-title":"A model of evolutionary change in proteins.","volume":"volume 5","author":"MO Dayhoff","year":"1978"},{"key":"ref8","first-page":"353","article-title":"Matrices for detecting distant relationships.","volume":"volume 5","author":"RM Schwartz","year":"1978"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"112","DOI":"10.1007\/BF02100085","article-title":"Aligning amino acid sequences: comparison of commonly used methods.","volume":"21","author":"DF Feng","year":"1985","journal-title":"J Mol Evol"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1016\/S0022-5193(86)80075-3","article-title":"The classification of amino acid conservation.","volume":"119","author":"WR Taylor","year":"1986","journal-title":"J Theor Biol"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"276","DOI":"10.1111\/j.1399-3011.1987.tb02254.x","article-title":"New scoring matrix for amino acid residue exchanges based on residue characteristic physical parameters.","volume":"29","author":"JKM Rao","year":"1987","journal-title":"Int J Peptide Protein Res"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"1019","DOI":"10.1016\/0022-2836(88)90058-7","article-title":"Amino acid substitutions in structurally related proteins.","volume":"204","author":"JL Risler","year":"1988","journal-title":"J Mol Biol"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1126\/science.1604319","article-title":"Exhaustive matching of the entire protein sequence database.","volume":"256","author":"GH Gonnet","year":"1992","journal-title":"Science"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"10915","DOI":"10.1073\/pnas.89.22.10915","article-title":"Amino acid substitution matrices from protein blocks.","volume":"89","author":"S Henikoff","year":"1992","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1002\/pro.5560010203","article-title":"Environment-specific amino acid substitution tables: Tertiary templates and prediction of protein folds.","volume":"1","author":"J Overington","year":"1992","journal-title":"Prot Sci"},{"key":"ref16","first-page":"275","article-title":"The rapid generation of mutation data matrices from protein sequences.","volume":"8","author":"DT Jones","year":"1992","journal-title":"Comput Appl Biosci"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"498","DOI":"10.1002\/1097-0134(20001201)41:4<498::AID-PROT70>3.0.CO;2-3","article-title":"Optimization of a new score function for the detection of remote homologs.","volume":"41","author":"M Kann","year":"2000","journal-title":"Proteins"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"760","DOI":"10.1093\/bioinformatics\/16.9.760","article-title":"PHAT: a transmembrane-specific substitution matrix.","volume":"16","author":"PC Ng","year":"2000","journal-title":"Bioinformatics"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"S182","DOI":"10.1093\/bioinformatics\/17.suppl_1.S182","article-title":"Non-symmetric score matrices and the detection of homologous transmembrane proteins.","volume":"17,","author":"T M\u00fcller","year":"2001","journal-title":"Bioinformatics"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"910","DOI":"10.1002\/prot.21775","article-title":"Context-specific amino acid substitution matrices and their use in the detection of protein homologs.","volume":"71","author":"NC Goonesekere","year":"2008","journal-title":"Proteins"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1016\/S1046-2023(05)80165-3","article-title":"Improved sensitivity of nucleic acid database searches using application-specific scoring matrices.","volume":"3","author":"DJ States","year":"1991","journal-title":"Methods"},{"key":"ref22","first-page":"115","article-title":"Scoring pairwise genomic sequence alignments.","author":"F Chiaromonte","year":"2002"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"2264","DOI":"10.1073\/pnas.87.6.2264","article-title":"Methods for assessing the statistical significance of molecular sequence features by using general scoring schemes.","volume":"87","author":"S Karlin","year":"1990","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"2022","DOI":"10.1214\/aop\/1176988493","article-title":"Limit distribution of maximal nonaligned two-sequence segmental score.","volume":"22","author":"A Dembo","year":"1994","journal-title":"Ann Prob"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1137\/0128004","article-title":"Minimal mutation trees of sequences.","volume":"28","author":"D Sankoff","year":"1975","journal-title":"SIAM J Appl Math"},{"key":"ref26","first-page":"253","article-title":"Simultaneous comparison of three or more sequences related by a tree.","author":"D Sankoff","year":"1983"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"3073","DOI":"10.1073\/pnas.82.10.3073","article-title":"Simultaneous comparison of three protein sequences.","volume":"82","author":"M Murata","year":"1985","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1016\/0022-2836(86)90252-4","article-title":"Multiple sequence alignment.","volume":"191","author":"DJ Bacon","year":"1986","journal-title":"J Mol Biol"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1016\/0022-2836(86)90165-8","article-title":"Information content of binding sites on nucleotide sequences.","volume":"188","author":"TD Schneider","year":"1986","journal-title":"J Mol Biol"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1016\/0022-2836(91)90193-A","article-title":"Amino acid substitution matrices from an information theoretic perspective.","volume":"219","author":"SF Altschul","year":"1991","journal-title":"J Mol Biol"},{"key":"ref31","first-page":"47","article-title":"Using Dirichlet mixture priors to derive hidden Markov models for protein families.","author":"M Brown","year":"1993"},{"key":"ref32","first-page":"327","article-title":"Dirichlet mixtures: a method for improved detection of weak but significant protein sequence homology.","volume":"12","author":"K Sj\u00f6lander","year":"1996","journal-title":"Comput Appl Biosci"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4643.001.0001","article-title":"The Minimum Description Length Principle","author":"PD Gr\u00fcnwald","year":"2007"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"10523","DOI":"10.1073\/pnas.0403564101","article-title":"MotifPrototyper: a Bayesian profile model for motif families.","volume":"101","author":"EP Xing","year":"2004","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1186\/1471-2105-6-83","article-title":"Bayesian coestimation of phylogeny and sequence alignment.","volume":"6","author":"G Lunter","year":"2005","journal-title":"BMC Bioinformatics"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"e1000392","DOI":"10.1371\/journal.pcbi.1000392","article-title":"Fast statistical alignment.","volume":"5","author":"RK Bradley","year":"2009","journal-title":"PLoS Comput Biol"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1186\/1471-2148-9-217","article-title":"BigFoot: Bayesian alignment and phylogenetic footprinting with MCMC.","volume":"9","author":"R Satija","year":"2009","journal-title":"BMC Evol Biol"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1007\/BF02603120","article-title":"Progressive sequence alignment as a prerequisite to correct phylogenetic trees.","volume":"25","author":"DF Feng","year":"1987","journal-title":"J Mol Evol"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"4673","DOI":"10.1093\/nar\/22.22.4673","article-title":"CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice.","volume":"22","author":"JD Thompson","year":"1994","journal-title":"Nucleic Acids Res"},{"key":"ref40","first-page":"3836","article-title":"Searching databases of conserved sequence regions by aligning protein multiple-alignments.","volume":"24","author":"S Pietrokovski","year":"1996","journal-title":"Nucleic Acids Res"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"232","DOI":"10.1110\/ps.9.2.232","article-title":"Comparison of sequence profiles. strategies for structural predictions using sequence information.","volume":"9","author":"L Rychlewski","year":"2000","journal-title":"Protein Sci"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"1257","DOI":"10.1006\/jmbi.2001.5293","article-title":"Within the twilight zone: a sensitive profile-profile comparison tool based on information theory.","volume":"315","author":"G Yona","year":"2002","journal-title":"J Mol Biol"},{"key":"ref43","doi-asserted-by":"crossref","first-page":"1404","DOI":"10.1093\/bioinformatics\/btg158","article-title":"SATCHMO: sequence alignment and tree construction using hidden markov models.","volume":"19","author":"RC Edgar","year":"2003","journal-title":"Bioinformatics"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"683","DOI":"10.1093\/nar\/gkg154","article-title":"Finding weak similarities between proteins by sequence profile comparison.","volume":"31","author":"AR Panchenko","year":"2003","journal-title":"Nucleic Acids Res"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1016\/S0022-2836(02)01371-2","article-title":"COMPASS: a tool for comparison of multiple protein alignments with assessment of statistical significance.","volume":"326","author":"R Sadreyev","year":"2003","journal-title":"J Mol Biol"},{"key":"ref46","doi-asserted-by":"crossref","first-page":"1301","DOI":"10.1093\/bioinformatics\/bth090","article-title":"A comparison of scoring functions for protein sequence profile alignment.","volume":"20","author":"RC Edgar","year":"2004","journal-title":"Bioinformatics"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"1612","DOI":"10.1110\/ps.03601504","article-title":"Scoring profile-to-profile sequence alignments.","volume":"13","author":"G Wang","year":"2004","journal-title":"Protein Sci"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"951","DOI":"10.1093\/bioinformatics\/bti125","article-title":"Protein homology detection by HMM-HMM comparison.","volume":"21","author":"J S\u00f6ding","year":"2005","journal-title":"Bioinformatics"},{"key":"ref49","article-title":"Information Theory, Inference, and Learning Algorithms","author":"DJC MacKay","year":"2003"},{"key":"ref50","doi-asserted-by":"crossref","first-page":"647","DOI":"10.1016\/0022-2836(89)90234-9","article-title":"Weights for data related by a tree.","volume":"207","author":"SF Altschul","year":"1989","journal-title":"J Mol Biol"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"813","DOI":"10.1016\/S0022-2836(99)80003-5","article-title":"Weighting aligned protein or nucleic acid sequences to correct for unequal representation.","volume":"216","author":"PR Sibbald","year":"1990","journal-title":"J Mol Biol"},{"key":"ref52","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1002\/prot.340090107","article-title":"Database of homology-derived protein structures and the structural meaning of sequence alignment.","volume":"9","author":"C Sander","year":"1991","journal-title":"Proteins"},{"key":"ref53","doi-asserted-by":"crossref","first-page":"8777","DOI":"10.1073\/pnas.90.19.8777","article-title":"Weighting in sequence space: a comparison of methods in terms of generalized sequences.","volume":"90","author":"M Vingron","year":"1993","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref54","first-page":"1067","article-title":"Volume changes in protein evolution. Appendix: A method to weight protein sequences to correct for unequal representation.","volume":"236","author":"M Gerstein","year":"1994","journal-title":"J Mol Biol"},{"key":"ref55","doi-asserted-by":"crossref","first-page":"574","DOI":"10.1016\/0022-2836(94)90032-9","article-title":"Position-based sequence weights.","volume":"243","author":"S Henikoff","year":"1994","journal-title":"J Mol Biol"},{"key":"ref56","first-page":"19","article-title":"Improved sensitivity of profile searches through the use of sequence weights and gap excision.","volume":"10","author":"JD Thompson","year":"1994","journal-title":"Comput Appl Biosci"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1089\/cmb.1995.2.9","article-title":"Maximum discrimination hidden Markov models of sequence consensus.","volume":"2","author":"SR Eddy","year":"1995","journal-title":"J Comput Biol"},{"key":"ref58","first-page":"543","article-title":"A weighting system and algorithm for aligning many phylogenetically related sequences.","volume":"11","author":"O Gotoh","year":"1995","journal-title":"Comput Appl Biosci"},{"key":"ref59","first-page":"215","article-title":"Maximum entropy weighting of aligned sequences of protein or DNA.","author":"A Krogh","year":"1995"},{"key":"ref60","first-page":"15","article-title":"The megaprior heuristic for discovering protein sequence patterns.","author":"TL Bailey","year":"1996"},{"key":"ref61","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1093\/protein\/12.5.387","article-title":"PSIC: profile extraction from sequence alignments with position-specific counts of independent observations.","volume":"12","author":"SR Sunyaev","year":"1999","journal-title":"Protein Eng"},{"key":"ref62","doi-asserted-by":"crossref","first-page":"e160","DOI":"10.1371\/journal.pcbi.0030160","article-title":"Automated protein subfamily identification and classification.","volume":"3","author":"DP Brown","year":"2007","journal-title":"PLoS Comput Biol"},{"key":"ref63","doi-asserted-by":"crossref","first-page":"815","DOI":"10.1093\/nar\/gkn981","article-title":"PSI-BLAST pseudocounts and the minimum description length principle.","volume":"37","author":"SF Altschul","year":"2009","journal-title":"Nucleic Acids Res"},{"key":"ref64","doi-asserted-by":"crossref","first-page":"15688","DOI":"10.1073\/pnas.2533904100","article-title":"The compositional adjustment of amino acid substitution matrices.","volume":"100","author":"YK Yu","year":"2003","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref65","doi-asserted-by":"crossref","first-page":"902","DOI":"10.1093\/bioinformatics\/bti070","article-title":"The construction of amino acid substitution matrices for the comparison of proteins with non-standard compositions.","volume":"21","author":"YK Yu","year":"2005","journal-title":"Bioinformatics"},{"key":"ref66","doi-asserted-by":"crossref","first-page":"453","DOI":"10.1098\/rspa.1946.0056","article-title":"An invariant form of the prior probability in estimation problems.","volume":"186","author":"H Jeffreys","year":"1946","journal-title":"Proc Royal Soc London Series A"},{"key":"ref67","doi-asserted-by":"crossref","first-page":"939","DOI":"10.1093\/nar\/gkn1019","article-title":"Pseudocounts for transcription factor binding sites.","volume":"37","author":"K Nishida","year":"2009","journal-title":"Nucleic Acids Res"},{"key":"ref68","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/S0022-2836(05)80006-3","article-title":"Sequence alignment and penalty choice. Review of concepts, case studies and implications.","volume":"235","author":"M Vingron","year":"1994","journal-title":"J Mol Biol"},{"key":"ref69","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1126\/science.8211139","article-title":"Detecting subtle sequence signals: A Gibbs sampling strategy for multiple alignment.","volume":"262","author":"CE Lawrence","year":"1993","journal-title":"Science"},{"key":"ref70","doi-asserted-by":"crossref","DOI":"10.1002\/0471200611","article-title":"Elements of Information Theory","author":"TM Cover","year":"1991"},{"key":"ref71","first-page":"165","article-title":"Phylogenetic inference in protein superfamilies: analysis of SH2 domains.","author":"K Sj\u00f6lander","year":"1998"},{"key":"ref72","doi-asserted-by":"crossref","first-page":"1765","DOI":"10.1093\/bioinformatics\/btn244","article-title":"Efficient functional clustering of protein sequences using the Dirichlet process.","volume":"24","author":"DP Brown","year":"2008","journal-title":"Bioinformatics"},{"key":"ref73","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1016\/S0022-5193(89)80196-1","article-title":"Gap costs for multiple sequence alignment.","volume":"138","author":"SF Altschul","year":"1989","journal-title":"J Theor Biol"},{"key":"ref74","doi-asserted-by":"crossref","first-page":"114","DOI":"10.1007\/BF02193625","article-title":"An evolutionary model for maximum likelihood alignment of DNA sequences.","volume":"33","author":"JL Thorne","year":"1991","journal-title":"J Mol Evol"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/BF00163848","article-title":"Inching toward reality: an improved likelihood model of sequence evolution.","volume":"34","author":"JL Thorne","year":"1992","journal-title":"J Mol Evol"},{"key":"ref76","first-page":"395","article-title":"Hidden Markov models and iterative aligners: study of their equivalence and possibilities.","author":"H Tanaka","year":"1993"},{"key":"ref77","doi-asserted-by":"crossref","first-page":"1059","DOI":"10.1073\/pnas.91.3.1059","article-title":"Hidden Markov models of biological primary sequence information.","volume":"91","author":"P Baldi","year":"1994","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref78","doi-asserted-by":"crossref","first-page":"1501","DOI":"10.1006\/jmbi.1994.1104","article-title":"Hidden Markov models in computational biology. Applications to protein modeling.","volume":"235","author":"A Krogh","year":"1994","journal-title":"J Mol Biol"},{"key":"ref79","article-title":"Biological sequence analysis. Probabilistic models of proteins and nucleic acids","author":"R Durbin","year":"1998"},{"key":"ref80","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1093\/bioinformatics\/14.9.755","article-title":"Profile hidden Markov models.","volume":"14","author":"SR Eddy","year":"1998","journal-title":"Bioinformatics"},{"key":"ref81","doi-asserted-by":"crossref","first-page":"846","DOI":"10.1093\/bioinformatics\/14.10.846","article-title":"Hidden Markov models for detecting remote protein homologies.","volume":"14","author":"K Karplus","year":"1998","journal-title":"Bioinformatics"},{"key":"ref82","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1186\/1471-2105-5-157","article-title":"Gapped alignment of protein sequence motifs through Monte Carlo optimization of a hidden Markov model.","volume":"5","author":"AF Neuwald","year":"2004","journal-title":"BMC Bioinformatics"},{"key":"ref83","doi-asserted-by":"crossref","first-page":"705","DOI":"10.1016\/0022-2836(82)90398-9","article-title":"An improved algorithm for matching biological sequences.","volume":"162","author":"O Gotoh","year":"1982","journal-title":"J Mol Biol"},{"key":"ref84","doi-asserted-by":"crossref","first-page":"1382","DOI":"10.1073\/pnas.80.5.1382","article-title":"Optimal sequence alignments.","volume":"80","author":"WM Fitch","year":"1983","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref85","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1007\/BF02462326","article-title":"Optimal sequence alignment using affine gap costs.","volume":"48","author":"SF Altschul","year":"1986","journal-title":"Bull Math Biol"},{"key":"ref86","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1016\/0001-8708(76)90202-4","article-title":"Some biological sequence metrics.","volume":"20","author":"MS Waterman","year":"1976","journal-title":"Adv Math"},{"key":"ref87","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1007\/BF02459948","article-title":"Sequence comparison with concave weighting functions.","volume":"50","author":"W Miller","year":"1988","journal-title":"Bull Math Biol"},{"key":"ref88","doi-asserted-by":"crossref","first-page":"1065","DOI":"10.1006\/jmbi.1993.1105","article-title":"Empirical and structural models for insertions and deletions in the divergent evolution of proteins.","volume":"229","author":"SA Benner","year":"1993","journal-title":"J Mol Biol"},{"key":"ref89","doi-asserted-by":"crossref","first-page":"2838","DOI":"10.1093\/nar\/gkh610","article-title":"Frequency of gaps observed in a structurally aligned protein pair database suggests a simple gap penalty function.","volume":"32","author":"NC Goonesekere","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"ref90","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1006\/jmbi.1997.1304","article-title":"Glutamine, alanine or glycine repeats inserted into the loop of a protein have minimal effects on stability and folding rates.","volume":"273","author":"AG Ladurner","year":"1997","journal-title":"J Mol Biol"},{"key":"ref91","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1110\/ps.0232003","article-title":"Low free energy cost of very long loop insertions in proteins.","volume":"12","author":"M Scalley-Kim","year":"2003","journal-title":"Protein Sci"},{"key":"ref92","first-page":"55","article-title":"Recognition of patterns in genetic sequences.","author":"BW Erickson","year":"1983"},{"key":"ref93","first-page":"228","article-title":"An algorithm for finding novel gapped motifs in dna sequences.","author":"E Rocke","year":"1998"},{"key":"ref94","first-page":"392","article-title":"Stochastic heuristic algorithms for target motif identification (extended abstract).","author":"HT Wareham","year":"2000","journal-title":"Pac Symp Biocomput"},{"key":"ref95","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1093\/bioinformatics\/15.1.87","article-title":"BAliBASE: a benchmark alignment database for the evaluation of multiple alignment programs.","volume":"15","author":"JD Thompson","year":"1999","journal-title":"Bioinformatics"},{"key":"ref96","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1186\/1471-2105-6-66","article-title":"DIALIGN-T: an improved algorithm for segment-based multiple sequence alignment.","volume":"6","author":"AR Subramanian","year":"2005","journal-title":"BMC Bioinformatics"},{"key":"ref97","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1093\/bioinformatics\/14.2.157","article-title":"Rose: generating sequence families.","volume":"14","author":"J Stoye","year":"1998","journal-title":"Bioinformatics"},{"key":"ref98","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1186\/1748-7188-3-6","article-title":"DIALIGN-TX: greedy and progressive approaches for segment-based multiple sequence alignment.","volume":"3","author":"AR Subramanian","year":"2008","journal-title":"Algorithms Mol Biol"},{"key":"ref99","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1093\/bioinformatics\/btm076","article-title":"COBALT: constraint-based alignment tool for multiple protein sequences.","volume":"23","author":"JS Papadopoulos","year":"2007","journal-title":"Bioinformatics"},{"key":"ref100","doi-asserted-by":"crossref","first-page":"4673","DOI":"10.1093\/nar\/22.22.4673","article-title":"CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice.","volume":"22","author":"JD Thompson","year":"1994","journal-title":"Nucleic Acids Res"},{"key":"ref101","doi-asserted-by":"crossref","first-page":"427","DOI":"10.1093\/bioinformatics\/btg008","article-title":"PCMA: fast and accurate multiple sequence alignment based on profile consistency.","volume":"19","author":"J Pei","year":"2003","journal-title":"Bioinformatics"},{"key":"ref102","doi-asserted-by":"crossref","first-page":"1792","DOI":"10.1093\/nar\/gkh340","article-title":"MUSCLE: multiple sequence alignment with high accuracy and high throughput.","volume":"32","author":"RC Edgar","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"ref103","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1186\/1471-2105-5-113","article-title":"MUSCLE: a multiple sequence alignment method with reduced time and space complexity.","volume":"5","author":"RC Edgar","year":"2004","journal-title":"BMC Bioinformatics"},{"key":"ref104","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1101\/gr.2821705","article-title":"ProbCons: Probabilistic consistency-based multiple sequence alignment.","volume":"15","author":"CB Do","year":"2005","journal-title":"Genome Res"},{"key":"ref105","doi-asserted-by":"crossref","first-page":"405","DOI":"10.1002\/(SICI)1097-0134(199707)28:3<405::AID-PROT10>3.0.CO;2-L","article-title":"Pfam: a comprehensive database of protein domain families based on seed alignments.","volume":"28","author":"EL Sonnhammer","year":"1997","journal-title":"Proteins"},{"key":"ref106","doi-asserted-by":"crossref","first-page":"320","DOI":"10.1093\/nar\/26.1.320","article-title":"Pfam: multiple sequence alignments and HMM-profiles of protein domains.","volume":"26","author":"EL Sonnhammer","year":"1998","journal-title":"Nucleic Acids Res"},{"key":"ref107","doi-asserted-by":"crossref","first-page":"D281","DOI":"10.1093\/nar\/gkm960","article-title":"The Pfam protein families database.","volume":"36","author":"RD Finn","year":"2008","journal-title":"Nucleic Acids Res"},{"key":"ref108","first-page":"686","article-title":"Extraction of hidden Markov model representations of signal patterns in DNA sequences.","author":"T Yada","year":"1996","journal-title":"Pac Symp Biocomput"},{"key":"ref109","doi-asserted-by":"crossref","first-page":"3613","DOI":"10.1093\/bioinformatics\/bth454","article-title":"Training HMM structure with genetic algorithm for biological sequence analysis.","volume":"20","author":"KJ Won","year":"2004","journal-title":"Bioinformatics"},{"key":"ref110","doi-asserted-by":"crossref","first-page":"1669","DOI":"10.1093\/bioinformatics\/btn254","article-title":"Modeling promoter grammars with evolving hidden Markov models.","volume":"24","author":"KJ Won","year":"2008","journal-title":"Bioinformatics"},{"key":"ref111","doi-asserted-by":"crossref","first-page":"455","DOI":"10.1093\/bioinformatics\/15.6.455","article-title":"Local sequence alignments with monotonic gap penalties.","volume":"15","author":"R Mott","year":"1999","journal-title":"Bioinformatics"},{"key":"ref112","doi-asserted-by":"crossref","first-page":"3994","DOI":"10.1093\/nar\/gki709","article-title":"Discovery of the principal specific transcription factors of Apicomplexa and their implication for the evolution of the AP2-integrase DNA binding domains.","volume":"33","author":"S Balaji","year":"2005","journal-title":"Nucleic Acids Res"},{"key":"ref113","doi-asserted-by":"crossref","first-page":"2265","DOI":"10.1105\/tpc.104.023135","article-title":"From endonucleases to transcription factors: evolution of the AP2 DNA binding domain in plants.","volume":"16","author":"E Magnani","year":"2004","journal-title":"Plant Cell"},{"key":"ref114","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1128\/EC.3.3.685-694.2004","article-title":"Homing endonucleases encoded by germ line-limited genes in Tetrahymena thermophila have APETELA2 DNA binding domains.","volume":"3","author":"JD Wuitschick","year":"2004","journal-title":"Eukaryotic Cell"},{"key":"ref115","doi-asserted-by":"crossref","first-page":"8393","DOI":"10.1073\/pnas.0801993105","article-title":"Specific DNA-binding by apicomplexan AP2 transcription factors.","volume":"105","author":"EK De Silva","year":"2008","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref116","doi-asserted-by":"crossref","first-page":"1402","DOI":"10.1111\/j.1365-2958.2009.06609.x","article-title":"Identification of a transcription factor in the mosquito-invasive stage of malaria parasites.","volume":"71","author":"M Yuda","year":"2009","journal-title":"Mol Microbiol"},{"key":"ref117","doi-asserted-by":"crossref","first-page":"5932","DOI":"10.1093\/nar\/gkl511","article-title":"Multiple alignment of protein sequences with repeats and rearrangements.","volume":"34","author":"TM Phuong","year":"2006","journal-title":"Nucleic Acids Res"},{"key":"ref118","doi-asserted-by":"crossref","first-page":"2336","DOI":"10.1101\/gr.2657504","article-title":"A novel method for multiple alignment of sequences with repeated and shuffled elements.","volume":"14","author":"B Raphael","year":"2004","journal-title":"Genome Res"},{"key":"ref119","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1016\/0097-8485(94)85023-2","article-title":"Non-globular domains in protein sequences: automated segmentation using complexity measures.","volume":"18","author":"JC Wootton","year":"1994","journal-title":"Comput Chem"},{"key":"ref120","doi-asserted-by":"crossref","first-page":"6097","DOI":"10.1093\/nar\/18.20.6097","article-title":"Sequence logos: a new way to display consensus sequences.","volume":"18","author":"TD Schneider","year":"1990","journal-title":"Nucleic Acids Res"},{"key":"ref121","doi-asserted-by":"crossref","first-page":"5484","DOI":"10.1093\/emboj\/17.18.5484","article-title":"A novel mode of DNA recognition by a beta-sheet revealed by the solution structure of the GCC-box binding domain in complex with DNA.","volume":"17","author":"MD Allen","year":"1998","journal-title":"EMBO J"},{"key":"ref122","doi-asserted-by":"crossref","first-page":"558","DOI":"10.1016\/j.jmb.2009.11.004","article-title":"Structural determinants of DNA binding by a P. falciparum ApiAP2 transcriptional regulator.","volume":"395","author":"SE Lindner","year":"2010","journal-title":"J Mol Biol"}],"container-title":["PLoS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/dx.plos.org\/10.1371\/journal.pcbi.1000852","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,28]],"date-time":"2024-03-28T10:59:37Z","timestamp":1711623577000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1000852"}},"subtitle":[],"editor":[{"given":"Adam","family":"Siepel","sequence":"first","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2010,7,15]]},"references-count":122,"journal-issue":{"issue":"7","published-online":{"date-parts":[[2010,7,15]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1000852","relation":{},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,7,15]]}}}