{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T22:50:07Z","timestamp":1773269407884,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2009,1,6]],"date-time":"2009-01-06T00:00:00Z","timestamp":1231200000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/2.0"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2009,12]]},"DOI":"10.1186\/1471-2105-10-6","type":"journal-article","created":{"date-parts":[[2009,1,6]],"date-time":"2009-01-06T19:14:08Z","timestamp":1231269248000},"source":"Crossref","is-referenced-by-count":46,"title":["Automated Alphabet Reduction for Protein Datasets"],"prefix":"10.1186","volume":"10","author":[{"given":"Jaume","family":"Bacardit","sequence":"first","affiliation":[]},{"given":"Michael","family":"Stout","sequence":"additional","affiliation":[]},{"given":"Jonathan D","family":"Hirst","sequence":"additional","affiliation":[]},{"given":"Alfonso","family":"Valencia","sequence":"additional","affiliation":[]},{"given":"Robert E","family":"Smith","sequence":"additional","affiliation":[]},{"given":"Natalio","family":"Krasnogor","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,1,6]]},"reference":[{"key":"2736_CR1","doi-asserted-by":"publisher","first-page":"5361","DOI":"10.1073\/pnas.0509355103","volume":"103","author":"KM Misura","year":"2006","unstructured":"Misura KM, Chivian D, Rohl CA, Kim DE, Baker D: Physically realistic homology models built with ROSETTA can be more accurate than their templates. Proc Natl Acad Sci USA 2006, 103: 5361\u20135366.","journal-title":"Proc Natl Acad Sci USA"},{"key":"2736_CR2","doi-asserted-by":"publisher","first-page":"1501","DOI":"10.1021\/bi00327a032","volume":"24","author":"KA Dill","year":"1985","unstructured":"Dill KA: Theory for the folding and stability of globular proteins. Biochemistry 1985, 24: 1501\u20131509.","journal-title":"Biochemistry"},{"key":"2736_CR3","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1073\/pnas.92.1.325","volume":"92","author":"K Yue","year":"1995","unstructured":"Yue K, Fiebig KM, Thomas PD, Chan HS, Shakhnovich EI, Dill KA: A test of lattice protein folding algorithms. Proc Natl Acad Sci USA 1995, 92: 325\u2013329.","journal-title":"Proc Natl Acad Sci USA"},{"key":"2736_CR4","doi-asserted-by":"publisher","first-page":"769","DOI":"10.1007\/3-540-45712-7_74","volume":"2439","author":"N Krasnogor","year":"2002","unstructured":"Krasnogor N, Blackburne B, Burke E, Hirst J: Multimeme Algorithms for Protein Structure Prediction. Proceedings of the Parallel Problem Solving from Nature VII. Lecture Notes in Computer Science 2002, 2439: 769\u2013778.","journal-title":"Proceedings of the Parallel Problem Solving from Nature VII. Lecture Notes in Computer Science"},{"key":"2736_CR5","first-page":"208","volume-title":"Applications of Evolutionary Computing, EvoWorkshops 2006","author":"M Stout","year":"2006","unstructured":"Stout M, Bacardit J, Hirst JD, Krasnogor N, Blazewicz J: From HP Lattice Models to Real Proteins: Coordination Number Prediction Using Learning Classifier Systems. In Applications of Evolutionary Computing, EvoWorkshops 2006. Springer LNCS 3907; 2006:208\u2013220."},{"key":"2736_CR6","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1002\/(SICI)1097-0134(20000201)38:2<149::AID-PROT4>3.0.CO;2-#","volume":"38","author":"AD Solis","year":"2000","unstructured":"Solis AD, Rackovsky S: Optimized representations and maximal information in proteins. Proteins 2000, 38: 149\u2013164.","journal-title":"Proteins"},{"key":"2736_CR7","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1002\/prot.10126","volume":"48","author":"AD Solis","year":"2002","unstructured":"Solis AD, Rackovsky S: Optimally informative backbone structural propensities in proteins. Proteins 2002, 48: 463\u2013486.","journal-title":"Proteins"},{"key":"2736_CR8","doi-asserted-by":"publisher","first-page":"892","DOI":"10.1002\/prot.20501","volume":"62","author":"AD Solis","year":"2006","unstructured":"Solis AD, Rackovsky S: Improvement of statistical potentials and threading score functions using information maximization. Proteins 2006, 62: 892\u2013908.","journal-title":"Proteins"},{"key":"2736_CR9","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.1038\/14918","volume":"6","author":"J Wang","year":"1999","unstructured":"Wang J, Wang W: A computational approach to simplifying the protein folding alphabet. Nat Struct Biol 1999, 6: 1033\u20131038.","journal-title":"Nat Struct Biol"},{"key":"2736_CR10","first-page":"1420","volume":"114","author":"M Cieplak","year":"2001","unstructured":"Cieplak M, Holter NS, Maritan A, Banavar JR: Amino acid classes and the protein folding problem. Chem Phys 2001, 114: 1420\u20131423.","journal-title":"Chem Phys"},{"key":"2736_CR11","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1006\/jmbi.1996.0114","volume":"256","author":"S Miyazawa","year":"1996","unstructured":"Miyazawa S, Jernigan RL: Residue-residue potentials with a favorable contact pair term and an unfavorable high packing density term, for simulation and threading. J Mol Biol 1996, 256: 623\u2013644.","journal-title":"J Mol Biol"},{"key":"2736_CR12","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1093\/protein\/gzg044","volume":"16","author":"T Li","year":"2003","unstructured":"Li T, Fan K, Wang J, Wang W: Reduction of protein sequence complexity by residue grouping. Protein Eng 2003, 16: 323\u2013330.","journal-title":"Protein Eng"},{"key":"2736_CR13","doi-asserted-by":"publisher","first-page":"10915","DOI":"10.1073\/pnas.89.22.10915","volume":"89","author":"S Henikoff","year":"1992","unstructured":"Henikoff S, Henikoff JG: Amino acid substitution matrices from protein blocks. Proc Natl Acad Sci USA 1992, 89: 10915\u201310919.","journal-title":"Proc Natl Acad Sci USA"},{"issue":"5 Pt 1","key":"2736_CR14","doi-asserted-by":"publisher","first-page":"051927","DOI":"10.1103\/PhysRevE.67.051927","volume":"67","author":"X Liu","year":"2003","unstructured":"Liu X, Zhang LM, Guan S, Zheng WM: Distances and classification of amino acids for different protein secondary structures. Phys Rev E Stat Nonlin Soft Matter Phys 2003, 67(5 Pt 1):051927.","journal-title":"Phys Rev E Stat Nonlin Soft Matter Phys"},{"key":"2736_CR15","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1214\/aoms\/1177729694","volume":"22","author":"S Kullback","year":"1951","unstructured":"Kullback S, Leibler RA: On Information and Sufficiency. The Annals of Mathematical Statistics 1951, 22: 79\u201386.","journal-title":"The Annals of Mathematical Statistics"},{"key":"2736_CR16","doi-asserted-by":"publisher","first-page":"986","DOI":"10.1002\/prot.20881","volume":"63","author":"F Melo","year":"2006","unstructured":"Melo F, Marti-Renom M: Accuracy of sequence alignment and fold assessment using reduced amino acid alphabets. Proteins 2006, 63: 986\u2013995.","journal-title":"Proteins"},{"key":"2736_CR17","first-page":"160","volume":"15","author":"J Mintseris","year":"2004","unstructured":"Mintseris J, Weng Z: Optimizing protein representations with information theory. Genome Informatics 2004, 15: 160\u2013169.","journal-title":"Genome Informatics"},{"key":"2736_CR18","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1007\/s008940100038","volume":"7","author":"J Meiler","year":"2001","unstructured":"Meiler J, M\u00fcller M, Zeidler A, Schm\u00e4schke F: Generation and evaluation of dimension-reduced amino acid parameter representations by artificial neural networks. J Mol Model 2001, 7: 360\u2013369.","journal-title":"J Mol Model"},{"key":"2736_CR19","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1002\/prot.20648","volume":"61","author":"JO Wrabl","year":"2005","unstructured":"Wrabl JO, Grishin NV: Grouping of Amino Acid Types and Extraction of Amino Acid Properties from Multiple Sequence Alignments Using Variance Maximization. Proteins 2005, 61: 523\u2013534.","journal-title":"Proteins"},{"key":"2736_CR20","doi-asserted-by":"publisher","first-page":"1059","DOI":"10.1007\/s00249-007-0188-5","volume":"36","author":"C Etchebest","year":"2007","unstructured":"Etchebest C, Benros C, Bornot A, Camproux AC, de Brevern A: A reduced amino acid alphabet for understanding and designing protein adaptation to mutation. European Biophysics Journal 2007, 36: 1059\u20131069.","journal-title":"European Biophysics Journal"},{"key":"2736_CR21","doi-asserted-by":"publisher","first-page":"1680","DOI":"10.1126\/science.8259512","volume":"262","author":"S Kamtekar","year":"1993","unstructured":"Kamtekar S, Schiffer J, Xiong H, Babik J, Hecht M: Protein design by binary patterning of polar and nonpolar amino acids. Science 1993, 262: 1680\u20131685.","journal-title":"Science"},{"key":"2736_CR22","first-page":"1668","volume":"62","author":"Y Ikenaka","year":"1998","unstructured":"Ikenaka Y, Nanba H, Yajima K, Yamada Y, Takano M, Takahashi S: Increase in Thermostability of. N-Carbamyl-D-Amino Acid Amidohydrolase on Amino Acid Substitutions. Bioscience, Biotechnology, and Biochemistry 1998, 62: 1668\u20131671.","journal-title":"N-Carbamyl-D-Amino Acid Amidohydrolase on Amino Acid Substitutions. Bioscience, Biotechnology, and Biochemistry"},{"key":"2736_CR23","doi-asserted-by":"publisher","first-page":"13549","DOI":"10.1073\/pnas.222243999","volume":"99","author":"S Akanuma","year":"2002","unstructured":"Akanuma S, Kigawa T, Yokoyama S: Combinatorial mutagenesis to restrict amino acid usage in an enzyme to a reduced set. Proc Natl Acad Sci USA 2002, 99: 13549\u201313553.","journal-title":"Proc Natl Acad Sci USA"},{"key":"2736_CR24","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1145\/1276958.1277033","volume-title":"GECCO '07: Proceedings of the 9th annual conference on Genetic and evolutionary computation","author":"J Bacardit","year":"2007","unstructured":"Bacardit J, Stout M, Hirst JD, Sastry K, Llor\u00e0 X, Krasnogor N: Automated alphabet reduction method with evolutionary algorithms for protein structure prediction. In GECCO '07: Proceedings of the 9th annual conference on Genetic and evolutionary computation. New York, NY, USA: ACM Press; 2007:346\u2013353."},{"key":"2736_CR25","volume-title":"Linkage Learning via Probabilistic Modeling in the ECGA","author":"G Harik","year":"1999","unstructured":"Harik G: Linkage Learning via Probabilistic Modeling in the ECGA. Tech. Rep. 99010, Illinois Genetic Algorithms Lab, University of Illinois at Urbana-Champaign; 1999."},{"key":"2736_CR26","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1145\/1143997.1144041","volume-title":"GECCO '06: Proceedings of the 8th annual conference on Genetic and evolutionary computation","author":"J Bacardit","year":"2006","unstructured":"Bacardit J, Stout M, Krasnogor N, Hirst JD, Blazewicz J: Coordination number prediction using learning classifier systems: performance and interpretability. In GECCO '06: Proceedings of the 8th annual conference on Genetic and evolutionary computation. ACM Press; 2006:247\u2013254."},{"key":"2736_CR27","volume-title":"BioHEL: Bioinformatics-oriented Hierarchical Evolutionary Learning","author":"J Bacardit","year":"2006","unstructured":"Bacardit J, Krasnogor N: BioHEL: Bioinformatics-oriented Hierarchical Evolutionary Learning. Nottingham ePrints report 482, University of Nottingham; 2006."},{"key":"2736_CR28","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1002\/prot.10198","volume":"49","author":"MS Cline","year":"2002","unstructured":"Cline MS, Karplus K, Lathrop RH, Smith TF, Rogers RG, Haussler D: Information-Theoretic Dissection of Pairwise Contact Potentials. Proteins 2002, 49: 7\u201314.","journal-title":"Proteins"},{"key":"2736_CR29","doi-asserted-by":"publisher","first-page":"4355","DOI":"10.1073\/pnas.84.13.4355","volume":"84","author":"M Gribskov","year":"1987","unstructured":"Gribskov M, McLachlan AD, Eisenberg D: Profile analysis: detection of distantly related proteins. Proc Natl Acad Sci USA 1987, 84: 4355\u20134358.","journal-title":"Proc Natl Acad Sci USA"},{"key":"2736_CR30","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1002\/prot.20300","volume":"58","author":"AR Kinjo","year":"2005","unstructured":"Kinjo AR, Horimoto K, Nishikawa K: Predicting Absolute Contact Numbers of Native Protein Structure From Amino Acid Sequence. Proteins 2005, 58: 158\u2013165.","journal-title":"Proteins"},{"key":"2736_CR31","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1002\/prot.10069","volume":"47","author":"G Pollastri","year":"2002","unstructured":"Pollastri G, Baldi P, Fariselli P, Casadio R: Prediction of coordination number and relative solvent accessibility in proteins. Proteins 2002, 47: 142\u2013153.","journal-title":"Proteins"},{"key":"2736_CR32","doi-asserted-by":"publisher","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"SF Altschul","year":"1997","unstructured":"Altschul SF, Madden TL, Sch\u00e4er AA, Zhang J, Zhang Z, Miller W, Lipman DJ: Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 1997, 25: 3389\u20133402.","journal-title":"Nucleic Acids Res"},{"key":"2736_CR33","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1002\/prot.20435","volume":"59","author":"MJ Wood","year":"2005","unstructured":"Wood MJ, Hirst JD: Protein secondary structure prediction with dihedral angles. Proteins 2005, 59: 476\u2013481.","journal-title":"Proteins"},{"key":"2736_CR34","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1002\/prot.340200303","volume":"20","author":"B Rost","year":"1994","unstructured":"Rost B, Sander C: Conservation and Prediction of Solvent Accessibility in Protein Families. Proteins 1994, 20: 216\u2013226.","journal-title":"Proteins"},{"key":"2736_CR35","doi-asserted-by":"publisher","first-page":"481","DOI":"10.1002\/prot.20620","volume":"61","author":"JY Wang","year":"2005","unstructured":"Wang JY, Lee HM, Ahmad S: Prediction and Evolutionary Information Analysis of Protein Solvent Accessibility Using Multiple Linear Regression. Proteins 2005, 61: 481\u2013491.","journal-title":"Proteins"},{"key":"2736_CR36","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1002\/prot.20645","volume":"61","author":"S Qin","year":"2005","unstructured":"Qin S, He Y, Pan XM: Predicting Protein Secondary Structure and Solvent Accessibility with an Improved Multiple Linear Regression Method. Proteins 2005, 61: 473\u2013480.","journal-title":"Proteins"},{"key":"2736_CR37","doi-asserted-by":"publisher","first-page":"2577","DOI":"10.1002\/bip.360221211","volume":"22","author":"W Kabsch","year":"1983","unstructured":"Kabsch W, Sander C: Dictionary of Protein Secondary Structure: Pattern Recognition of Hydrogen-Bonded and Geometrical Features. Biopolymers 1983, 22: 2577\u20132637.","journal-title":"Biopolymers"},{"key":"2736_CR38","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1093\/nar\/29.1.219","volume":"29","author":"T Noguchi","year":"2001","unstructured":"Noguchi T, Matsuda H, Akiyama Y: PDB-REPRDB: a database of representative protein chains from the Protein Data Bank (PDB). Nucleic Acids Res 2001, 29: 219\u2013220.","journal-title":"Nucleic Acids Res"},{"key":"2736_CR39","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1002\/prot.340090107","volume":"9","author":"C Sander","year":"1991","unstructured":"Sander C, Schneider R: Database of homology-derived protein structures and the structural meaning of sequence alignment. Proteins 1991, 9: 56\u201368.","journal-title":"Proteins"},{"key":"2736_CR40","volume-title":"Estimation of Distribution Algorithms, A New Tool for Evolutionnary Computation","year":"2002","unstructured":"Larra\u00f1aga P, Lozano J, Eds: Estimation of Distribution Algorithms, A New Tool for Evolutionnary Computation. Genetic Algorithms and Evolutionnary Computation, Kluwer Academic Publishers; 2002."},{"key":"2736_CR41","doi-asserted-by":"publisher","DOI":"10.1002\/0471200611","volume-title":"Elements of Information Theory","author":"TM Cover","year":"1991","unstructured":"Cover TM, Thomas JA: Elements of Information Theory. John Wiley & Sons; 1991."},{"key":"2736_CR42","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/3-540-56602-3_142","volume-title":"Machine Learning: ECML-93 \u2013 Proc of the European Conference on Machine Learning","author":"G Venturini","year":"1993","unstructured":"Venturini G: SIA: A Supervised Inductive Algorithm with Genetic Search for Learning Attributes Based Concepts. In Machine Learning: ECML-93 \u2013 Proc of the European Conference on Machine Learning. Edited by: Brazdil PB. Berlin, Heidelberg: Springer-Verlag; 1993:280\u2013296."},{"key":"2736_CR43","volume-title":"PhD thesis","author":"J Bacardit","year":"2004","unstructured":"Bacardit J: Pittsburgh Genetics-Based Machine Learning in the Data Mining era: Representations, generalization, and run-time. PhD thesis. Ramon Llull University, Barcelona, Catalonia, Spain; 2004."},{"key":"2736_CR44","first-page":"745","volume":"9","author":"CD Livingstone","year":"1993","unstructured":"Livingstone CD, Barton GJ: Protein sequence alignments: a strategy for the hierarchical analysis of residue conservation. Computer Applications in the Biosciences 1993, 9: 745\u2013756.","journal-title":"Computer Applications in the Biosciences"},{"key":"2736_CR45","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1002\/prot.21298","volume":"66","author":"O Dor","year":"2007","unstructured":"Dor O, Zhou Y: Achieving 80 secondary structure prediction by large-scale training. Proteins 2007, 66: 838\u2013845.","journal-title":"Proteins"},{"key":"2736_CR46","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1093\/protein\/13.3.149","volume":"13","author":"LR Murphy","year":"2000","unstructured":"Murphy LR, Wallqvist A, Levy RM: Simplified amino acid alphabets for protein fold recognition and implications for folding. Protein Eng 2000, 13: 149\u2013152.","journal-title":"Protein Eng"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1471-2105-10-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/1471-2105-10-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1471-2105-10-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,1,23]],"date-time":"2019-01-23T06:04:11Z","timestamp":1548223451000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/1471-2105-10-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,1,6]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2009,12]]}},"alternative-id":["2736"],"URL":"https:\/\/doi.org\/10.1186\/1471-2105-10-6","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,1,6]]},"article-number":"6"}}