{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T16:49:06Z","timestamp":1778690946562,"version":"3.51.4"},"reference-count":75,"publisher":"Public Library of Science (PLoS)","issue":"8","license":[{"start":{"date-parts":[[2007,8,17]],"date-time":"2007-08-17T00:00:00Z","timestamp":1187308800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.0030160","type":"journal-article","created":{"date-parts":[[2007,8,15]],"date-time":"2007-08-15T13:06:47Z","timestamp":1187183207000},"page":"e160","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":99,"title":["Automated Protein Subfamily Identification and Classification"],"prefix":"10.1371","volume":"3","author":[{"given":"Duncan P","family":"Brown","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nandini","family":"Krishnamurthy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kimmen","family":"Sj\u00f6lander","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"340","published-online":{"date-parts":[[2007,8,17]]},"reference":[{"key":"pcbi-0030160-b001","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1093\/bib\/bbl004","article-title":"Automated protein function prediction\u2014The genomic challenge.","volume":"7","year":"2006","journal-title":"Brief Bioinform"},{"issue":"Supplement 7","key":"pcbi-0030160-b002","first-page":"201","article-title":"The prediction of protein function at CASP6.","volume":"61","year":"2005","journal-title":"Proteins"},{"key":"pcbi-0030160-b003","doi-asserted-by":"crossref","first-page":"4291","DOI":"10.1093\/nar\/26.18.4291","article-title":"A phylogenomic study of the MutS family of proteins.","volume":"26","year":"1998","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b004","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1093\/bioinformatics\/15.5.391","article-title":"Automated genome sequence analysis and annotation.","volume":"15","year":"1999","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b005","doi-asserted-by":"crossref","first-page":"W313","DOI":"10.1093\/nar\/gkh406","article-title":"GOblet: A platform for Gene Ontology annotation of anonymous sequence data.","volume":"32","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b006","doi-asserted-by":"crossref","first-page":"2484","DOI":"10.1093\/bioinformatics\/btg338","article-title":"GoFigure: Automated Gene Ontology annotation.","volume":"19","year":"2003","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b007","doi-asserted-by":"crossref","first-page":"592","DOI":"10.1147\/sj.402.0592","article-title":"The GeneMine system for genome\/proteome annotation and collaborative data mining.","volume":"40","year":"2001","journal-title":"IBM Sys J"},{"key":"pcbi-0030160-b008","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1186\/1471-2105-5-178","article-title":"GOtcha: A new method for prediction of protein function assessed by the annotation of seven genomes.","volume":"5","year":"2004","journal-title":"BMC Bioinformatics"},{"key":"pcbi-0030160-b009","doi-asserted-by":"crossref","first-page":"3799","DOI":"10.1093\/nar\/gkg555","article-title":"OntoBlast function: From sequence similarities directly to potential functional annotations by ontology terms.","volume":"31","year":"2003","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b010","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1016\/S0168-9525(99)01706-0","article-title":"Errors in genome annotation.","volume":"15","year":"1999","journal-title":"Trends Genet"},{"key":"pcbi-0030160-b011","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1016\/S0168-9525(01)02348-4","article-title":"Intrinsic errors in genome annotation.","volume":"17","year":"2001","journal-title":"Trends Genet"},{"key":"pcbi-0030160-b012","first-page":"55","article-title":"Sources of systematic error in functional annotation of genomes: Domain rearrangement, non-orthologous gene displacement and operon disruption.","volume":"1","year":"1998","journal-title":"In Silico Biol"},{"key":"pcbi-0030160-b013","doi-asserted-by":"crossref","first-page":"540","DOI":"10.1007\/s002390010184","article-title":"The closest BLAST hit is often not the nearest neighbor.","volume":"52","year":"2001","journal-title":"J Mol Evol"},{"key":"pcbi-0030160-b014","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1016\/j.mbs.2004.08.001","article-title":"Percolation of annotation errors through hierarchically structured protein sequence databases.","volume":"193","year":"2005","journal-title":"Math Biosci"},{"key":"pcbi-0030160-b015","article-title":"Functional classification using phylogenomic inference.","volume":"2","year":"2006","journal-title":"PLoS Comput Biol"},{"key":"pcbi-0030160-b016","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1101\/gr.8.3.163","article-title":"Phylogenomics: Improving functional predictions for uncharacterized genes by evolutionary analysis.","volume":"8","year":"1998","journal-title":"Genome Res"},{"key":"pcbi-0030160-b017","doi-asserted-by":"crossref","first-page":"2715","DOI":"10.1093\/nar\/23.14.2715","article-title":"Evolution of the SNF2 family of proteins: Subfamilies with distinct sequences and functions.","volume":"23","year":"1995","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b018","doi-asserted-by":"crossref","first-page":"170","DOI":"10.1093\/bioinformatics\/bth021","article-title":"Phylogenomic inference of protein molecular function: Advances and challenges.","volume":"20","year":"2004","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b019","doi-asserted-by":"crossref","first-page":"5231","DOI":"10.1093\/nar\/gkh867","article-title":"PhyloGenie: Sutomated phylome generation and analysis.","volume":"32","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b020","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1186\/1471-2105-6-153","article-title":"FIGENIX: Intelligent automation of genomic annotation: Expertise integration in a new software platform.","volume":"6","year":"2005","journal-title":"BMC Bioinformatics"},{"key":"pcbi-0030160-b021","doi-asserted-by":"crossref","first-page":"2178","DOI":"10.1101\/gr.1224503","article-title":"OrthoMCL: Identification of ortholog groups for eukaryotic genomes.","volume":"13","year":"2003","journal-title":"Genome Res"},{"key":"pcbi-0030160-b022","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1093\/bioinformatics\/18.1.77","article-title":"Tolerating some redundancy significantly speeds up clustering of large protein databases.","volume":"18","year":"2002","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b023","doi-asserted-by":"crossref","first-page":"3829","DOI":"10.1093\/nar\/gkg518","article-title":"PipeAlign: A new toolkit for protein family analysis.","volume":"31","year":"2003","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b024","doi-asserted-by":"crossref","first-page":"1041","DOI":"10.1006\/jmbi.2000.5197","article-title":"Automatic clustering of orthologs and in-paralogs from pairwise species comparisons.","volume":"314","year":"2001","journal-title":"J Mol Biol"},{"key":"pcbi-0030160-b025","doi-asserted-by":"crossref","first-page":"908","DOI":"10.1093\/bioinformatics\/18.7.908","article-title":"Clustering of proximal sequence space for the identification of protein families.","volume":"18","year":"2002","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b026","doi-asserted-by":"crossref","first-page":"683","DOI":"10.1002\/prot.10449","article-title":"Automatic annotation of protein function based on family identification.","volume":"53","year":"2003","journal-title":"Proteins"},{"key":"pcbi-0030160-b027","article-title":"Protein molecular function prediction by Bayesian phylogenomics.","volume":"1","year":"2005","journal-title":"PLoS Comput Biol"},{"key":"pcbi-0030160-b028","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1093\/bioinformatics\/18.1.92","article-title":"Automated ortholog inference from phylogenetic trees and calculation of orthology reliability.","volume":"18","year":"2002","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b029","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1186\/1471-2105-3-14","article-title":"RIO: Analyzing proteomes by automated phylogenomics using resampled inference of orthologs.","volume":"3","year":"2002","journal-title":"BMC Bioinformatics"},{"key":"pcbi-0030160-b030","doi-asserted-by":"crossref","first-page":"1304","DOI":"10.1126\/science.1058040","article-title":"The sequence of the human genome.","volume":"291","year":"2001","journal-title":"Science"},{"key":"pcbi-0030160-b031","first-page":"322","article-title":"Subfamily HMMS in functional genomics.","volume":"10","year":"2005","journal-title":"Pac Symp Biocomput"},{"key":"pcbi-0030160-b032","doi-asserted-by":"crossref","first-page":"1501","DOI":"10.1006\/jmbi.1994.1104","article-title":"Hidden Markov models in computational biology. Applications to protein modeling.","volume":"235","year":"1994","journal-title":"J Mol Biol"},{"key":"pcbi-0030160-b033","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1093\/bioinformatics\/18.1.147","article-title":"Classifying G-protein coupled receptors with support vector machines.","volume":"18","year":"2002","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b034","doi-asserted-by":"crossref","first-page":"1658","DOI":"10.1093\/bioinformatics\/btl158","article-title":"CD-hit: A fast program for clustering and comparing large sets of protein or nucleotide sequences.","volume":"22","year":"2006","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b035","doi-asserted-by":"crossref","first-page":"1435","DOI":"10.1093\/oxfordjournals.molbev.a003929","article-title":"Secator: A program for inferring protein subfamilies from phylogenetic trees.","volume":"18","year":"2001","journal-title":"Mol Biol Evol"},{"key":"pcbi-0030160-b036","unstructured":"Sj\u00f6landerK\n\t\t\t\t\t1998\n\t\t\t\t\tPhylogenetic inference in protein superfamilies: Analysis of SH2 domains.\n\t\t\t\t\tIn:\n\t\t\t\t\tProceedings of the Sixth International Conference on Intelligent Systems in Molecular Biology\n\t\t\t\t\t28 June\u20131 July, 1998;\n\t\t\t\t\tMontreal, Quebec, Canada.\n\t\t\t\t\t165\n\t\t\t\t\t174"},{"key":"pcbi-0030160-b037","first-page":"327","article-title":"Dirichlet mixtures: A method for improved detection of weak but significant protein sequence homology.","volume":"12","year":"1996","journal-title":"Comput Appl Biosci"},{"key":"pcbi-0030160-b038","unstructured":"KullbackS\n\t\t\t\t\t1968\n\t\t\t\t\tInformation theory and statistics\n\t\t\t\t\tNew York\n\t\t\t\t\tDover Publications"},{"key":"pcbi-0030160-b039","doi-asserted-by":"crossref","first-page":"12091","DOI":"10.1073\/pnas.91.25.12091","article-title":"Detection of conserved segments in proteins: Iterative scanning of sequence databases with alignment blocks.","volume":"91","year":"1994","journal-title":"Proc Natl Acad Sci U S A"},{"key":"pcbi-0030160-b040","doi-asserted-by":"crossref","first-page":"D138","DOI":"10.1093\/nar\/gkh121","article-title":"The Pfam protein families database.","volume":"32","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b041","first-page":"D192","article-title":"CDD: A conserved domain database for protein classification.","volume":"33","year":"2005","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b042","first-page":"159","article-title":"Modulation of pulmonary innate immunity during bacterial infection: Animal studies.","volume":"50","year":"2002","journal-title":"Arch Immunol Ther Exp (Warsz)"},{"key":"pcbi-0030160-b043","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1006\/jmbi.2000.4036","article-title":"Analysis and prediction of functional sub-types from protein sequence alignments.","volume":"303","year":"2000","journal-title":"J Mol Biol"},{"key":"pcbi-0030160-b044","unstructured":"CristianiniNShawe-TaylorJ\n\t\t\t\t\t2000\n\t\t\t\t\tAn introduction to support vector machines: And other kernel-based learning methods\n\t\t\t\t\tCambridge\/New York\n\t\t\t\t\tCambridge University Press"},{"key":"pcbi-0030160-b045","doi-asserted-by":"crossref","first-page":"3241","DOI":"10.1093\/bioinformatics\/bti497","article-title":"Semi-supervised protein classification using cluster kernels.","volume":"21","year":"2005","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b046","doi-asserted-by":"crossref","first-page":"536","DOI":"10.1016\/S0022-2836(05)80134-2","article-title":"SCOP: A structural classification of proteins database for the investigation of sequences and structures.","volume":"247","year":"1995","journal-title":"J Mol Biol"},{"key":"pcbi-0030160-b047","unstructured":"WebbECNC-IUBMB,\n\t\t\t\t\t1992\n\t\t\t\t\tEnzyme nomenclature 1992: Recommendations of the Nomenclature Committee of the International Union of Biochemistry and Molecular Biology on the nomenclature and classification of enzymes\n\t\t\t\t\tSan Diego\n\t\t\t\t\tAcademic Press"},{"key":"pcbi-0030160-b048","doi-asserted-by":"crossref","first-page":"2545","DOI":"10.1021\/bi052101l","article-title":"Leveraging enzyme structure\u2013function relationships for functional inference and experimental design: The Structure-Function Linkage Database.","volume":"45","year":"2006","journal-title":"Biochemistry"},{"key":"pcbi-0030160-b049","doi-asserted-by":"crossref","first-page":"294","DOI":"10.1093\/nar\/gkg103","article-title":"GPCRDB information system for G protein\u2013coupled receptors.","volume":"31","year":"2003","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b050","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1093\/nar\/29.1.346","article-title":"Collecting and harvesting biological data: The GPCRDB and NucleaRDB information systems.","volume":"29","year":"2001","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b051","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1002\/1097-0134(20001001)41:1<98::AID-PROT120>3.0.CO;2-S","article-title":"Practical limits of function prediction.","volume":"41","year":"2000","journal-title":"Proteins"},{"key":"pcbi-0030160-b052","doi-asserted-by":"crossref","first-page":"548","DOI":"10.1016\/S1367-5931(99)00007-1","article-title":"Evolution of protein function, from a structural perspective.","volume":"3","year":"1999","journal-title":"Curr Opin Chem Biol"},{"key":"pcbi-0030160-b053","first-page":"173","article-title":"Comparing clusterings by the variation of information.","year":"2003"},{"key":"pcbi-0030160-b054","first-page":"501","article-title":"CASP and CAFASP experiments and their findings.","volume":"44","year":"2003","journal-title":"Methods Biochem Anal"},{"key":"pcbi-0030160-b055","doi-asserted-by":"crossref","first-page":"D189","DOI":"10.1093\/nar\/gkh034","article-title":"The ASTRAL compendium in 2004.","volume":"32","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b056","doi-asserted-by":"crossref","first-page":"1201","DOI":"10.1006\/jmbi.1998.2221","article-title":"Sequence comparisons using multiple sequences detect three times as many remote homologues as pairwise methods.","volume":"284","year":"1998","journal-title":"J Mol Biol"},{"key":"pcbi-0030160-b057","doi-asserted-by":"crossref","first-page":"R83","DOI":"10.1186\/gb-2006-7-9-r83","article-title":"PhyloFacts: An online structural phylogenomic encyclopedia for protein functional and structural classification.","volume":"7","year":"2006","journal-title":"Genome Biol"},{"key":"pcbi-0030160-b058","doi-asserted-by":"crossref","first-page":"832","DOI":"10.1006\/bbrc.2001.4653","article-title":"Isolation and characterization of acetoacetyl-CoA thiolase gene essential for n-decane assimilation in yeast Yarrowia lipolytica.","volume":"282","year":"2001","journal-title":"Biochem Biophys Res Commun"},{"key":"pcbi-0030160-b059","doi-asserted-by":"crossref","first-page":"525","DOI":"10.1146\/annurev.genet.38.072902.091216","article-title":"Metagenomics: Genomic analysis of microbial communities.","volume":"38","year":"2004","journal-title":"Annu Rev Genet"},{"key":"pcbi-0030160-b060","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1126\/science.1093857","article-title":"Environmental genome shotgun sequencing of the Sargasso Sea.","volume":"304","year":"2004","journal-title":"Science"},{"key":"pcbi-0030160-b061","doi-asserted-by":"crossref","first-page":"4355","DOI":"10.1073\/pnas.84.13.4355","article-title":"Profile analysis: Detection of distantly related proteins.","volume":"84","year":"1987","journal-title":"Proc Natl Acad Sci U S A"},{"key":"pcbi-0030160-b062","unstructured":"DurbinREddySRKroghAMitchisonGJ\n\t\t\t\t\t1998\n\t\t\t\t\tBiological sequence analysis\n\t\t\t\t\tCambridge (United Kingdom)\n\t\t\t\t\tCambridge University Press"},{"key":"pcbi-0030160-b063","doi-asserted-by":"crossref","first-page":"574","DOI":"10.1016\/0022-2836(94)90032-9","article-title":"Position-based sequence weights.","volume":"243","year":"1994","journal-title":"J Mol Biol"},{"key":"pcbi-0030160-b064","doi-asserted-by":"crossref","first-page":"254","DOI":"10.1093\/nar\/28.1.254","article-title":"The ASTRAL compendium for protein structure and sequence analysis.","volume":"28","year":"2000","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b065","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1093\/bioinformatics\/14.9.755","article-title":"Profile hidden Markov models.","volume":"14","year":"1998","journal-title":"Bioinformatics"},{"key":"pcbi-0030160-b066","doi-asserted-by":"crossref","first-page":"D115","DOI":"10.1093\/nar\/gkh131","article-title":"UniProt: The universal protein knowledgebase.","volume":"32","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b067","doi-asserted-by":"crossref","first-page":"1792","DOI":"10.1093\/nar\/gkh340","article-title":"MUSCLE: Multiple sequence alignment with high accuracy and high throughput.","volume":"32","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"pcbi-0030160-b068","doi-asserted-by":"crossref","first-page":"846","DOI":"10.1093\/bioinformatics\/14.10.846","article-title":"Hidden Markov models for detecting remote protein homologies.","volume":"14","year":"1998","journal-title":"Bioinformatics"},{"issue":"Supplement 6","key":"pcbi-0030160-b069","first-page":"491","article-title":"Combining local-structure, fold-recognition, and new fold methods for protein structure prediction.","volume":"53","year":"2003","journal-title":"Proteins"},{"key":"pcbi-0030160-b070","doi-asserted-by":"crossref","first-page":"460","DOI":"10.1016\/S0076-6879(96)66029-7","article-title":"Local alignment statistics.","volume":"266","year":"1996","journal-title":"Methods Enzymol"},{"key":"pcbi-0030160-b071","unstructured":"EddySR\n\t\t\t\t\t1997\n\t\t\t\t\tMaximum-likelihood fitting of extreme value distributions\n\t\t\t\t\tAvailable: http:\/\/selab.wustl.edu\/publications\/Eddy97b\/Eddy97b-techreport.pdf. Accessed 13 July 2007."},{"key":"pcbi-0030160-b072","first-page":"1834","article-title":"Bootstrapping and normalization for enhanced evaluations of pairwise sequence comparison.","volume":"9","year":"2002","journal-title":"Proc IEEE"},{"key":"pcbi-0030160-b073","doi-asserted-by":"crossref","first-page":"506","DOI":"10.1007\/BF02074884","article-title":"Iteratively reweighted least squares: A comparison of several single step algorithms for linear models.","volume":"32","year":"1992","journal-title":"BIT"},{"key":"pcbi-0030160-b074","unstructured":"R-Project\n\t\t\t\t\t2005\n\t\t\t\t\tR: A language and environment for statistical computing\n\t\t\t\t\tAvailable: http:\/\/www.R-project.org. Accessed 13 July 2007."},{"key":"pcbi-0030160-b075","unstructured":"GlanvilleJGKirshnerDKrishnamurthyNSj\u00f6landerK\n\t\t\t\t\t2007\n\t\t\t\t\tBerkeley Phylogenics Group Web servers: Resources for structural phylogenomic analysis.\n\t\t\t\t\tNucleic Acids Res\n\t\t\t\t\tdoi:10.1093\/nar\/.gkm325"}],"container-title":["PLoS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.0030160","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,6]],"date-time":"2020-05-06T18:18:36Z","timestamp":1588789116000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.0030160"}},"subtitle":[],"editor":[{"given":"Jonathan A","family":"Eisen","sequence":"first","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2007,8,17]]},"references-count":75,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2007,8,17]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.0030160","relation":{"has-review":[{"id-type":"doi","id":"10.3410\/f.1091125.544520","asserted-by":"object"},{"id-type":"doi","id":"10.3410\/f.1091125.550038","asserted-by":"object"}]},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007,8,17]]}}}