{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T07:19:25Z","timestamp":1770448765718,"version":"3.49.0"},"reference-count":32,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2017,9,23]],"date-time":"2017-09-23T00:00:00Z","timestamp":1506124800000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"DOI":"10.13039\/100006505","name":"ERDC","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006505","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100009917","name":"Naval Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100009917","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,3,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Complex microbial communities can be characterized by metagenomics and metaproteomics. However, metagenome assemblies often generate enormous, and yet incomplete, protein databases, which undermines the identification of peptides and proteins in metaproteomics. This challenge calls for increased discrimination of true identifications from false identifications by database searching and filtering algorithms in metaproteomics.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Sipros Ensemble was developed here for metaproteomics using an ensemble approach. Three diverse scoring functions from MyriMatch, Comet and the original Sipros were incorporated within a single database searching engine. Supervised classification with logistic regression was used to filter database searching results. Benchmarking with soil and marine microbial communities demonstrated a higher number of peptide and protein identifications by Sipros Ensemble than MyriMatch\/Percolator, Comet\/Percolator, MS-GF+\/Percolator, Comet &amp; MyriMatch\/iProphet and Comet &amp; MyriMatch &amp; MS-GF+\/iProphet. Sipros Ensemble was computationally efficient and scalable on supercomputers.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>Freely available under the GNU GPL license at http:\/\/sipros.omicsbio.org.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btx601","type":"journal-article","created":{"date-parts":[[2017,9,19]],"date-time":"2017-09-19T19:18:11Z","timestamp":1505848691000},"page":"795-802","source":"Crossref","is-referenced-by-count":34,"title":["Sipros Ensemble improves database searching and filtering for complex metaproteomics"],"prefix":"10.1093","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2777-4482","authenticated-orcid":false,"given":"Xuan","family":"Guo","sequence":"first","affiliation":[{"name":"Graduate School of Genome Science and Technology, University of Tennessee, Knoxville, TN, USA"},{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, Oak Ridge, TN, USA"},{"name":"Department of Computer Science and Engineering, University of North Texas, Denton, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhou","family":"Li","sequence":"additional","affiliation":[{"name":"Graduate School of Genome Science and Technology, University of Tennessee, Knoxville, TN, USA"},{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, Oak Ridge, TN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiuming","family":"Yao","sequence":"additional","affiliation":[{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, Oak Ridge, TN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ryan S","family":"Mueller","sequence":"additional","affiliation":[{"name":"Department of Microbiology, Oregon State University, Corvallis, OR, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jimmy K","family":"Eng","sequence":"additional","affiliation":[{"name":"Proteomics Resource, University of Washington, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7223-578X","authenticated-orcid":false,"given":"David L","family":"Tabb","sequence":"additional","affiliation":[{"name":"DST\/NRF Centre of Excellence for Biomedical Tuberculosis Research, SAMRC Centre for Tuberculosis Research, Division of Molecular Biology and Human Genetics, Faculty of Medicine and Health Sciences, Stellenbosch University, Cape Town, South Africa"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"suffix":"IV","given":"William Judson","family":"Hervey","sequence":"additional","affiliation":[{"name":"Naval Research Laboratory, Center for Bio\/Molecular Science & Engineering (Code 6910), Washington, DC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chongle","family":"Pan","sequence":"additional","affiliation":[{"name":"Graduate School of Genome Science and Technology, University of Tennessee, Knoxville, TN, USA"},{"name":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, Oak Ridge, TN, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2017,9,22]]},"reference":[{"key":"2023012712385187600_btx601-B1","first-page":"btu641","article-title":"Sigma: strain-level inference of genomes from metagenomic analysis for biosurveillance","author":"Ahn","year":"2014","journal-title":"Bioinformatics"},{"key":"2023012712385187600_btx601-B2","doi-asserted-by":"crossref","first-page":"e00027\u201315","DOI":"10.1128\/mSystems.00027-15","article-title":"Proteomic stable isotope probing reveals taxonomically distinct patterns in amino acid assimilation by coastal marine bacterioplankton","volume":"1","author":"Bryson","year":"2016","journal-title":"mSystems"},{"key":"2023012712385187600_btx601-B3","doi-asserted-by":"crossref","first-page":"e2687","DOI":"10.7717\/peerj.2687","article-title":"Proteogenomic analyses indicate bacterial methylotrophy and archaeal heterotrophy are prevalent below the grass root zone","volume":"4","author":"Butterfield","year":"2016","journal-title":"PeerJ"},{"key":"2023012712385187600_btx601-B4","doi-asserted-by":"crossref","first-page":"642","DOI":"10.1186\/s12864-016-2855-3","article-title":"A comprehensive and scalable database search system for metaproteomics","volume":"17","author":"Chatterjee","year":"2016","journal-title":"BMC Genomics"},{"key":"2023012712385187600_btx601-B5","author":"Chollet","year":"2015"},{"key":"2023012712385187600_btx601-B6","doi-asserted-by":"crossref","first-page":"23.","DOI":"10.1007\/s12014-009-9024-5","article-title":"An unsupervised, model-free, machine-learning combiner for peptide identifications from tandem mass spectra","volume":"5","author":"Edwards","year":"2009","journal-title":"Clin. Proteomics"},{"key":"2023012712385187600_btx601-B7","first-page":"13.23.1","article-title":"PepArML: a meta-search peptide identification platform for tandem mass spectra","volume-title":"Curr. Protoc. Bioinf","author":"Edwards","year":"2013"},{"key":"2023012712385187600_btx601-B8","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1038\/nmeth1019","article-title":"Target-decoy search strategy for increased confidence in large-scale protein identifications by mass spectrometry","volume":"4","author":"Elias","year":"2007","journal-title":"Nat. Methods"},{"key":"2023012712385187600_btx601-B9","doi-asserted-by":"crossref","first-page":"976","DOI":"10.1016\/1044-0305(94)80016-2","article-title":"An approach to correlate tandem mass spectral data of peptides with amino acid sequences in a protein database","volume":"5","author":"Eng","year":"1994","journal-title":"J. Am. Soci. Mass Spectrometry"},{"key":"2023012712385187600_btx601-B10","doi-asserted-by":"crossref","first-page":"R111\u20139522.","DOI":"10.1074\/mcp.R111.009522","article-title":"A face in the crowd: recognizing peptides through database search","volume":"10","author":"Eng","year":"2011","journal-title":"Mol. Cell. Proteomics"},{"key":"2023012712385187600_btx601-B11","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1002\/pmic.201200439","article-title":"Comet: an open-source MS\/MS sequence database search tool","volume":"13","author":"Eng","year":"2013","journal-title":"Proteomics"},{"key":"2023012712385187600_btx601-B12","doi-asserted-by":"crossref","first-page":"768","DOI":"10.1021\/ac0258709","article-title":"A method for assessing the statistical significance of mass spectrometry-based protein identifications using general scoring schemes","volume":"75","author":"Fenyo","year":"2003","journal-title":"Anal. Chem. Washington DC"},{"key":"2023012712385187600_btx601-B13","doi-asserted-by":"crossref","first-page":"890","DOI":"10.1021\/pr400937n","article-title":"Fast and accurate database searches with MS-GF+ percolator","volume":"13","author":"Granholm","year":"2013","journal-title":"J. Proteome Res"},{"key":"2023012712385187600_btx601-B14","first-page":"btu395","article-title":"Omega: an overlap-graph de novo assembler for metagenomics","author":"Haider","year":"2014","journal-title":"Bioinformatics"},{"key":"2023012712385187600_btx601-B15","doi-asserted-by":"crossref","first-page":"1895","DOI":"10.1093\/bioinformatics\/bts274","article-title":"Exhaustive database searching for amino acid mutations in proteomes","volume":"28","author":"Hyatt","year":"2012","journal-title":"Bioinformatics"},{"key":"2023012712385187600_btx601-B16","doi-asserted-by":"crossref","first-page":"923","DOI":"10.1038\/nmeth1113","article-title":"Semi-supervised learning for peptide identification from shotgun proteomics datasets","volume":"4","author":"K\u00e4ll","year":"2007","journal-title":"Nat. Methods"},{"key":"2023012712385187600_btx601-B17","doi-asserted-by":"crossref","first-page":"2949","DOI":"10.1021\/pr2002116","article-title":"MSblender: a probabilistic approach for integrating peptide identifications from multiple database search engines","volume":"10","author":"Kwon","year":"2011","journal-title":"J. Proteome Res"},{"key":"2023012712385187600_btx601-B18","doi-asserted-by":"crossref","first-page":"4405.","DOI":"10.1038\/ncomms5405","article-title":"Diverse and divergent protein post-translational modifications in two growth stages of a natural microbial community","volume":"5","author":"Li","year":"2014","journal-title":"Nat. Commun"},{"key":"2023012712385187600_btx601-B19","doi-asserted-by":"crossref","first-page":"1041","DOI":"10.1111\/1462-2920.13605","article-title":"Integrated proteomics and metabolomics suggests symbiotic metabolism and multimodal regulation in a fungal-endobacterial system","volume":"19","author":"Li","year":"2017","journal-title":"Environ. Microbiol"},{"key":"2023012712385187600_btx601-B20","doi-asserted-by":"crossref","DOI":"10.3389\/fmicb.2016.00563","article-title":"Proteomic stable isotope probing reveals biosynthesis dynamics of slow growing methane based microbial communities","volume":"7","author":"Marlow","year":"2016","journal-title":"Front. Microbiol"},{"key":"2023012712385187600_btx601-B21","doi-asserted-by":"crossref","first-page":"1419","DOI":"10.1074\/mcp.R500012-MCP200","article-title":"Interpretation of shotgun proteomic data the protein inference problem","volume":"4","author":"Nesvizhskii","year":"2005","journal-title":"Mol. Cell. Proteomics"},{"key":"2023012712385187600_btx601-B22","doi-asserted-by":"crossref","first-page":"M110\u20136049","DOI":"10.1074\/mcp.M110.006049","article-title":"Quantitative tracking of isotope flows in proteomes of microbial communities","volume":"10","author":"Pan","year":"2011","journal-title":"Mol. Cell. Proteomics"},{"key":"2023012712385187600_btx601-B23","doi-asserted-by":"crossref","first-page":"3022","DOI":"10.1021\/pr800127y","article-title":"Rapid and accurate peptide identification from tandem mass spectra","volume":"7","author":"Park","year":"2008","journal-title":"J. Proteome Res"},{"key":"2023012712385187600_btx601-B24","doi-asserted-by":"crossref","first-page":"4082","DOI":"10.1021\/acs.jproteome.6b00376","article-title":"Integrated proteomic pipeline using multiple search engines for a proteogenomic study with a controlled protein false discovery rate","volume":"15","author":"Park","year":"2016","journal-title":"J. Proteome Res"},{"key":"2023012712385187600_btx601-B25","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J. Mach. Learn. Res"},{"key":"2023012712385187600_btx601-B26","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1021\/pr025556v","article-title":"Evaluation of multidimensional chromatography coupled with tandem mass spectrometry (LC\/LC- MS\/MS) for large-scale protein analysis: the yeast proteome","volume":"2","author":"Peng","year":"2003","journal-title":"J. Proteome Res"},{"key":"2023012712385187600_btx601-B27","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1038\/nmeth725","article-title":"Large-scale database searching using tandem mass spectra: looking up the answer in the back of the book","volume":"1","author":"Sadygov","year":"2004","journal-title":"Nat. Methods"},{"key":"2023012712385187600_btx601-B28","doi-asserted-by":"crossref","first-page":"M111\u20137690.","DOI":"10.1074\/mcp.M111.007690","article-title":"iProphet: multi-level integrative analysis of shotgun proteomic data improves peptide and protein identification rates and error estimates","volume":"10","author":"Shteynberg","year":"2011","journal-title":"Mol. Cell. Proteomics"},{"key":"2023012712385187600_btx601-B29","doi-asserted-by":"crossref","first-page":"654","DOI":"10.1021\/pr0604054","article-title":"MyriMatch: highly accurate tandem mass spectral peptide identification by multivariate hypergeometric analysis","volume":"6","author":"Tabb","year":"2007","journal-title":"J. Proteome Res"},{"key":"2023012712385187600_btx601-B30","doi-asserted-by":"crossref","first-page":"2064","DOI":"10.1093\/bioinformatics\/btt329","article-title":"Sipros\/ProRata: a versatile informatics system for quantitative community proteomics","volume":"29","author":"Wang","year":"2013","journal-title":"Bioinformatics"},{"key":"2023012712385187600_btx601-B31","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1038\/85686","article-title":"Large-scale analysis of the yeast proteome by multidimensional protein identification technology","volume":"19","author":"Washburn","year":"2001","journal-title":"Nat. Biotechnol"},{"key":"2023012712385187600_btx601-B32","doi-asserted-by":"crossref","first-page":"3424","DOI":"10.1002\/pmic.201400571","article-title":"Microbial metaproteomics for characterizing the range of metabolic functions and activities of human gut microbiota","volume":"15","author":"Xiong","year":"2015","journal-title":"Proteomics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/34\/5\/795\/48913545\/bioinformatics_34_5_795.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/34\/5\/795\/48913545\/bioinformatics_34_5_795.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T13:30:08Z","timestamp":1674826208000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/34\/5\/795\/4209993"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2017,9,22]]},"references-count":32,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2018,3,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btx601","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2018,3,1]]},"published":{"date-parts":[[2017,9,22]]}}}