{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T00:24:32Z","timestamp":1778631872477,"version":"3.51.4"},"reference-count":31,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2016,10,12]],"date-time":"2016-10-12T00:00:00Z","timestamp":1476230400000},"content-version":"vor","delay-in-days":333,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,3,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>Motivation: Multiple sequence alignments (MSAs) with large numbers of sequences are now commonplace. However, current multiple alignment benchmarks are ill-suited for testing these types of alignments, as test cases either contain a very small number of sequences or are based purely on simulation rather than empirical data.<\/jats:p>\n               <jats:p>Results: We take advantage of recent developments in protein structure prediction methods to create a benchmark (ContTest) for protein MSAs containing many thousands of sequences in each test case and which is based on empirical biological data. We rank popular MSA methods using this benchmark and verify a recent result showing that chained guide trees increase the accuracy of progressive alignment packages on datasets with thousands of proteins.<\/jats:p>\n               <jats:p>Availability and implementation: Benchmark data and scripts are available for download at http:\/\/www.bioinf.ucd.ie\/download\/ContTest.tar.gz.<\/jats:p>\n               <jats:p>Contact: \u00a0des.higgins@ucd.ie<\/jats:p>\n               <jats:p>Supplementary information: \u00a0Supplementary data are available at Bioinformatics online.<\/jats:p>","DOI":"10.1093\/bioinformatics\/btv592","type":"journal-article","created":{"date-parts":[[2015,11,15]],"date-time":"2015-11-15T01:38:21Z","timestamp":1447551501000},"page":"814-820","source":"Crossref","is-referenced-by-count":18,"title":["Using <i>de novo<\/i> protein structure predictions to measure the quality of very large multiple sequence alignments"],"prefix":"10.1093","volume":"32","author":[{"given":"Gear\u00f3id","family":"Fox","sequence":"first","affiliation":[{"name":"Conway Institute of Biomolecular and Biomedical Research, and UCD School of Medicine and Medical Science, University College Dublin, Dublin 4, Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fabian","family":"Sievers","sequence":"additional","affiliation":[{"name":"Conway Institute of Biomolecular and Biomedical Research, and UCD School of Medicine and Medical Science, University College Dublin, Dublin 4, Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Desmond G.","family":"Higgins","sequence":"additional","affiliation":[{"name":"Conway Institute of Biomolecular and Biomedical Research, and UCD School of Medicine and Medical Science, University College Dublin, Dublin 4, Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2015,11,14]]},"reference":[{"key":"2023020111572171800_btv592-B1","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1093\/nar\/28.1.235","article-title":"The protein data bank","volume":"28","author":"Berman","year":"2000","journal-title":"Nucleic Acids Res."},{"key":"2023020111572171800_btv592-B2","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1186\/1748-7188-5-21","article-title":"Sequence embedding for fast construction of guide trees for multiple sequence alignment","volume":"5","author":"Blackshields","year":"2010","journal-title":"Algorithms Mol. Biol."},{"key":"2023020111572171800_btv592-B3","doi-asserted-by":"crossref","first-page":"10556","DOI":"10.1073\/pnas.1405628111","article-title":"Simple chained guide trees give high-quality protein multiple sequence alignments","volume":"111","author":"Boyce","year":"2014","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"2023020111572171800_btv592-B4","doi-asserted-by":"crossref","first-page":"E101","DOI":"10.1073\/pnas.1419351112","article-title":"Reply to Tan et\u00a0al.: differences between real and simulated proteins in multiple sequence alignments: Fig.\u00a01","volume":"112","author":"Boyce","year":"2015","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"2023020111572171800_btv592-B5","volume-title":"PFAM.db: A Set of Protein ID Mappings for PFAM","author":"Carlson","year":".."},{"key":"2023020111572171800_btv592-B6","doi-asserted-by":"crossref","first-page":"R37","DOI":"10.1186\/gb-2010-11-4-r37","article-title":"Phylogenetic assessment of alignments reveals neglected tree signal in gaps","volume":"11","author":"Dessimoz","year":"2010","journal-title":"Genome Biol."},{"key":"2023020111572171800_btv592-B7","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1093\/bioinformatics\/14.9.755","article-title":"Profile hidden Markov models","volume":"14","author":"Eddy","year":"1998","journal-title":"Bioinformatics"},{"key":"2023020111572171800_btv592-B8","doi-asserted-by":"crossref","first-page":"1792","DOI":"10.1093\/nar\/gkh340","article-title":"MUSCLE: multiple sequence alignment with high accuracy and high throughput","volume":"32","author":"Edgar","year":"2004","journal-title":"Nucleic Acids Res."},{"key":"2023020111572171800_btv592-B9","doi-asserted-by":"crossref","first-page":"D222","DOI":"10.1093\/nar\/gkt1223","article-title":"Pfam: the protein families database","volume":"42","author":"Finn","year":"2014","journal-title":"Nucleic Acids Res."},{"key":"2023020111572171800_btv592-B10","first-page":"189","article-title":"CLUSTAL V: improved software for multiple sequence alignment","volume":"8","author":"Higgins","year":"1992","journal-title":"Comput. Appl. Biosci."},{"key":"2023020111572171800_btv592-B11","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1007\/978-1-62703-646-7_4","article-title":"Who watches the watchmen? An appraisal of benchmarks for multiple sequence alignment","volume":"1079","author":"Iantorno","year":"2014","journal-title":"Methods Mol. Biol."},{"key":"2023020111572171800_btv592-B12","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1093\/bioinformatics\/btr638","article-title":"PSICOV: precise structural contact prediction using sparse inverse covariance estimation on large multiple sequence alignments","volume":"28","author":"Jones","year":"2012","journal-title":"Bioinformatics"},{"key":"2023020111572171800_btv592-B13","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1186\/1471-2105-15-85","article-title":"FreeContact: fast and free software for protein contact prediction from residue co-evolution","volume":"15","author":"Kaj\u00e1n","year":"2014","journal-title":"BMC Bioinformatics"},{"key":"2023020111572171800_btv592-B14","doi-asserted-by":"crossref","first-page":"772","DOI":"10.1093\/molbev\/mst010","article-title":"MAFFT multiple sequence alignment software version 7: improvements in performance and usability","volume":"30","author":"Katoh","year":"2013","journal-title":"Mol. Biol. Evol."},{"key":"2023020111572171800_btv592-B15","doi-asserted-by":"crossref","first-page":"372","DOI":"10.1093\/bioinformatics\/btl592","article-title":"PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences","volume":"23","author":"Katoh","year":"2007","journal-title":"Bioinformatics"},{"key":"2023020111572171800_btv592-B16","doi-asserted-by":"crossref","first-page":"2947","DOI":"10.1093\/bioinformatics\/btm404","article-title":"Clustal W and Clustal X version 2.0","volume":"23","author":"Larkin","year":"2007","journal-title":"Bioinformatics"},{"key":"2023020111572171800_btv592-B17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/1471-2105-6-298","article-title":"Kalign\u2014an accurate and fast multiple sequence alignment algorithm","volume":"6","author":"Lassmann","year":"2005","journal-title":"BMC Bioinformatics"},{"key":"2023020111572171800_btv592-B18","doi-asserted-by":"crossref","first-page":"858","DOI":"10.1093\/nar\/gkn1006","article-title":"Kalign2: high-performance multiple alignment of protein and nucleotide sequences allowing external features","volume":"37","author":"Lassmann","year":"2009","journal-title":"Nucleic Acids Res."},{"key":"2023020111572171800_btv592-B19","doi-asserted-by":"crossref","first-page":"10557","DOI":"10.1073\/pnas.0409137102","article-title":"An algorithm for progressive multiple alignment of sequences with insertions","volume":"102","author":"L\u00f6ytynoja","year":"2005","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"2023020111572171800_btv592-B20","doi-asserted-by":"crossref","first-page":"e28766","DOI":"10.1371\/journal.pone.0028766","article-title":"Protein 3D structure computed from evolutionary sequence variation","volume":"6","author":"Marks","year":"2011","journal-title":"PLoS One"},{"key":"2023020111572171800_btv592-B21","doi-asserted-by":"crossref","first-page":"1072","DOI":"10.1038\/nbt.2419","article-title":"Protein structure prediction from sequence variation","volume":"30","author":"Marks","year":"2012","journal-title":"Nat. Biotechnol."},{"key":"2023020111572171800_btv592-B22","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1016\/j.mbs.2012.10.005","article-title":"A new balance index for phylogenetic trees","volume":"241","author":"Mir","year":"2013","journal-title":"Math Biosci."},{"key":"2023020111572171800_btv592-B23","doi-asserted-by":"crossref","first-page":"2469","DOI":"10.1002\/pro.5560071126","article-title":"HOMSTRAD: a database of protein structure alignments for homologous families","volume":"7","author":"Mizuguchi","year":"1998","journal-title":"Protein Sci."},{"key":"2023020111572171800_btv592-B24","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1093\/sysbio\/21.2.225","article-title":"\u201cGood\u201d and \u201cBad\u201d phenograms","volume":"21","author":"Sackin","year":"1972","journal-title":"Syst. Biol."},{"key":"2023020111572171800_btv592-B25","doi-asserted-by":"crossref","first-page":"539","DOI":"10.1038\/msb.2011.75","article-title":"Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega","volume":"7","author":"Sievers","year":"2011","journal-title":"Mol. Syst. Biol."},{"key":"2023020111572171800_btv592-B26","doi-asserted-by":"crossref","first-page":"338","DOI":"10.1186\/1471-2105-15-338","article-title":"Systematic exploration of guide-tree topology effects for small protein alignments","volume":"15","author":"Sievers","year":"2014","journal-title":"BMC Bioinformatics"},{"key":"2023020111572171800_btv592-B27","doi-asserted-by":"crossref","first-page":"3940","DOI":"10.1093\/bioinformatics\/bti623","article-title":"ROCR: visualizing classifier performance in R","volume":"21","author":"Sing","year":"2005","journal-title":"Bioinformatics"},{"key":"2023020111572171800_btv592-B28","doi-asserted-by":"crossref","first-page":"E99","DOI":"10.1073\/pnas.1417526112","article-title":"Simple chained guide trees give poorer multiple sequence alignments than inferred trees in simulation and phylogenetic benchmarks: Fig.\u00a01","volume":"112","author":"Tan","year":"2015","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"2023020111572171800_btv592-B29","doi-asserted-by":"crossref","first-page":"473","DOI":"10.1016\/j.sbi.2013.04.001","article-title":"Prediction of contacts from correlated sequence substitutions","volume":"23","author":"Taylor","year":"2013","journal-title":"Curr. Opin. Struct. Biol."},{"key":"2023020111572171800_btv592-B30","doi-asserted-by":"crossref","first-page":"1573","DOI":"10.1093\/bioinformatics\/btr163","article-title":"CMView: interactive contact map visualization and analysis","volume":"27","author":"Vehlow","year":"2011","journal-title":"Bioinformatics"},{"key":"2023020111572171800_btv592-B31","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1098\/rstb.1925.0002","article-title":"A mathematical theory of evolution, based on the conclusions of Dr. J. C. Willis, F.R.S","volume":"213","author":"Yule","year":"1925","journal-title":"Philos. Trans. R Soc. Lond. B"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/32\/6\/814\/49018663\/bioinformatics_32_6_814.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/32\/6\/814\/49018663\/bioinformatics_32_6_814.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T22:22:15Z","timestamp":1675290135000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/32\/6\/814\/1743535"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,11,14]]},"references-count":31,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2016,3,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btv592","relation":{},"ISSN":["1367-4811","1367-4803"],"issn-type":[{"value":"1367-4811","type":"electronic"},{"value":"1367-4803","type":"print"}],"subject":[],"published-other":{"date-parts":[[2016,3,15]]},"published":{"date-parts":[[2015,11,14]]}}}