{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T01:31:46Z","timestamp":1771983106053,"version":"3.50.1"},"reference-count":77,"publisher":"Springer Science and Business Media LLC","issue":"S19","license":[{"start":{"date-parts":[[2018,12,1]],"date-time":"2018-12-01T00:00:00Z","timestamp":1543622400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s12859-018-2524-4","type":"journal-article","created":{"date-parts":[[2018,12,31]],"date-time":"2018-12-31T08:43:12Z","timestamp":1546245792000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["A benchmark study of sequence alignment methods for protein clustering"],"prefix":"10.1186","volume":"19","author":[{"given":"Yingying","family":"Wang","sequence":"first","affiliation":[]},{"given":"Hongyan","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yunpeng","family":"Cai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,12,31]]},"reference":[{"key":"2524_CR1","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1517\/14622416.3.1.131","volume":"3","author":"C Notredame","year":"2002","unstructured":"Notredame C. Recent progress in multiple sequence alignment: a survey. Pharmacogenomics. 2002;3:131\u201344.","journal-title":"Pharmacogenomics"},{"key":"2524_CR2","doi-asserted-by":"publisher","first-page":"D227","DOI":"10.1093\/nar\/gkj063","volume":"34","author":"N Hulo","year":"2006","unstructured":"Hulo N, Bairoch A, Bulliard V, Cerutti L, De Castro E, Langendijk-Genevaux PS, et al. The PROSITE database. Nucleic Acids Res. 2006;34:D227\u201330.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR3","doi-asserted-by":"publisher","first-page":"D281","DOI":"10.1093\/nar\/gkm960","volume":"36","author":"RD Finn","year":"2008","unstructured":"Finn RD, Tate J, Mistry J, Coggill PC, Sammut SJ, Hotz HR, et al. The Pfam protein families database. Nucleic Acids Res. 2008;36:D281\u20138.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR4","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1093\/nar\/26.1.323","volume":"26","author":"F Corpet","year":"1998","unstructured":"Corpet F, Gouzy J, Kahn D. The ProDom database of protein domain families. Nucleic Acids Res. 1998;26:323\u20136.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR5","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","volume":"215","author":"SF Altschul","year":"1990","unstructured":"Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ. Basic local alignment search tool. J Mol Biol. 1990;215:403\u201310.","journal-title":"J Mol Biol"},{"key":"2524_CR6","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/0076-6879(90)83007-V","volume":"183","author":"WR Pearson","year":"1990","unstructured":"Pearson WR. Rapid and sensitive sequence comparison with FASTP and FASTA. Methods Enzymol. 1990;183:63\u201398.","journal-title":"Methods Enzymol"},{"key":"2524_CR7","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1016\/0022-2836(81)90087-5","volume":"147","author":"TF Smith","year":"1981","unstructured":"Smith TF, Waterman MS. Identification of common molecular subsequences. J Mol Biol. 1981;147:195\u20137.","journal-title":"J Mol Biol"},{"key":"2524_CR8","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1038\/ng0393-266","volume":"3","author":"W Gish","year":"1993","unstructured":"Gish W, States DJ. Identification of protein coding regions by database similarity search. Nat Genet. 1993;3:266\u201372.","journal-title":"Nat Genet"},{"key":"2524_CR9","doi-asserted-by":"publisher","first-page":"656","DOI":"10.1101\/gr.229202","volume":"12","author":"WJ Kent","year":"2002","unstructured":"Kent WJ. BLAT\u2013the BLAST-like alignment tool. Genome Res. 2002;12:656\u201364.","journal-title":"Genome Res"},{"key":"2524_CR10","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1093\/bioinformatics\/14.2.164","volume":"14","author":"J Gracy","year":"1998","unstructured":"Gracy J, Argos P. Automated protein sequence database classification. I. Integration of compositional similarity search, local similarity search, and multiple sequence alignment. Bioinformatics. 1998;14:164\u201373.","journal-title":"Bioinformatics"},{"key":"2524_CR11","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1016\/0014-5793(94)00648-2","volume":"349","author":"HO Villar","year":"1994","unstructured":"Villar HO, Kauvar LM. Amino acid preferences at protein binding sites. FEBS Lett. 1994;349:125\u201330.","journal-title":"FEBS Lett"},{"key":"2524_CR12","unstructured":"Ouzounis C, Perez-Irratxeta C, Sander C, Valencia A. Are binding residues conserved? Pacific symposium on biocomputing. Pac Symp Biocomput. 1998:401\u201312. \n                    https:\/\/www.ncbi.nlm.nih.gov\/pubmed\/9697199\n                    \n                  ."},{"key":"2524_CR13","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1093\/bioinformatics\/17.8.700","volume":"17","author":"J Pei","year":"2001","unstructured":"Pei J, Grishin NV. AL2CO: calculation of positional conservation in a protein sequence alignment. Bioinformatics. 2001;17:700\u201312.","journal-title":"Bioinformatics"},{"key":"2524_CR14","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1093\/bioinformatics\/15.6.471","volume":"15","author":"S Henikoff","year":"1999","unstructured":"Henikoff S, Henikoff JG, Pietrokovski S. Blocks+: a non-redundant database of protein alignment blocks derived from multiple compilations. Bioinformatics. 1999;15:471\u20139.","journal-title":"Bioinformatics"},{"key":"2524_CR15","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1089\/106652703322756113","volume":"10","author":"L Liao","year":"2003","unstructured":"Liao L, Noble WS. Combining pairwise sequence similarity and support vector machines for detecting remote protein evolutionary and structural relationships. J Comput Biol. 2003;10:857\u201368.","journal-title":"J Comput Biol"},{"key":"2524_CR16","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1002\/prot.340090107","volume":"9","author":"C Sander","year":"1991","unstructured":"Sander C, Schneider R. Database of homology-derived protein structures and the structural meaning of sequence alignment. Proteins. 1991;9:56\u201368.","journal-title":"Proteins"},{"key":"2524_CR17","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1002\/(SICI)1097-0134(20000701)40:1<86::AID-PROT100>3.0.CO;2-Y","volume":"40","author":"H Zhang","year":"2000","unstructured":"Zhang H, Huang K, Li Z, Banerjei L, Fisher KE, Grishin NV, et al. Crystal structure of YbaK protein from Haemophilus influenzae (HI1434) at 1.8 a resolution: functional implications. Proteins. 2000;40:86\u201397.","journal-title":"Proteins"},{"key":"2524_CR18","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/0022-2836(70)90057-4","volume":"48","author":"SB Needleman","year":"1970","unstructured":"Needleman SB, Wunsch CD. A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol. 1970;48:443\u201353.","journal-title":"J Mol Biol"},{"key":"2524_CR19","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1016\/S0168-9525(00)02024-2","volume":"16","author":"P Rice","year":"2000","unstructured":"Rice P, Longden I, Bleasby A. EMBOSS: the European molecular biology open software suite. Trends Genet. 2000;16:276\u20137.","journal-title":"Trends Genet"},{"key":"2524_CR20","doi-asserted-by":"publisher","first-page":"W5","DOI":"10.1093\/nar\/gkn201","volume":"36","author":"M Johnson","year":"2008","unstructured":"Johnson M, Zaretskaya I, Raytselis Y, Merezhuk Y, McGinnis S, Madden TL. NCBI BLAST: a better web interface. Nucleic Acids Res. 2008;36:W5\u20139.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR21","doi-asserted-by":"publisher","first-page":"1658","DOI":"10.1093\/bioinformatics\/btl158","volume":"22","author":"W Li","year":"2006","unstructured":"Li W, Godzik A. Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences. Bioinformatics. 2006;22:1658\u20139.","journal-title":"Bioinformatics"},{"key":"2524_CR22","doi-asserted-by":"publisher","first-page":"e76","DOI":"10.1093\/nar\/gkp285","volume":"37","author":"Y Sun","year":"2009","unstructured":"Sun Y, Cai Y, Liu L, Yu F, Farrell ML, McKendree W, et al. ESPRIT: estimating species richness using large collections of 16S rRNA pyrosequences. Nucleic Acids Res. 2009;37:e76.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR23","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar RC. Search and clustering orders of magnitude faster than BLAST. Bioinformatics. 2010;26:2460\u20131.","journal-title":"Bioinformatics"},{"key":"2524_CR24","doi-asserted-by":"publisher","first-page":"1792","DOI":"10.1093\/nar\/gkh340","volume":"32","author":"RC Edgar","year":"2004","unstructured":"Edgar RC. MUSCLE: multiple sequence alignment with high accuracy and high throughput. Nucleic Acids Res. 2004;32:1792\u20137.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR25","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1186\/1471-2105-5-113","volume":"5","author":"RC Edgar","year":"2004","unstructured":"Edgar RC. MUSCLE: a multiple sequence alignment method with reduced time and space complexity. BMC Bioinformatics. 2004;5:113.","journal-title":"BMC Bioinformatics"},{"key":"2524_CR26","doi-asserted-by":"publisher","first-page":"772","DOI":"10.1093\/molbev\/mst010","volume":"30","author":"K Katoh","year":"2013","unstructured":"Katoh K, Standley DM. MAFFT multiple sequence alignment software version 7: improvements in performance and usability. Mol Biol Evol. 2013;30:772\u201380.","journal-title":"Mol Biol Evol"},{"key":"2524_CR27","doi-asserted-by":"publisher","first-page":"3059","DOI":"10.1093\/nar\/gkf436","volume":"30","author":"K Katoh","year":"2002","unstructured":"Katoh K, Misawa K, Kuma K, Miyata T. MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform. Nucleic Acids Res. 2002;30:3059\u201366.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR28","doi-asserted-by":"publisher","first-page":"4673","DOI":"10.1093\/nar\/22.22.4673","volume":"22","author":"JD Thompson","year":"1994","unstructured":"Thompson JD, Higgins DG, Gibson TJ. CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice. Nucleic Acids Res. 1994;22:4673\u201380.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR29","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1038\/msb.2011.75","volume":"7","author":"F Sievers","year":"2011","unstructured":"Sievers F, Wilm A, Dineen D, Gibson TJ, Karplus K, Li W, et al. Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega. Mol Syst Biol. 2011;7:539.","journal-title":"Mol Syst Biol"},{"key":"2524_CR30","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1101\/gr.2821705","volume":"15","author":"CB Do","year":"2005","unstructured":"Do CB, Mahabhashyam MS, Brudno M, Batzoglou S. ProbCons: Probabilistic consistency-based multiple sequence alignment. Genome Res. 2005;15:330\u201340.","journal-title":"Genome Res"},{"key":"2524_CR31","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1006\/jmbi.2000.4042","volume":"302","author":"C Notredame","year":"2000","unstructured":"Notredame C, Higgins DG, Heringa J. T-coffee: a novel method for fast and accurate multiple sequence alignment. J Mol Biol. 2000;302:205\u201317.","journal-title":"J Mol Biol"},{"key":"2524_CR32","doi-asserted-by":"publisher","first-page":"802","DOI":"10.1093\/bioinformatics\/btm017","volume":"23","author":"J Pei","year":"2007","unstructured":"Pei J, Grishin NV. PROMALS: towards accurate multiple sequence alignments of distantly related proteins. Bioinformatics. 2007;23:802\u20138.","journal-title":"Bioinformatics"},{"key":"2524_CR33","doi-asserted-by":"publisher","first-page":"3615","DOI":"10.1093\/bioinformatics\/bti582","volume":"21","author":"H Zhou","year":"2005","unstructured":"Zhou H, Zhou Y. SPEM: improving multiple sequence alignment with sequence profiles and predicted secondary structures. Bioinformatics. 2005;21:3615\u201321.","journal-title":"Bioinformatics"},{"key":"2524_CR34","doi-asserted-by":"publisher","first-page":"W604","DOI":"10.1093\/nar\/gkl092","volume":"34","author":"F Armougom","year":"2006","unstructured":"Armougom F, Moretti S, Poirot O, Audic S, Dumas P, Schaeli B, et al. Expresso: automatic incorporation of structural information in multiple sequence alignments using 3D-coffee. Nucleic Acids Res. 2006;34:W604\u20138.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR35","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1093\/nar\/gkn072","volume":"36","author":"J Pei","year":"2008","unstructured":"Pei J, Kim BH, Grishin NV. PROMALS3D: a tool for multiple protein sequence and structure alignments. Nucleic Acids Res. 2008;36:2295\u2013300.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR36","doi-asserted-by":"publisher","first-page":"1428","DOI":"10.1093\/bioinformatics\/bth116","volume":"20","author":"I Walle Van","year":"2004","unstructured":"Van Walle I, Lasters I, Wyns L. Align-m\u2013a new algorithm for multiple alignment of highly divergent sequences. Bioinformatics. 2004;20:1428\u201335.","journal-title":"Bioinformatics"},{"key":"2524_CR37","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1186\/1471-2105-6-298","volume":"6","author":"T Lassmann","year":"2005","unstructured":"Lassmann T, Sonnhammer EL. Kalign\u2013an accurate and fast multiple sequence alignment algorithm. BMC Bioinformatics. 2005;6:298.","journal-title":"BMC Bioinformatics"},{"key":"2524_CR38","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1093\/bioinformatics\/14.3.290","volume":"14","author":"B Morgenstern","year":"1998","unstructured":"Morgenstern B, Frech K, Dress A, Werner T. DIALIGN: finding local similarities by multiple sequence alignment. Bioinformatics. 1998;14:290\u20134.","journal-title":"Bioinformatics"},{"key":"2524_CR39","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1093\/bioinformatics\/18.3.452","volume":"18","author":"C Lee","year":"2002","unstructured":"Lee C, Grasso C, Sharlow MF. Multiple sequence alignment using partial order graphs. Bioinformatics. 2002;18:452\u201364.","journal-title":"Bioinformatics"},{"key":"2524_CR40","doi-asserted-by":"publisher","first-page":"2475","DOI":"10.1093\/bioinformatics\/btv177","volume":"31","author":"Q Zou","year":"2015","unstructured":"Zou Q, Hu Q, Guo M, Wang G. HAlign: fast multiple similar DNA\/RNA sequence alignment based on the centre star strategy. Bioinformatics. 2015;31:2475\u201381.","journal-title":"Bioinformatics"},{"key":"2524_CR41","doi-asserted-by":"publisher","first-page":"5932","DOI":"10.1093\/nar\/gkl511","volume":"34","author":"TM Phuong","year":"2006","unstructured":"Phuong TM, Do CB, Edgar RC, Batzoglou S. Multiple alignment of protein sequences with repeats and rearrangements. Nucleic Acids Res. 2006;34:5932\u201342.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR42","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1016\/j.jmb.2004.04.058","volume":"340","author":"O O\u2019Sullivan","year":"2004","unstructured":"O\u2019Sullivan O, Suhre K, Abergel C, Higgins DG, Notredame C. 3DCoffee: combining protein sequences and structures within multiple sequence alignments. J Mol Biol. 2004;340:385\u201395.","journal-title":"J Mol Biol"},{"key":"2524_CR43","doi-asserted-by":"publisher","first-page":"W394","DOI":"10.1093\/nar\/gkl244","volume":"34","author":"TZ DeSantis","year":"2006","unstructured":"DeSantis TZ, Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, et al. NAST: a multiple sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Res. 2006;34:W394\u20139.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR44","doi-asserted-by":"publisher","first-page":"7537","DOI":"10.1128\/AEM.01541-09","volume":"75","author":"PD Schloss","year":"2009","unstructured":"Schloss PD, Westcott SL, Ryabin T, Hall JR, Hartmann M, Hollister EB, et al. Introducing mothur: open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol. 2009;75:7537\u201341.","journal-title":"Appl Environ Microbiol"},{"key":"2524_CR45","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1126\/science.1151532","volume":"319","author":"KM Wong","year":"2008","unstructured":"Wong KM, Suchard MA, Huelsenbeck JP. Alignment uncertainty and genomic analysis. Science. 2008;319:473\u20136.","journal-title":"Science"},{"key":"2524_CR46","doi-asserted-by":"publisher","first-page":"e18093","DOI":"10.1371\/journal.pone.0018093","volume":"6","author":"JD Thompson","year":"2011","unstructured":"Thompson JD, Linard B, Lecompte O, Poch O. A comprehensive benchmark study of multiple sequence alignment methods: current challenges and future perspectives. PLoS One. 2011;6:e18093.","journal-title":"PLoS One"},{"key":"2524_CR47","first-page":"571","volume":"11","author":"MA McClure","year":"1994","unstructured":"McClure MA, Vasi TK, Fitch WM. Comparative analysis of multiple protein-sequence alignment methods. Mol Biol Evol. 1994;11:571\u201392.","journal-title":"Mol Biol Evol"},{"key":"2524_CR48","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1093\/bioinformatics\/15.1.87","volume":"15","author":"JD Thompson","year":"1999","unstructured":"Thompson JD, Plewniak F, Poch O. BAliBASE: a benchmark alignment database for the evaluation of multiple alignment programs. Bioinformatics. 1999;15:87\u20138.","journal-title":"Bioinformatics"},{"key":"2524_CR49","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1093\/nar\/29.1.323","volume":"29","author":"A Bahr","year":"2001","unstructured":"Bahr A, Thompson JD, Thierry JC, Poch O. BAliBASE (Benchmark Alignment dataBASE): enhancements for repeats, transmembrane sequences and circular permutations. Nucleic Acids Res. 2001;29:323\u20136.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR50","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1002\/prot.20527","volume":"61","author":"JD Thompson","year":"2005","unstructured":"Thompson JD, Koehl P, Ripp R, Poch O. BAliBASE 3.0: latest developments of the multiple sequence alignment benchmark. Proteins. 2005;61:127\u201336.","journal-title":"Proteins"},{"key":"2524_CR51","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1186\/1471-2105-9-213","volume":"9","author":"E Perrodou","year":"2008","unstructured":"Perrodou E, Chica C, Poch O, Gibson TJ, Thompson JD. A new protein linear motif benchmark for multiple sequence alignment software. BMC Bioinformatics. 2008;9:213.","journal-title":"BMC Bioinformatics"},{"key":"2524_CR52","doi-asserted-by":"publisher","first-page":"2469","DOI":"10.1002\/pro.5560071126","volume":"7","author":"K Mizuguchi","year":"1998","unstructured":"Mizuguchi K, Deane CM, Blundell TL, Overington JP. HOMSTRAD: a database of protein structure alignments for homologous families. Protein Sci. 1998;7:2469\u201371.","journal-title":"Protein Sci"},{"key":"2524_CR53","doi-asserted-by":"publisher","first-page":"980","DOI":"10.1038\/nsb1203-980","volume":"10","author":"H Berman","year":"2003","unstructured":"Berman H, Henrick K, Nakamura H. Announcing the worldwide protein data Bank. Nat Struct Biol. 2003;10:980.","journal-title":"Nat Struct Biol"},{"key":"2524_CR54","doi-asserted-by":"publisher","first-page":"D419","DOI":"10.1093\/nar\/gkm993","volume":"36","author":"A Andreeva","year":"2008","unstructured":"Andreeva A, Howorth D, Chandonia JM, Brenner SE, Hubbard TJ, Chothia C, et al. Data growth and its impact on the SCOP database: new developments. Nucleic Acids Res. 2008;36:D419\u201325.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR55","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1186\/1471-2105-4-47","volume":"4","author":"GP Raghava","year":"2003","unstructured":"Raghava GP, Searle SM, Audley PC, Barber JD, Barton GJ. OXBench: a benchmark for evaluation of protein multiple sequence alignment accuracy. BMC Bioinformatics. 2003;4:47.","journal-title":"BMC Bioinformatics"},{"key":"2524_CR56","doi-asserted-by":"publisher","first-page":"1267","DOI":"10.1093\/bioinformatics\/bth493","volume":"21","author":"WI Van","year":"2005","unstructured":"Van WI, Lasters I, Wyns L. SABmark\u2013a benchmark for sequence alignment that covers the entire known fold space. Bioinformatics. 2005;21:1267\u20138.","journal-title":"Bioinformatics"},{"key":"2524_CR57","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1186\/1471-2105-6-66","volume":"6","author":"AR Subramanian","year":"2005","unstructured":"Subramanian AR, Weyer-Menkhoff J, Kaufmann M, Morgenstern B. DIALIGN-T: an improved algorithm for segment-based multiple sequence alignment. BMC Bioinformatics. 2005;6:66.","journal-title":"BMC Bioinformatics"},{"key":"2524_CR58","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1186\/1748-7188-3-6","volume":"3","author":"AR Subramanian","year":"2008","unstructured":"Subramanian AR, Kaufmann M, Morgenstern B. DIALIGN-TX: greedy and progressive approaches for segment-based multiple sequence alignment. Algorithms Mol Biol. 2008;3:6.","journal-title":"Algorithms Mol Biol"},{"key":"2524_CR59","first-page":"321","volume":"6","author":"G Blackshields","year":"2006","unstructured":"Blackshields G, Wallace IM, Larkin M, Higgins DG. Analysis and comparison of benchmarks for multiple sequence alignment. In Silico Biol. 2006;6:321\u201339.","journal-title":"In Silico Biol"},{"key":"2524_CR60","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1016\/j.sbi.2008.03.007","volume":"18","author":"J Pei","year":"2008","unstructured":"Pei J. Multiple protein sequence alignment. Curr Opin Struct Biol. 2008;18:382\u20136.","journal-title":"Curr Opin Struct Biol"},{"key":"2524_CR61","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1016\/j.sbi.2006.04.004","volume":"16","author":"RC Edgar","year":"2006","unstructured":"Edgar RC, Batzoglou S. Multiple sequence alignment. Curr Opin Struct Biol. 2006;16:368\u201373.","journal-title":"Curr Opin Struct Biol"},{"key":"2524_CR62","doi-asserted-by":"publisher","first-page":"2145","DOI":"10.1093\/nar\/gkp1196","volume":"38","author":"RC Edgar","year":"2010","unstructured":"Edgar RC. Quality measures for protein alignment benchmarks. Nucleic Acids Res. 2010;38:2145\u201353.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR63","doi-asserted-by":"publisher","first-page":"7353","DOI":"10.1093\/nar\/gkq625","volume":"38","author":"MR Aniba","year":"2010","unstructured":"Aniba MR, Poch O, Thompson JD. Issues in bioinformatics benchmarking: the case study of multiple sequence alignment. Nucleic Acids Res. 2010;38:7353\u201363.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR64","doi-asserted-by":"publisher","first-page":"2682","DOI":"10.1093\/nar\/27.13.2682","volume":"27","author":"JD Thompson","year":"1999","unstructured":"Thompson JD, Plewniak F, Poch O. A comprehensive comparison of multiple sequence alignment programs. Nucleic Acids Res. 1999;27:2682\u201390.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR65","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1016\/S0014-5793(02)03189-7","volume":"529","author":"T Lassmann","year":"2002","unstructured":"Lassmann T, Sonnhammer EL. Quality assessment of multiple alignment programs. FEBS Lett. 2002;529:126\u201330.","journal-title":"FEBS Lett"},{"key":"2524_CR66","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1186\/1471-2105-7-471","volume":"7","author":"PA Nuin","year":"2006","unstructured":"Nuin PA, Wang Z, Tillier ER. The accuracy of several multiple sequence alignment programs for proteins. BMC Bioinformatics. 2006;7:471.","journal-title":"BMC Bioinformatics"},{"key":"2524_CR67","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1186\/1748-7188-9-4","volume":"9","author":"FS Pais","year":"2014","unstructured":"Pais FS, Ruy Pde C, Oliveira G, Coimbra RS. Assessing the efficiency of multiple sequence alignment programs. Algorithms Mol Biol. 2014;9:4.","journal-title":"Algorithms Mol Biol"},{"key":"2524_CR68","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1093\/bib\/bbr009","volume":"13","author":"Y Sun","year":"2012","unstructured":"Sun Y, Cai Y, Huse SM, Knight R, Farmerie WG, Wang X, et al. A large-scale benchmark study of existing algorithms for taxonomy-independent microbial community analysis. Brief Bioinform. 2012;13:107\u201321.","journal-title":"Brief Bioinform"},{"key":"2524_CR69","doi-asserted-by":"publisher","first-page":"e205","DOI":"10.1093\/nar\/gkq872","volume":"38","author":"Y Sun","year":"2010","unstructured":"Sun Y, Cai Y, Mai V, Farmerie W, Yu F, Li J, et al. Advanced computational algorithms for microbial community analysis using massive 16S rRNA sequence data. Nucleic Acids Res. 2010;38:e205.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR70","doi-asserted-by":"publisher","first-page":"e95","DOI":"10.1093\/nar\/gkr349","volume":"39","author":"Y Cai","year":"2011","unstructured":"Cai Y, Sun Y. ESPRIT-tree: hierarchical clustering analysis of millions of 16S rRNA pyrosequences in quasilinear computational time. Nucleic Acids Res. 2011;39:e95.","journal-title":"Nucleic Acids Res"},{"key":"2524_CR71","doi-asserted-by":"crossref","unstructured":"Vendramin L, Campello RJGB, Hruschka ER. On the comparison of relative clustering validity criteria. Proceedings of the 2009 SIAM International Conference on Data Mining SDM. SIAM; 2009. p. 733\u201344. \n                    https:\/\/epubs.siam.org\/doi\/abs\/10.1137\/1.9781611972795.63\n                    \n                  .","DOI":"10.1137\/1.9781611972795.63"},{"key":"2524_CR72","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1080\/01969727408546059","volume":"4","author":"J Dunn","year":"1974","unstructured":"Dunn J. Well separated clusters and optimal fuzzy partitions. J Cybern. 1974;4:95\u2013104.","journal-title":"J Cybern"},{"key":"2524_CR73","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1109\/TPAMI.1979.4766909","volume":"1","author":"DL Davies","year":"1979","unstructured":"Davies DL, Bouldin DW. A cluster separation measure. IEEE Trans Pattern Anal Mach Intell. 1979;1:224\u20137.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2524_CR74","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/3-540-45372-5_26","volume":"1","author":"M Halkidi","year":"2000","unstructured":"Halkidi M, Vazirgiannis M, Batistakis Y. Quality scheme assessment in the clustering process. Lect Notes Comput Sci. 2000;1:265\u201376.","journal-title":"Lect Notes Comput Sci"},{"key":"2524_CR75","first-page":"187","volume-title":"IEEE International Conference on Data Mining","author":"M Halkidi","year":"2001","unstructured":"Halkidi M, Vazirgiannis M. Clustering validity assessment: finding the optimal partitioning of a data set. In: IEEE International Conference on Data Mining; 2001. p. 187\u201394."},{"key":"2524_CR76","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","volume":"20","author":"PJ Rousseeuw","year":"1987","unstructured":"Rousseeuw PJ. Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. Comput Appl Math. 1987;20:53\u201365.","journal-title":"Comput Appl Math"},{"key":"2524_CR77","unstructured":"Subhash S. Applied multivariate techniques. Wiley New York; 1996. \n                    https:\/\/epdf.tips\/applied-multivariate-techniques94be12da0a08d5558f6903554865418346595.html\n                    \n                  ."}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-018-2524-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s12859-018-2524-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-018-2524-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,31]],"date-time":"2019-12-31T00:03:19Z","timestamp":1577750599000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-018-2524-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":77,"journal-issue":{"issue":"S19","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["2524"],"URL":"https:\/\/doi.org\/10.1186\/s12859-018-2524-4","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,12]]},"assertion":[{"value":"31 December 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"529"}}