{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T12:32:28Z","timestamp":1767961948699,"version":"3.49.0"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2010,12,1]],"date-time":"2010-12-01T00:00:00Z","timestamp":1291161600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/2.0"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2010,12]]},"DOI":"10.1186\/1471-2105-11-601","type":"journal-article","created":{"date-parts":[[2011,1,5]],"date-time":"2011-01-05T14:27:47Z","timestamp":1294237667000},"source":"Crossref","is-referenced-by-count":35,"title":["A grammar-based distance metric enables fast and accurate clustering of large sets of 16S sequences"],"prefix":"10.1186","volume":"11","author":[{"given":"David J","family":"Russell","sequence":"first","affiliation":[]},{"given":"Samuel F","family":"Way","sequence":"additional","affiliation":[]},{"given":"Andrew K","family":"Benson","sequence":"additional","affiliation":[]},{"given":"Khalid","family":"Sayood","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,12,17]]},"reference":[{"issue":"5","key":"4184_CR1","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1093\/bioinformatics\/14.5.423","volume":"14","author":"L Holm","year":"1998","unstructured":"Holm L, Sander C: Removing Near-Neighbour Redundancy from Large Protein Sequence Collections. Bioinformatics 1998, 14(5):423\u2013429. 10.1093\/bioinformatics\/14.5.423","journal-title":"Bioinformatics"},{"issue":"3","key":"4184_CR2","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1093\/bioinformatics\/17.3.282","volume":"17","author":"W Li","year":"2001","unstructured":"Li W, Jaroszewski L, Godzik A: Clustering of Highly Homologous Sequences to Reduce the Size of Large Protein Databases. Bioinformatics 2001, 17(3):282\u2013283. 10.1093\/bioinformatics\/17.3.282","journal-title":"Bioinformatics"},{"key":"4184_CR3","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1093\/bioinformatics\/18.1.77","volume":"18","author":"W Li","year":"2002","unstructured":"Li W, Jaroszewski L, Godzik A: Tolerating some Redundancy Significantly Speeds up Clustering of Large Protein Databases. Bioinformatics 2002, 18: 77\u201382. 10.1093\/bioinformatics\/18.1.77","journal-title":"Bioinformatics"},{"issue":"6","key":"4184_CR4","first-page":"603","volume":"11","author":"JD Parsons","year":"1995","unstructured":"Parsons JD: Improved Tools for DNA Comparison and Clustering. Computer Applications in the Biosciences 1995, 11(6):603\u2013613.","journal-title":"Computer Applications in the Biosciences"},{"key":"4184_CR5","doi-asserted-by":"crossref","unstructured":"Blackshields G, Sievers F, Shi W, Wilm A, Higgins DG: Sequence Embedding for Fast Construction of Guide Trees for Multiple Sequence Alignment. Algorithms for Molecular Biology 2010., 5(21):","DOI":"10.1186\/1748-7188-5-21"},{"issue":"13","key":"4184_CR6","doi-asserted-by":"publisher","first-page":"1658","DOI":"10.1093\/bioinformatics\/btl158","volume":"22","author":"W Li","year":"2006","unstructured":"Li W, Godzik A: Cd-hit: a Fast Program for Clustering and Comparing Large Sets of Protein or Nucleotide Sequences. Bioinformatics 2006, 22(13):1658\u20131659. 10.1093\/bioinformatics\/btl158","journal-title":"Bioinformatics"},{"key":"4184_CR7","doi-asserted-by":"publisher","first-page":"1694","DOI":"10.1126\/science.1177486","volume":"326","author":"EK Costello","year":"2009","unstructured":"Costello EK, Lauber CL, Hamady M, Fierer N, Gordon JI, Knight R: Bacterial Community Variation in Human Body Habitats Across Space and Time. Science 2009, 326: 1694\u20131697. 10.1126\/science.1177486","journal-title":"Science"},{"issue":"19","key":"4184_CR8","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar RC: Search and Clustering Orders of Magnitude Faster than BLAST. Bioinformatics 2010, 26(19):2460\u20132461. 10.1093\/bioinformatics\/btq461","journal-title":"Bioinformatics"},{"issue":"17","key":"4184_CR9","doi-asserted-by":"publisher","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"SF Altschul","year":"1997","unstructured":"Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ: Gapped BLAST and PSI-BLAST: a New Generation of Protein Database Search Programs. Nucleic Acids Research 1997, 25(17):3389\u20133402. 10.1093\/nar\/25.17.3389","journal-title":"Nucleic Acids Research"},{"key":"4184_CR10","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1109\/TIT.1976.1055501","volume":"22","author":"A Lempel","year":"1976","unstructured":"Lempel A, Ziv J: On the Complexity of Finite Sequences. IEEE Transactions on Information Theory 1976, 22: 75\u201381. 10.1109\/TIT.1976.1055501","journal-title":"IEEE Transactions on Information Theory"},{"issue":"2\/3","key":"4184_CR11","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1093\/comjnl\/40.2_and_3.103","volume":"40","author":"CG Nevill-Manning","year":"1997","unstructured":"Nevill-Manning CG, Witten IH: Compression and Explanation using Hierarchical Grammars. The Computer Journal 1997, 40(2\/3):103\u2013116. 10.1093\/comjnl\/40.2_and_3.103","journal-title":"The Computer Journal"},{"issue":"3","key":"4184_CR12","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1109\/TIT.1977.1055714","volume":"23","author":"J Ziv","year":"1977","unstructured":"Ziv J, Lempel A: A Universal Algorithm for Sequential Data Compression. IEEE Transactions on Information Theory 1977, 23(3):337\u2013343. 10.1109\/TIT.1977.1055714","journal-title":"IEEE Transactions on Information Theory"},{"key":"4184_CR13","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1145\/509907.510021","volume-title":"STOC '02: Proceedings of the Thirty-Fourth Annual ACM Symposium on Theory of Computing","author":"M Charikar","year":"2002","unstructured":"Charikar M, Lehman E, Liu D, Panigrahy R, Prabhakaran M, Rasala A, Sahai A, Shelat A: Approximating the Smallest Grammar: Kolmogorov Complexity in Natural Models. In STOC '02: Proceedings of the Thirty-Fourth Annual ACM Symposium on Theory of Computing. New York, NY, USA: ACM; 2002:792\u2013801."},{"issue":"5","key":"4184_CR14","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1109\/TIT.1978.1055934","volume":"24","author":"J Ziv","year":"1978","unstructured":"Ziv J, Lempel A: Compression of Individual Sequences via Variable-Rate Coding. IEEE Transactions on Information Theory 1978, 24(5):530\u2013536. 10.1109\/TIT.1978.1055934","journal-title":"IEEE Transactions on Information Theory"},{"key":"4184_CR15","doi-asserted-by":"crossref","unstructured":"Benedetto D, Caglioti E, Loreto V: Language Trees and Zipping. Physical Review Letters 2002., 88(4): 10.1103\/PhysRevLett.88.048702","DOI":"10.1103\/PhysRevLett.88.048702"},{"issue":"16","key":"4184_CR16","doi-asserted-by":"publisher","first-page":"2122","DOI":"10.1093\/bioinformatics\/btg295","volume":"19","author":"HH Otu","year":"2003","unstructured":"Otu HH, Sayood K: A New Sequence Distance Measure for Phylogenetic Tree Construction. Bioinformatics 2003, 19(16):2122\u20132130. 10.1093\/bioinformatics\/btg295","journal-title":"Bioinformatics"},{"key":"4184_CR17","doi-asserted-by":"crossref","unstructured":"Russell DJ, Otu HH, Sayood K: Grammar-Based Distance in Progressive Multiple Sequence Alignment. BMC Bioinformatics 2008., 9(306):","DOI":"10.1186\/1471-2105-9-306"},{"key":"4184_CR18","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/S0167-2789(03)00047-2","volume":"180","author":"A Puglisi","year":"2003","unstructured":"Puglisi A, Benedetto D, Caglioti E, Loreto V, Vulpiani A: Data Compression and Learning in Time Sequences Analysis. Physica D: Nonlinear Phenomena 2003, 180: 92\u2013107. 10.1016\/S0167-2789(03)00047-2","journal-title":"Physica D: Nonlinear Phenomena"},{"issue":"2","key":"4184_CR19","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1017\/S0953756203009079","volume":"108","author":"DR Bastola","year":"2004","unstructured":"Bastola DR, Otu HH, Doukas SE, Sayood K, Hinrichs SH, Iwen PC: Utilization of the Relative Complexity Measure to Construct a Phylogenetic Tree for Fungi. Mycological Research 2004, 108(2):117\u2013125. 10.1017\/S0953756203009079","journal-title":"Mycological Research"},{"key":"4184_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/SWAT.1973.13","volume-title":"14th Annual Symposium on Switching and Automata Theory","author":"P Weiner","year":"1973","unstructured":"Weiner P: Linear Pattern Matching Algorithms. 14th Annual Symposium on Switching and Automata Theory 1973, 1\u201311. full_text"},{"issue":"2","key":"4184_CR21","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1145\/321941.321946","volume":"23","author":"EM McCreight","year":"1976","unstructured":"McCreight EM: A Space-Economical Suffix Tree Construction Algorithm. Journal of the ACM 1976, 23(2):262\u2013272. 10.1145\/321941.321946","journal-title":"Journal of the ACM"},{"issue":"3","key":"4184_CR22","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/BF01206331","volume":"14","author":"E Ukkonen","year":"1995","unstructured":"Ukkonen E: On-Line Construction of Suffix Trees. Algorithmica 1995, 14(3):249\u2013260. 10.1007\/BF01206331","journal-title":"Algorithmica"},{"key":"4184_CR23","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1073\/pnas.80.3.726","volume":"80","author":"WJ Wilbur","year":"1983","unstructured":"Wilbur WJ, Lipman DJ: Rapid Similarity Searches of Nucleic Acid and Protein Data Banks. Proceedings of the National Academy of Sciences of the United States of America 1983, 80: 726\u2013730. 10.1073\/pnas.80.3.726","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"issue":"22","key":"4184_CR24","doi-asserted-by":"publisher","first-page":"4673","DOI":"10.1093\/nar\/22.22.4673","volume":"22","author":"JD Thompson","year":"1994","unstructured":"Thompson JD, Higgins DG, Gibson TJ: CLUSTAL W: Improving the Sensitivity of Progressive Multiple Sequence Alignment Through Sequence Weighting, Position-Specific Gap Penalties and Weight Matrix Choice. Nucleic Acids Research 1994, 22(22):4673\u20134680. 10.1093\/nar\/22.22.4673","journal-title":"Nucleic Acids Research"},{"issue":"2-3","key":"4184_CR25","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1023\/A:1012801612483","volume":"17","author":"M Halkidi","year":"2001","unstructured":"Halkidi M, Batistakis Y, Vazirgiannis M: On Clustering Validation Techniques. Journal of Intelligent Information Systems 2001, 17(2\u20133):107\u2013145. 10.1023\/A:1012801612483","journal-title":"Journal of Intelligent Information Systems"},{"key":"4184_CR26","doi-asserted-by":"crossref","unstructured":"Li W: Analysis and Comparison of Very Large Metagenomes with Fast Clustering and Functional Annotation. BMC Bioinformatics 2009., 10(359):","DOI":"10.1186\/1471-2105-10-359"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1471-2105-11-601.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/1471-2105-11-601\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1471-2105-11-601.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,1,22]],"date-time":"2019-01-22T08:59:25Z","timestamp":1548147565000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/1471-2105-11-601"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12]]},"references-count":26,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2010,12]]}},"alternative-id":["4184"],"URL":"https:\/\/doi.org\/10.1186\/1471-2105-11-601","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,12]]},"article-number":"601"}}