{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T11:35:47Z","timestamp":1780400147818,"version":"3.54.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"1","content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1186\/1471-2105-15-321","type":"journal-article","created":{"date-parts":[[2014,9,28]],"date-time":"2014-09-28T01:01:24Z","timestamp":1411866084000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["An improved alignment-free model for dna sequence similarity metric"],"prefix":"10.1186","volume":"15","author":[{"given":"Junpeng","family":"Bao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ruiyu","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhe","family":"Bao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2014,9,28]]},"reference":[{"key":"6639_CR1","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1371\/journal.pone.0000085","volume":"1","author":"JP Demuth","year":"2006","unstructured":"Demuth JP, Bie TD, Stajich JE, Cristianini N, Hahn MW: The evolution of mammalian gene families. PLoS ONE. 2006, 1: 85-10.1371\/journal.pone.0000085.","journal-title":"PLoS ONE"},{"key":"6639_CR2","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1016\/j.jtbi.2011.12.024","volume":"297","author":"M Hackenberg","year":"2012","unstructured":"Hackenberg M, Rueda A, Carpena P, Bernaola-Galv\u00e1n P, Barturen G, Oliver JL: Clustering of dna words and biological function: a proof of principle. J Theor Biol. 2012, 297: 127-136.","journal-title":"J Theor Biol"},{"issue":"3","key":"6639_CR3","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","volume":"215","author":"W Gish","year":"1990","unstructured":"Gish W, Miller W, Myers E, Lipman D, AltschulS: Basic local alignment search tool. J Mol Biol. 1990, 215 (3): 403-410. 10.1016\/S0022-2836(05)80360-2. doi:10.1016\/S0022-2836(05)80360-2","journal-title":"J Mol Biol"},{"key":"6639_CR4","doi-asserted-by":"publisher","first-page":"1435","DOI":"10.1126\/science.2983426","volume":"227","author":"DJ Lipman","year":"1985","unstructured":"Lipman DJ, Pearson WR: Rapid and sensitive protein similarity searches. Science. 1985, 227: 1435-1441. 10.1126\/science.2983426.","journal-title":"Science"},{"key":"6639_CR5","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar RC: Search and clustering orders of magnitude faster than blast. Bioinformatics. 2010, 26: 2460-2461. 10.1093\/bioinformatics\/btq461.","journal-title":"Bioinformatics"},{"key":"6639_CR6","doi-asserted-by":"publisher","first-page":"1658","DOI":"10.1093\/bioinformatics\/btl158","volume":"22","author":"WZ Li","year":"2006","unstructured":"Li WZ, Godzik A: Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences. Bioinformatics. 2006, 22: 1658-1659. 10.1093\/bioinformatics\/btl158.","journal-title":"Bioinformatics"},{"key":"6639_CR7","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1093\/bioinformatics\/btq689","volume":"27","author":"B Haubold","year":"2011","unstructured":"Haubold B, Reed FA, Pfaffelhuber P: Alignment-free estimation of nucleotide diversity. Bioinformatics. 2011, 27: 449-455. 10.1093\/bioinformatics\/btq689.","journal-title":"Bioinformatics"},{"key":"6639_CR8","doi-asserted-by":"publisher","first-page":"3455","DOI":"10.1093\/bioinformatics\/bth426","volume":"20","author":"TD Pham","year":"2004","unstructured":"Pham TD, Zuegg J: A probabilistic measure for alignment-free sequence comparison. Bioinformatics. 2004, 20: 3455-3461. 10.1093\/bioinformatics\/bth426.","journal-title":"Bioinformatics"},{"key":"6639_CR9","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1093\/bioinformatics\/btm211","volume":"23","author":"MR Kantorovitz","year":"2007","unstructured":"Kantorovitz MR, Robinson GE, Sinha S: A statistical method for alignment-free comparison of regulatory sequences. Bioinformatics. 2007, 23: 249-255. 10.1093\/bioinformatics\/btl510.","journal-title":"Bioinformatics"},{"key":"6639_CR10","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-540-74819-9_9","volume":"4692","author":"A Freno","year":"2007","unstructured":"Freno A: Selecting features by learning markov blankets. Lect Notes Comput Sci. 2007, 4692: 69-76. 10.1007\/978-3-540-74819-9_9.","journal-title":"Lect Notes Comput Sci"},{"key":"6639_CR11","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1007\/3-540-47887-6_41","volume":"2336","author":"M Deshpande","year":"2002","unstructured":"Deshpande M, Karypis G: Evaluation of techniques for classifying biological sequences. Lect Notes Comput Sci. 2002, 2336: 417-431. 10.1007\/3-540-47887-6_41.","journal-title":"Lect Notes Comput Sci"},{"issue":"Suppl 6","key":"6639_CR12","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1186\/1471-2105-9-S6-S15","volume":"9","author":"G Lu","year":"2008","unstructured":"Lu G, Zhang S, Fang X: An improved string composition method for sequence comparison. BMC Bioinformatics. 2008, 9 (Suppl 6): 15-10.1186\/1471-2105-9-S6-S15.","journal-title":"BMC Bioinformatics"},{"key":"6639_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00239-003-2493-7","volume":"58","author":"J Qi","year":"2004","unstructured":"Qi J, Wang B, Hao BI: Whole proteome prokaryote phylogeny without sequence alignment: a k-string composition approach. J Mol Evol. 2004, 58: 1-11. 10.1007\/s00239-003-2493-7.","journal-title":"J Mol Evol"},{"key":"6639_CR14","doi-asserted-by":"publisher","first-page":"1615","DOI":"10.1089\/cmb.2009.0198","volume":"16","author":"G Reinert","year":"2009","unstructured":"Reinert G, Chew D, Sun F, Waterman MS: Alignment-free sequence comparison (i): statistics and power. J Comput Biol. 2009, 16: 1615-1634. 10.1089\/cmb.2009.0198.","journal-title":"J Comput Biol"},{"key":"6639_CR15","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1186\/1471-2105-9-48","volume":"9","author":"M Bauer","year":"2008","unstructured":"Bauer M, Schuster SM, Sayood K: The average mutual information profile as a genomic signature. BMC Bioinformatics. 2008, 9: 48-10.1186\/1471-2105-9-48.","journal-title":"BMC Bioinformatics"},{"issue":"14","key":"6639_CR16","doi-asserted-by":"publisher","first-page":"5155","DOI":"10.1073\/pnas.83.14.5155","volume":"83","author":"BE Blaisdell","year":"1986","unstructured":"Blaisdell BE: A measure of the similarity of sets of sequences not requiring sequence alignment. Proc Natl Acad Sci U S A. 1986, 83 (14): 5155-5159. 10.1073\/pnas.83.14.5155.","journal-title":"Proc Natl Acad Sci U S A"},{"key":"6639_CR17","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1093\/bioinformatics\/btg005","volume":"19","author":"S Vinga","year":"2003","unstructured":"Vinga S, Almeida J: Alignment-free sequence comparison\u2013a review. Bioinformatics. 2003, 19: 513-523. 10.1093\/bioinformatics\/btg005.","journal-title":"Bioinformatics"},{"key":"6639_CR18","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1016\/j.ympev.2006.05.019","volume":"41","author":"L Liu","year":"2006","unstructured":"Liu L, Ho YK, Yau S: Clustering dna sequences by feature vectors. Mol Phylogenet Evol. 2006, 41: 64-69. 10.1016\/j.ympev.2006.05.019.","journal-title":"Mol Phylogenet Evol"},{"key":"6639_CR19","first-page":"204","volume-title":"Proceedings of the 5th IEEE International Conference on Bio-Inspired Computing: Theories and Applications. Changsha, China","author":"D Wei","year":"2010","unstructured":"Wei D, Jiang QS: A DNA sequence distance measure approach for phylogenetic tree construction. Proceedings of the 5th IEEE International Conference on Bio-Inspired Computing: Theories and Applications. Changsha, China. 2010, IEEE, 204-212. doi:10.1109\/BICTA.2010.5645329, [\n                    http:\/\/ieeexplore.ieee.org\/xpls\/abs_all.jsp?arnumber=5645329&tag=1\n                    \n                  ]"},{"key":"6639_CR20","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1186\/1471-2105-13-174","volume":"13","author":"D Wei","year":"2012","unstructured":"Wei D, Jiang QS, Wei YJ, Wang SR: A novel hierarchical clustering algorithm for gene sequences. BMC Bioinformatics. 2012, 13: 174-10.1186\/1471-2105-13-174.","journal-title":"BMC Bioinformatics"},{"key":"6639_CR21","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1016\/j.jtbi.2011.02.005","volume":"276","author":"Q Dai","year":"2011","unstructured":"Dai Q, Liu XQ, Yao YH, Zhao FK: Numerical characteristics of word frequencies and their application to dissimilarity measure for sequence comparison. J Theor Biol. 2011, 276: 174-180. 10.1016\/j.jtbi.2011.02.005.","journal-title":"J Theor Biol"},{"key":"6639_CR22","doi-asserted-by":"publisher","first-page":"465","DOI":"10.1016\/j.physa.2004.08.041","volume":"347","author":"C Li","year":"2005","unstructured":"Li C, Wang J: Relative entropy of dna and its application. Physica A. 2005, 347: 465-471.","journal-title":"Physica A"},{"key":"6639_CR23","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1016\/j.mbs.2008.06.001","volume":"215","author":"J Wang","year":"2008","unstructured":"Wang J, Zheng X: Wse a new sequence distance measure based on word frequencies. Math Biosci. 2008, 215: 78-83. 10.1016\/j.mbs.2008.06.001.","journal-title":"Math Biosci"},{"key":"6639_CR24","doi-asserted-by":"publisher","first-page":"438","DOI":"10.1016\/j.ympev.2011.02.020","volume":"59","author":"B Zhao","year":"2011","unstructured":"Zhao B, He RL, Yau SS: A new distribution vector and its application in genome clustering. Mol Phylogenet Evol. 2011, 59: 438-443. 10.1016\/j.ympev.2011.02.020.","journal-title":"Mol Phylogenet Evol"},{"key":"6639_CR25","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1007\/978-3-642-27866-2_45","volume":"137","author":"L Shi","year":"2012","unstructured":"Shi L, Huang HL: Dna sequences analysis based on classifications of nucleotide bases. Adv Int Soft Comput. 2012, 137: 379-384. 10.1007\/978-3-642-27866-2_45.","journal-title":"Adv Int Soft Comput"},{"key":"6639_CR26","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1007\/978-3-642-24553-4_61","volume":"6840","author":"HJ Yu","year":"2012","unstructured":"Yu HJ: Similarity analysis of dna sequences based on three 2-d cumulative ratio curves. Lect Notes Comput Sci. 2012, 6840: 462-469. 10.1007\/978-3-642-24553-4_61.","journal-title":"Lect Notes Comput Sci"},{"key":"6639_CR27","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1007\/s10910-006-9176-8","volume":"43","author":"C Li","year":"2008","unstructured":"Li C, Wang J: Similarity analysis of dna sequences based on the generalized lz complexity of (0,1)-sequences. J Math Chem. 2008, 43: 26-31. 10.1007\/s10910-006-9176-8.","journal-title":"J Math Chem"},{"key":"6639_CR28","doi-asserted-by":"publisher","first-page":"035102","DOI":"10.1103\/PhysRevE.79.035102","volume":"79","author":"P Carpena","year":"2009","unstructured":"Carpena P, Bernaola P, Hackenberg M, Coronado AV, Oliver JL: Level statistics of words: Finding keywords in literary texts and symbolic sequences. Phys Rev E. 2009, 79: 035102-035104.","journal-title":"Phys Rev E"},{"key":"6639_CR29","doi-asserted-by":"publisher","first-page":"1902","DOI":"10.1016\/j.patcog.2005.05.002","volume":"38","author":"Z Volkovich","year":"2005","unstructured":"Volkovich Z, Kirzhner V, Bolshoy A, Nevo E, Korol A: The method of n-grams in large-scale clustering of dna texts. Pattern Recogn. 2005, 38: 1902-1912. 10.1016\/j.patcog.2005.05.002.","journal-title":"Pattern Recogn"},{"key":"6639_CR30","doi-asserted-by":"publisher","first-page":"2345","DOI":"10.1007\/s10910-011-9890-8","volume":"49","author":"D Bielinska-Waz","year":"2011","unstructured":"Bielinska-Waz D: Graphical and numerical representations of dna sequences: statistical aspects of similarity. J Math Chem. 2011, 49: 2345-2407. 10.1007\/s10910-011-9890-8.","journal-title":"J Math Chem"},{"key":"6639_CR31","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/978-0-387-69937-0_3","volume":"33","author":"GZ Dong","year":"2007","unstructured":"Dong GZ, Pei J: Classification, clustering, features and distances of sequence data. Adv Database Syst. 2007, 33: 47-65. 10.1007\/978-0-387-69937-0_3.","journal-title":"Adv Database Syst"},{"key":"6639_CR32","unstructured":"HOGENOM : Database of Complete Genome Homologous Genes Families. [\n                    http:\/\/pbil.univ-lyon1.fr\/databases\/hogenom\/home.php\n                    \n                  ]"},{"key":"6639_CR33","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/j.biosystems.2011.06.009","volume":"106","author":"T Aita","year":"2011","unstructured":"Aita T, Husimi Y, Nishigaki K: A mathematical consideration of the word composition vector method in comparison of biological sequences. BioSystems. 2011, 106: 67-75. 10.1016\/j.biosystems.2011.06.009.","journal-title":"BioSystems"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1471-2105-15-321.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,1,23]],"date-time":"2019-01-23T01:16:27Z","timestamp":1548206187000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/1471-2105-15-321"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,9,28]]},"references-count":33,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2014,12]]}},"alternative-id":["6639"],"URL":"https:\/\/doi.org\/10.1186\/1471-2105-15-321","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,9,28]]},"assertion":[{"value":"9 December 2013","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 September 2014","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 September 2014","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"321"}}