{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T07:10:18Z","timestamp":1761808218767,"version":"3.37.3"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,8,4]],"date-time":"2018-08-04T00:00:00Z","timestamp":1533340800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Opera\u010dn\u00ed program V\u00fdzkum, v\u00fdvoj a vzd\u011bl\u00e1v\u00e1n\u00ed","award":["CZ.02.1.01\/0.0\/0.0\/16_019\/0000765"],"award-info":[{"award-number":["CZ.02.1.01\/0.0\/0.0\/16_019\/0000765"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2019,1]]},"DOI":"10.1007\/s10618-018-0584-8","type":"journal-article","created":{"date-parts":[[2018,8,4]],"date-time":"2018-08-04T09:17:53Z","timestamp":1533374273000},"page":"1-23","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Estimating sequence similarity from read sets for clustering next-generation sequencing data"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6597-6616","authenticated-orcid":false,"given":"Petr","family":"Ry\u0161av\u00fd","sequence":"first","affiliation":[]},{"given":"Filip","family":"\u017delezn\u00fd","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,8,4]]},"reference":[{"key":"584_CR1","doi-asserted-by":"crossref","unstructured":"1000 Genomes Project Consortium et al. (2015) A global reference for human genetic variation. Nature 526(7571):68\u201374","DOI":"10.1038\/nature15393"},{"issue":"3","key":"584_CR2","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","volume":"215","author":"SF Altschul","year":"1990","unstructured":"Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ (1990) Basic local alignment search tool. J Mol Biol 215(3):403\u2013410","journal-title":"J Mol Biol"},{"issue":"18","key":"584_CR3","doi-asserted-by":"publisher","first-page":"2502","DOI":"10.1093\/bioinformatics\/btr447","volume":"27","author":"E Bao","year":"2011","unstructured":"Bao E, Jiang T, Kaloshian I, Girke T (2011) SEED: efficient clustering of next-generation sequences. Bioinformatics 27(18):2502\u20132509","journal-title":"Bioinformatics"},{"issue":"14","key":"584_CR4","doi-asserted-by":"publisher","first-page":"5155","DOI":"10.1073\/pnas.83.14.5155","volume":"83","author":"BE Blaisdell","year":"1986","unstructured":"Blaisdell BE (1986) A measure of the similarity of sets of sequences not requiring sequence alignment. Proc Natl Acad Sci 83(14):5155\u20135159","journal-title":"Proc Natl Acad Sci"},{"issue":"1","key":"584_CR5","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1186\/s13015-014-0029-x","volume":"10","author":"M Comin","year":"2015","unstructured":"Comin M, Leoni A, Schimd M (2015) Clustering of reads with alignment-free measures and quality values. Algorithms Mol Biol 10(1):4","journal-title":"Algorithms Mol Biol"},{"issue":"9","key":"584_CR6","doi-asserted-by":"publisher","first-page":"S1","DOI":"10.1186\/1471-2105-15-S9-S1","volume":"15","author":"M Comin","year":"2014","unstructured":"Comin M, Schimd M (2014) Assembly-free genome comparison based on next-generation sequencing reads and variable length patterns. BMC Bioinformatics 15(9):S1","journal-title":"BMC Bioinformatics"},{"issue":"1","key":"584_CR7","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1186\/s12920-016-0193-6","volume":"9","author":"M Comin","year":"2016","unstructured":"Comin M, Schimd M (2016) Fast comparison of genomic and meta-genomic reads with alignment-free measures based on quality values. BMC Med Genomics 9(1):36","journal-title":"BMC Med Genomics"},{"issue":"383","key":"584_CR8","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1080\/01621459.1983.10478008","volume":"78","author":"EB Fowlkes","year":"1983","unstructured":"Fowlkes EB, Mallows CL (1983) A method for comparing two hierarchical clusterings. J Am Stat Assoc 78(383):553\u2013569","journal-title":"J Am Stat Assoc"},{"key":"584_CR9","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1038\/nrg.2016.49","volume":"17","author":"S Goodwin","year":"2016","unstructured":"Goodwin S, Mcpherson J, Richard Mccombie W (2016) Coming of age: ten years of next-generation sequencing technologies. Nat Rev Genet 17:333\u2013351 05","journal-title":"Nat Rev Genet"},{"issue":"9","key":"584_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0024182","volume":"6","author":"N Haiminen","year":"2011","unstructured":"Haiminen N, Kuhn DN, Parida L, Rigoutsos I (2011) Evaluation of methods for de novo genome assembly from high-throughput sequencing reads reveals dependencies that affect the quality of the results. PLOS ONE 6(9):1\u20139 09","journal-title":"PLOS ONE"},{"issue":"5","key":"584_CR11","doi-asserted-by":"publisher","first-page":"802","DOI":"10.1101\/gr.072033.107","volume":"18","author":"D Hernandez","year":"2008","unstructured":"Hernandez D, Franois P, Farinelli L, sters M, Schrenzel J (2008) De novo bacterial genome sequencing: millions of very short reads assembled on a desktop computer. Genome Res 18(5):802\u2013809","journal-title":"Genome Res"},{"issue":"4","key":"584_CR12","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1093\/bioinformatics\/btr708","volume":"28","author":"W Huang","year":"2012","unstructured":"Huang W, Li L, Myers JR, Marth GT (2012) ART: a next-generation sequencing read simulator. Bioinformatics 28(4):593\u2013594","journal-title":"Bioinformatics"},{"issue":"1","key":"584_CR13","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1093\/nar\/30.1.38","volume":"30","author":"T Hubbard","year":"2002","unstructured":"Hubbard T, Barker D, Birney E, Cameron G, Chen Y et al (2002) The Ensembl genome database project. Nucl Acids Res 30(1):38\u201341","journal-title":"Nucl Acids Res"},{"key":"584_CR14","unstructured":"Jalovec K, \u017delezn\u00fd F (2014) Binary classification of metagenomic samples using discriminative DNA superstrings. In: MLSB 2014: 8th International workshop on machine learning in systems biology, pp 44\u201347"},{"key":"584_CR15","doi-asserted-by":"crossref","unstructured":"Kchouk M, Elloumi M(2016) A clustering approach for denovo assembly using next generation sequencing data. In: 2016 IEEE international conference on bioinformatics and biomedicine (BIBM), IEEE, pp 1909\u20131911","DOI":"10.1109\/BIBM.2016.7822812"},{"issue":"6822","key":"584_CR16","doi-asserted-by":"publisher","first-page":"860","DOI":"10.1038\/35057062","volume":"409","author":"ES Lander","year":"2001","unstructured":"Lander ES, Linton LM, Birren B, Nusbaum C, Zody MC, Baldwin J, Devon K, Dewar K, Doyle M, FitzHugh W et al (2001) Initial sequencing and analysis of the human genome. Nature 409(6822):860\u2013921","journal-title":"Nature"},{"issue":"suppl\u20131","key":"584_CR17","doi-asserted-by":"publisher","first-page":"D28","DOI":"10.1093\/nar\/gkq967","volume":"39","author":"R Leinonen","year":"2011","unstructured":"Leinonen R, Akhtar R, Birney E, Bower L, Cerdeno-Trraga A, Cheng Y, Cleland I, Faruque N, Goodgame N, Gibson R, Hoad G, Jang M, Pakseresht N, Plaister S, Radhakrishnan R, Reddy K, Sobhany S, Ten Hoopen P, Vaughan R, Zalunin V, Cochrane G (2011) The European Nucleotide Archive. Nucl Acids Res 39(suppl\u20131):D28\u2013D31","journal-title":"Nucl Acids Res"},{"issue":"8","key":"584_CR18","first-page":"707","volume":"10","author":"VI Levenshtein","year":"1966","unstructured":"Levenshtein VI (1966) Binary codes capable of correcting deletions, insertions, and reversals. Sov Phys Dokl 10(8):707","journal-title":"Sov Phys Dokl"},{"key":"584_CR19","unstructured":"Malhotra R, Elleder D, Bao L, Hunter DR, Acharya R, Poss M (2014) Clustering pipeline for determining consensus sequences in targeted next-generation sequencing. ArXiv preprint"},{"key":"584_CR20","unstructured":"Monge AE, Elkan CP (1996) The field matching problem: algorithms and applications. In: Proceedings of the second international conference on knowledge discovery and data mining, KDD\u201996, AAAI Press, pp 267\u2013270"},{"issue":"1","key":"584_CR21","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/375360.375365","volume":"33","author":"G Navarro","year":"2001","unstructured":"Navarro G (2001) A guided tour to approximate string matching. ACM Comput Surv 33(1):31\u201388","journal-title":"ACM Comput Surv"},{"issue":"3","key":"584_CR22","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1016\/0022-2836(70)90057-4","volume":"48","author":"SB Needleman","year":"1970","unstructured":"Needleman SB, Wunsch CD (1970) A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48(3):443\u2013453","journal-title":"J Mol Biol"},{"key":"584_CR23","unstructured":"Nurk Sergey, Bankevich Anton, et al (2013) Assembling genomes and mini-metagenomes from highly chimeric reads. In: Deng M, Jiang R, Sun F, Zhang X, (eds) 17th Annual international conference on research in computational molecular biology, RECOMB 2013, Beijing, China, April 7\u201310, 2013. Proceedings, Springer, Berlin Heidelberg, Berlin, Heidelberg, pp 158\u2013170"},{"issue":"1","key":"584_CR24","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1186\/s13059-016-0997-x","volume":"17","author":"BD Ondov","year":"2016","unstructured":"Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM (2016) Mash: fast genome and metagenome distance estimation using MinHash. Genome Biol 17(1):132","journal-title":"Genome Biol"},{"issue":"12","key":"584_CR25","doi-asserted-by":"publisher","first-page":"1615","DOI":"10.1089\/cmb.2009.0198","volume":"16","author":"G Reinert","year":"2009","unstructured":"Reinert G, Chew D, Sun F, Waterman MS (2009) Alignment-free sequence comparison (I): statistics and power. J Comput Biol 16(12):1615\u20131634","journal-title":"J Comput Biol"},{"key":"584_CR26","doi-asserted-by":"crossref","unstructured":"Ry\u0161av\u00fd Petr, \u017delezn\u00fd Filip (2016) Estimating sequence similarity from read sets for clustering sequencing data. In: Bostr\u00f6m H, Knobbe A, Soares C, Papapetrou P (eds) 15th International symposium on advances in intelligent data analysis XV, IDA 2016, Stockholm, Sweden, October 13\u201315, 2016, Proceedings, Cham, Springer International Publishing, pp 204\u2013214","DOI":"10.1007\/978-3-319-46349-0_18"},{"issue":"4","key":"584_CR27","first-page":"406","volume":"4","author":"N Saitou","year":"1987","unstructured":"Saitou N, Nei M (1987) The neighbor-joining method: a new method for reconstructing phylogenetic trees. Mol Biol Evol 4(4):406\u2013425","journal-title":"Mol Biol Evol"},{"issue":"6","key":"584_CR28","doi-asserted-by":"publisher","first-page":"1117","DOI":"10.1101\/gr.089532.108","volume":"19","author":"JT Simpson","year":"2009","unstructured":"Simpson JT, Wong K, Jackman SD, Schein JE, Jones SJM, nan Birol (2009) ABySS: a parallel assembler for short read sequence data. Genome Res 19(6):1117\u20131123","journal-title":"Genome Res"},{"key":"584_CR29","first-page":"1409","volume":"38","author":"RR Sokal","year":"1958","unstructured":"Sokal RR, Michener CD (1958) A statistical method for evaluating systematic relationships. Univ Kans Sci Bull 38:1409\u20131438","journal-title":"Univ Kans Sci Bull"},{"issue":"2","key":"584_CR30","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1089\/cmb.2012.0228","volume":"20","author":"K Song","year":"2013","unstructured":"Song K, Ren J, Zhai Z, Liu X, Deng M, Sun F (2013) Alignment-free sequence comparison based on next-generation sequencing reads. J Comput Biol 20(2):64\u201379","journal-title":"J Comput Biol"},{"issue":"1","key":"584_CR31","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1016\/0304-3975(92)90143-4","volume":"92","author":"E Ukkonen","year":"1992","unstructured":"Ukkonen E (1992) Approximate string-matching with \n                    \n                      \n                    \n                    $$q$$\n                    \n                      \n                        q\n                      \n                    \n                  -grams and maximal matches. Theor Comput Sci 92(1):191\u2013211","journal-title":"Theor Comput Sci"},{"issue":"1","key":"584_CR32","doi-asserted-by":"publisher","first-page":"168","DOI":"10.1145\/321796.321811","volume":"21","author":"RA Wagner","year":"1974","unstructured":"Wagner RA, Fischer MJ (1974) The string-to-string correction problem. J Assoc Comput Mach 21(1):168\u2013173","journal-title":"J Assoc Comput Mach"},{"issue":"4","key":"584_CR33","doi-asserted-by":"publisher","first-page":"500","DOI":"10.1093\/bioinformatics\/btl629","volume":"23","author":"RL Warren","year":"2007","unstructured":"Warren RL, Sutton GG, Jones SJM, Holt RA (2007) Assembling millions of short DNA sequences using SSAKE. Bioinformatics 23(4):500\u2013501","journal-title":"Bioinformatics"},{"key":"584_CR34","doi-asserted-by":"publisher","first-page":"869","DOI":"10.1186\/1756-0500-7-869","volume":"7","author":"E Weitschek","year":"2014","unstructured":"Weitschek E, Santoni D, Fiscon G, De Cola MC, Bertolazzi P, Felici G (2014) Next generation sequencing reads comparison with an alignment-free distance. BMC Res Notes 7:869","journal-title":"BMC Res Notes"},{"issue":"suppl\u20131","key":"584_CR35","first-page":"D13","volume":"36","author":"DL Wheeler","year":"2008","unstructured":"Wheeler DL, Barrett T, Benson DA, Bryant SH, Canese K, Chetvernin V, Church DM, DiCuccio M, Edgar R, Federhen S, Feolo M, Geer LY, Helmberg W, Kapustin Y, Khovayko O, Landsman D, Lipman DJ, Madden TL, Maglott DR, Miller V, Ostell J, Pruitt KD, Schuler GD, Shumway M, Sequeira E, Sherry ST, Sirotkin K, Souvorov A, Starchenko G, Tatusov RL, Tatusova TA, Wagner L, Yaschenko E (2008) Database resources of the national center for biotechnology information. Nucl Acids Res 36(suppl\u20131):D13\u2013D21","journal-title":"Nucl Acids Res"},{"issue":"7","key":"584_CR36","doi-asserted-by":"publisher","first-page":"e75","DOI":"10.1093\/nar\/gkt003","volume":"41","author":"H Yi","year":"2013","unstructured":"Yi H, Jin L (2013) Co-phylog: an assembly-free phylogenomic approach for closely related organisms. Nucl Acids Res 41(7):e75","journal-title":"Nucl Acids Res"},{"issue":"5","key":"584_CR37","doi-asserted-by":"publisher","first-page":"821","DOI":"10.1101\/gr.074492.107","volume":"18","author":"DR Zerbino","year":"2008","unstructured":"Zerbino DR, Birney E (2008) Velvet: algorithms for de novo short read assembly using de Bruijn graphs. Genome Res 18(5):821\u2013829","journal-title":"Genome Res"},{"key":"584_CR38","unstructured":"\u017delezn\u00fd F, Jalovec K, Tolar J (2014) Learning meets sequencing: a generality framework for read-sets. In: ILP 2014: 24th Internation conference on inductive logic programming, Late-Breaking Papers"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-018-0584-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-018-0584-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-018-0584-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,3]],"date-time":"2019-08-03T19:11:27Z","timestamp":1564859487000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-018-0584-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,8,4]]},"references-count":38,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,1]]}},"alternative-id":["584"],"URL":"https:\/\/doi.org\/10.1007\/s10618-018-0584-8","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"type":"print","value":"1384-5810"},{"type":"electronic","value":"1573-756X"}],"subject":[],"published":{"date-parts":[[2018,8,4]]},"assertion":[{"value":"28 August 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 August 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}