{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T16:24:47Z","timestamp":1743006287161,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819751273"},{"type":"electronic","value":"9789819751280"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5128-0_29","type":"book-chapter","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T23:02:31Z","timestamp":1720738951000},"page":"359-370","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An In-Depth Assessment of\u00a0Sequence Clustering Software in\u00a0Bioinformatics"],"prefix":"10.1007","author":[{"given":"Zhen","family":"Ju","sequence":"first","affiliation":[]},{"given":"Mingyu","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xuelei","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jintao","family":"Meng","sequence":"additional","affiliation":[]},{"given":"Wenhui","family":"Xi","sequence":"additional","affiliation":[]},{"given":"Yanjie","family":"Wei","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,12]]},"reference":[{"issue":"5617","key":"29_CR1","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1126\/science.1084564","volume":"300","author":"FS Collins","year":"2003","unstructured":"Collins, F.S., Morgan, M., Patrinos, A.: The human genome project: lessons from large-scale biology. Science 300(5617), 286\u2013290 (2003)","journal-title":"Science"},{"issue":"1","key":"29_CR2","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1038\/nrg2626","volume":"11","author":"ML Metzker","year":"2010","unstructured":"Metzker, M.L.: Sequencing technologies-the next generation. Nat. Rev. Genet. 11(1), 31\u201346 (2010)","journal-title":"Nat. Rev. Genet."},{"issue":"6","key":"29_CR3","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1038\/nrg.2016.49","volume":"17","author":"S Goodwin","year":"2016","unstructured":"Goodwin, S., McPherson, J.D., McCombie, W.R.: Coming of age: ten years of next-generation sequencing technologies. Nat. Rev. Genet. 17(6), 333\u2013351 (2016)","journal-title":"Nat. Rev. Genet."},{"issue":"7453","key":"29_CR4","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1038\/498255a","volume":"498","author":"V Marx","year":"2013","unstructured":"Marx, V.: The big challenges of big data. Nature 498(7453), 255\u2013260 (2013)","journal-title":"Nature"},{"issue":"D1","key":"29_CR5","doi-asserted-by":"publisher","first-page":"D29","DOI":"10.1093\/nar\/gkac1032","volume":"51","author":"EW Sayers","year":"2023","unstructured":"Sayers, E.W., et al.: Database resources of the national center for biotechnology information in 2023. Nucleic Acids Res. 51(D1), D29\u2013D38 (2023)","journal-title":"Nucleic Acids Res."},{"issue":"6507","key":"29_CR6","doi-asserted-by":"publisher","first-page":"1077","DOI":"10.1126\/science.aba0372","volume":"369","author":"L Gao","year":"2020","unstructured":"Gao, L., et al.: Diverse enzymatic activities mediate antiviral immunity in prokaryotes. Science 369(6507), 1077\u20131084 (2020)","journal-title":"Science"},{"issue":"3","key":"29_CR7","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1038\/s41564-020-00839-y","volume":"6","author":"F Rousset","year":"2021","unstructured":"Rousset, F., et al.: The impact of genetic diversity on gene essentiality within the escherichia coli species. Nat. Microbiol. 6(3), 301\u2013312 (2021)","journal-title":"Nat. Microbiol."},{"key":"29_CR8","doi-asserted-by":"publisher","DOI":"10.3389\/fgene.2022.887491","volume":"13","author":"H Zhang","year":"2022","unstructured":"Zhang, H., et al.: Inter-residue distance prediction from duet deep learning models. Front. Genet. 13, 887491 (2022)","journal-title":"Front. Genet."},{"issue":"23","key":"29_CR9","doi-asserted-by":"publisher","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","volume":"28","author":"F Limin","year":"2012","unstructured":"Limin, F., Niu, B., Zhu, Z., Sitao, W., Li, W.: CD-HIT: accelerated for clustering the next-generation sequencing data. Bioinformatics 28(23), 3150\u20133152 (2012)","journal-title":"Bioinformatics"},{"issue":"19","key":"29_CR10","doi-asserted-by":"publisher","first-page":"2460","DOI":"10.1093\/bioinformatics\/btq461","volume":"26","author":"RC Edgar","year":"2010","unstructured":"Edgar, R.C.: Search and clustering orders of magnitude faster than blast. Bioinformatics 26(19), 2460\u20132461 (2010)","journal-title":"Bioinformatics"},{"key":"29_CR11","doi-asserted-by":"publisher","DOI":"10.7717\/peerj.2584","volume":"4","author":"T Rognes","year":"2016","unstructured":"Rognes, T., Flouri, T., Nichols, B., Quince, C., Mah\u00e9, F.: VSEARCH: a versatile open source tool for metagenomics. PeerJ 4, e2584 (2016)","journal-title":"PeerJ"},{"key":"29_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"596","DOI":"10.1007\/978-3-030-91415-8_50","volume-title":"Bioinformatics Research and Applications","author":"Z Ju","year":"2021","unstructured":"Ju, Z., et al.: An efficient greedy incremental sequence clustering algorithm. In: Wei, Y., Li, M., Skums, P., Cai, Z. (eds.) ISBRA 2021. LNCS, vol. 13064, pp. 596\u2013607. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-91415-8_50"},{"key":"29_CR13","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1016\/j.future.2022.05.024","volume":"136","author":"J Zhen","year":"2022","unstructured":"Zhen, J., et al.: nGIA: a novel greedy incremental alignment based algorithm for gene sequence clustering. Futur. Gener. Comput. Syst. 136, 221\u2013230 (2022)","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"4","key":"29_CR14","doi-asserted-by":"publisher","first-page":"366","DOI":"10.1038\/s41592-021-01101-x","volume":"18","author":"B Buchfink","year":"2021","unstructured":"Buchfink, B., Reuter, K., Drost, H.-G.: Sensitive protein alignments at tree-of-life scale using diamond. Nat. Methods 18(4), 366\u2013368 (2021)","journal-title":"Nat. Methods"},{"issue":"1","key":"29_CR15","doi-asserted-by":"publisher","first-page":"2542","DOI":"10.1038\/s41467-018-04964-5","volume":"9","author":"M Steinegger","year":"2018","unstructured":"Steinegger, M., S\u00f6ding, J.: Clustering huge protein sequence sets in linear time. Nat. Commun. 9(1), 2542 (2018)","journal-title":"Nat. Commun."},{"issue":"5","key":"29_CR16","doi-asserted-by":"publisher","first-page":"496","DOI":"10.1016\/j.gpb.2018.10.008","volume":"17","author":"R Li","year":"2019","unstructured":"Li, R., et al.: Gclust: a parallel clustering tool for microbial genomic data. Genom. Proteom. Bioinform. 17(5), 496\u2013502 (2019)","journal-title":"Genom. Proteom. Bioinform."},{"key":"29_CR17","doi-asserted-by":"crossref","unstructured":"Girgis, H.Z.: MeShClust v3. 0: high-quality clustering of DNA sequences using the mean shift algorithm and alignment-free identity scores. BMC Genom. 23(1), 423 (2022)","DOI":"10.1186\/s12864-022-08619-0"},{"issue":"D1","key":"29_CR18","doi-asserted-by":"publisher","first-page":"D933","DOI":"10.1093\/nar\/gkac958","volume":"51","author":"FJ Martin","year":"2023","unstructured":"Martin, F.J., et al.: Ensembl 2023. Nucleic Acids Res. 51(D1), D933\u2013D941 (2023)","journal-title":"Nucleic Acids Res."},{"issue":"D1","key":"29_CR19","doi-asserted-by":"publisher","first-page":"D523","DOI":"10.1093\/nar\/gkac1052","volume":"51","author":"The UniProt Consortium","year":"2023","unstructured":"The UniProt Consortium: UniProt: the universal protein knowledgebase in 2023. Nucleic Acids Res. 51(D1), D523\u2013D531 (2023)","journal-title":"Nucleic Acids Res."},{"issue":"4096","key":"29_CR20","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1126\/science.181.4096.223","volume":"181","author":"CB Anfinsen","year":"1973","unstructured":"Anfinsen, C.B.: Principles that govern the folding of protein chains. Science 181(4096), 223\u2013230 (1973)","journal-title":"Science"},{"issue":"D1","key":"29_CR21","doi-asserted-by":"publisher","first-page":"D222","DOI":"10.1093\/nar\/gkt1223","volume":"42","author":"RD Finn","year":"2014","unstructured":"Finn, R.D., et al.: Pfam: the protein families database. Nucleic Acids Res. 42(D1), D222\u2013D230 (2014)","journal-title":"Nucleic Acids Res."},{"issue":"2","key":"29_CR22","doi-asserted-by":"publisher","first-page":"3226","DOI":"10.1214\/23-EJS2180","volume":"17","author":"T Gneiting","year":"2023","unstructured":"Gneiting, T., Resin, J.: Regression diagnostics meets forecast evaluation: conditional calibration, reliability diagrams, and coefficient of determination. Electron. J. Stat. 17(2), 3226\u20133286 (2023)","journal-title":"Electron. J. Stat."},{"key":"29_CR23","unstructured":"G\u00e9ron, A.: Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow. O\u2019Reilly Media, Inc. (2022)"},{"issue":"8","key":"29_CR24","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0070837","volume":"8","author":"W Chen","year":"2013","unstructured":"Chen, W., Zhang, C.K., Cheng, Y., Zhang, S., Zhao, H.: A comparison of methods for clustering 16S rRNA sequences into OTUs. PLoS ONE 8(8), e70837 (2013)","journal-title":"PLoS ONE"},{"issue":"5","key":"29_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/nmicrobiol.2016.48","volume":"1","author":"LA Hug","year":"2016","unstructured":"Hug, L.A., et al.: A new view of the tree of life. Nat. Microbiol. 1(5), 1\u20136 (2016)","journal-title":"Nat. Microbiol."},{"issue":"6","key":"29_CR26","doi-asserted-by":"publisher","first-page":"656","DOI":"10.1093\/bib\/bbs035","volume":"13","author":"W Li","year":"2012","unstructured":"Li, W., Limin, F., Niu, B., Sitao, W., Wooley, J.: Ultrafast clustering algorithms for metagenomic sequence analysis. Brief. Bioinform. 13(6), 656\u2013668 (2012)","journal-title":"Brief. Bioinform."},{"issue":"1","key":"29_CR27","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1186\/s13059-023-02961-6","volume":"24","author":"X Xiaoming","year":"2023","unstructured":"Xiaoming, X., et al.: RabbitTClust: enabling fast clustering analysis of millions of bacteria genomes with MinHash sketches. Genome Biol. 24(1), 121 (2023)","journal-title":"Genome Biol."}],"container-title":["Lecture Notes in Computer Science","Bioinformatics Research and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5128-0_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T18:04:51Z","timestamp":1731953091000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5128-0_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819751273","9789819751280"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5128-0_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"12 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISBRA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Bioinformatics Research and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kunming","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"isbra2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/bio.csu.edu.cn\/ISBRA2024\/ISBRA2024_Home.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}