{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:18:40Z","timestamp":1757618320176,"version":"3.44.0"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819681792"},{"type":"electronic","value":"9789819681808"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-8180-8_12","type":"book-chapter","created":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T09:15:45Z","timestamp":1750324545000},"page":"142-154","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DNA Sequence Clustering in High Error Rates via Hash Sketches Fuzzy Clustering for Efficient Stored Data Reconstruction"],"prefix":"10.1007","author":[{"given":"Qi","family":"Shao","sequence":"first","affiliation":[]},{"given":"Yanfen","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Ben","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Zhenlu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Shihua","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Pan","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Predicting mutation-disease associations through protein interactions via deep learning. IEEE J. Biomed. Health Inf. (2025)","DOI":"10.1101\/2024.08.06.606730"},{"key":"12_CR2","doi-asserted-by":"publisher","first-page":"1658","DOI":"10.1093\/bioinformatics\/btl158","volume":"22","author":"W Li","year":"2006","unstructured":"Li, W., Godzik, A.: Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences. Bioinformatics 22, 1658\u20131659 (2006)","journal-title":"Bioinformatics"},{"key":"12_CR3","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1186\/1471-2105-12-271","volume":"12","author":"M Ghodsi","year":"2011","unstructured":"Ghodsi, M., Liu, B., Pop, M.: DNACLUST: accurate and efficient clustering of phylogenetic marker genes. BMC Bioinf. 12, 271 (2011)","journal-title":"BMC Bioinf."},{"key":"12_CR4","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1186\/s12864-022-08619-0","volume":"23","author":"HZ Girgis","year":"2022","unstructured":"Girgis, H.Z.: MeShClust v3.0: high-quality clustering of DNA sequences using the mean shift algorithm and alignment-free identity scores. BMC Genom. 23, 423 (2022)","journal-title":"BMC Genom."},{"issue":"7","key":"12_CR5","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1038\/s43588-023-00482-7","volume":"3","author":"AD Mantes","year":"2023","unstructured":"Mantes, A.D., Montserrat, D.M., Bustamante, C.D., Gir\u00f3-i-Nieto, X., Ioannidis, A.G.: Neural ADMIXTURE for rapid genomic clustering. Nat. Comput. Sci. 3(7), 621\u2013629 (2023). https:\/\/doi.org\/10.1038\/s43588-023-00482-7","journal-title":"Nat. Comput. Sci."},{"key":"12_CR6","doi-asserted-by":"publisher","first-page":"17552","DOI":"10.1021\/acsnano.2c06748","volume":"16","author":"A Doricchi","year":"2022","unstructured":"Doricchi, A., et al.: Emerging approaches to DNA data storage: challenges and prospects. ACS Nano 16, 17552\u201317571 (2022)","journal-title":"ACS Nano"},{"key":"12_CR7","doi-asserted-by":"publisher","first-page":"113699","DOI":"10.1016\/j.celrep.2024.113699","volume":"43","author":"B Cao","year":"2024","unstructured":"Cao, B., et al.: Efficient data reconstruction: the bottleneck of large-scale application of DNA storage. Cell Rep. 43, 113699 (2024)","journal-title":"Cell Rep."},{"key":"12_CR8","doi-asserted-by":"publisher","first-page":"2307499","DOI":"10.1002\/adma.202307499","volume":"36","author":"S Wang","year":"2023","unstructured":"Wang, S., Mao, X., Wang, F., Zuo, X., Fan, C.: Data storage using DNA. Adv. Mater. 36, 2307499 (2023)","journal-title":"Adv. Mater."},{"key":"12_CR9","doi-asserted-by":"publisher","first-page":"866","DOI":"10.1109\/TCBB.2020.3011582","volume":"19","author":"B Cao","year":"2020","unstructured":"Cao, B., Ii, X., Zhang, X., Wang, B., Zhang, Q., Wei, X.: Designing uncorrelated address constrain for DNA storage by DMVO algorithm. IEEE\/ACM Trans. Comput. Biol. Bioinf. 19, 866\u2013877 (2020)","journal-title":"IEEE\/ACM Trans. Comput. Biol. Bioinf."},{"key":"12_CR10","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1038\/s43588-022-00231-2","volume":"2","author":"Z Ping","year":"2022","unstructured":"Ping, Z., et al.: Towards practical and robust DNA-based data archiving using the yin\u2013yang codec system. Nat. Comput. Sci. 2, 234\u2013242 (2022)","journal-title":"Nat. Comput. Sci."},{"key":"12_CR11","doi-asserted-by":"publisher","first-page":"628","DOI":"10.1038\/s41467-023-36297-3","volume":"14","author":"M Welzel","year":"2023","unstructured":"Welzel, M., et al.: DNA-Aeon provides flexible arithmetic coding for constraint adherence and error correction in DNA storage. Nat. Commun. 14, 628 (2023)","journal-title":"Nat. Commun."},{"key":"12_CR12","doi-asserted-by":"publisher","first-page":"3136","DOI":"10.1093\/bioinformatics\/btab246","volume":"37","author":"J Jeong","year":"2021","unstructured":"Jeong, J., et al.: Cooperative sequence clustering and decoding for DNA storage system with fountain codes. Bioinformatics 37, 3136\u20133143 (2021)","journal-title":"Bioinformatics"},{"key":"12_CR13","doi-asserted-by":"publisher","first-page":"5361","DOI":"10.1038\/s41467-022-33046-w","volume":"13","author":"L Song","year":"2022","unstructured":"Song, L., et al.: Robust data storage in DNA by de Bruijn graph-based de novo strand assembly. Nat. Commun. 13, 5361 (2022)","journal-title":"Nat. Commun."},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Qu, G., Yan, Z., Wu, H.: Clover: tree structure-based efficient DNA clustering for DNA-based data storage. Brief. Bioinf. 23, bbac336 (2022)","DOI":"10.1093\/bib\/bbac336"},{"key":"12_CR15","doi-asserted-by":"publisher","first-page":"5345","DOI":"10.1038\/s41467-020-19148-3","volume":"11","author":"PL Antkowiak","year":"2020","unstructured":"Antkowiak, P.L., et al.: Low cost DNA data storage using photolithographic synthesis and advanced information reconstruction and error correction. Nat. Commun. 11, 5345 (2020)","journal-title":"Nat. Commun."},{"key":"12_CR16","unstructured":"Rashtchian, C., et al.: Strauss, clustering billions of reads for DNA data storage. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"12_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.107244","volume":"164","author":"P Wang","year":"2023","unstructured":"Wang, P., Cao, B., Ma, T., Wang, B., Zhang, Q., Zheng, P.: DUHI: dynamically updated hash index clustering method for DNA storage. Comput. Biol. Med. 164, 107244 (2023)","journal-title":"Comput. Biol. Med."},{"key":"12_CR18","doi-asserted-by":"publisher","first-page":"3710","DOI":"10.1093\/bioinformatics\/btac395","volume":"38","author":"N Tavakolian","year":"2022","unstructured":"Tavakolian, N., Fraz\u00e3o, J.G., Bendixsen, D., Stelkens, R., Li, C.-B.: Shepherd: accurate clustering for correcting DNA barcode errors. Bioinformatics 38, 3710\u20133716 (2022)","journal-title":"Bioinformatics"},{"key":"12_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12859-022-04637-7","volume":"23","author":"R Logan","year":"2022","unstructured":"Logan, R., et al.: 3GOLD: optimized Levenshtein distance for clustering third-generation sequencing data. BMC Bioinf. 23, 1\u201318 (2022)","journal-title":"BMC Bioinf."},{"key":"12_CR20","doi-asserted-by":"publisher","first-page":"6968","DOI":"10.1038\/s41467-022-34630-w","volume":"13","author":"RC Edgar","year":"2022","unstructured":"Edgar, R.C.: Muscle5: High-accuracy alignment ensembles enable unbiased assessments of sequence homology and phylogeny. Nat. Commun. 13, 6968 (2022)","journal-title":"Nat. Commun."},{"key":"12_CR21","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gky315","volume":"46","author":"BT James","year":"2018","unstructured":"James, B.T., Luczak, B.B., Girgis, H.Z.: MeShClust: an intelligent tool for clustering DNA sequences. Nucleic Acids Res. 46, e83 (2018)","journal-title":"Nucleic Acids Res."},{"key":"12_CR22","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1186\/s13059-023-03049-x","volume":"24","author":"F Hausmann","year":"2023","unstructured":"Hausmann, F., et al.: DISCERN: deep single-cell expression reconstruction for improved cell clustering and cell subtype and state detection. Genome Biol. 24, 212 (2023)","journal-title":"Genome Biol."},{"key":"12_CR23","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1186\/s13059-023-03053-1","volume":"24","author":"R Han","year":"2023","unstructured":"Han, R., et al.: HycDemux: a hybrid unsupervised approach for accurate barcoded sample demultiplexing in nanopore sequencing. Genome Biol. 24, 222 (2023)","journal-title":"Genome Biol."},{"key":"12_CR24","doi-asserted-by":"publisher","first-page":"614","DOI":"10.1093\/bioinformatics\/btt593","volume":"30","author":"J Zhang","year":"2014","unstructured":"Zhang, J., Kobert, K., Flouri, T., Stamatakis, A.: PEAR: a fast and accurate Illumina Paired-End reAd mergeR. Bioinformatics 30, 614\u2013620 (2014)","journal-title":"Bioinformatics"},{"key":"12_CR25","doi-asserted-by":"publisher","first-page":"W20","DOI":"10.1093\/nar\/gkh435","volume":"32","author":"S McGinnis","year":"2004","unstructured":"McGinnis, S., Madden, T.L.: BLAST: at the core of a powerful and diverse set of sequence analysis tools. Nucleic Acids Res. 32, W20\u2013W25 (2004)","journal-title":"Nucleic Acids Res."},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Srinivasavaradhan, S.R., Gopi, S., Pfister, H.D., Yekhanin, S.: Trellis BMA: coded trace reconstruction on IDS channels for DNA storage. In: 2021 IEEE International Symposium on Information Theory (ISIT), pp. 2453\u20132458. IEEE (2021)","DOI":"10.1109\/ISIT45174.2021.9517821"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-8180-8_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T20:20:23Z","timestamp":1757190023000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-8180-8_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819681792","9789819681808"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-8180-8_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"20 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sydney, NSW","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}