{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T04:03:32Z","timestamp":1767931412474,"version":"3.49.0"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T00:00:00Z","timestamp":1767830400000},"content-version":"vor","delay-in-days":30,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100000143","name":"Division of Computing and Communication Foundations","doi-asserted-by":"publisher","award":["2316160, 1919122"],"award-info":[{"award-number":["2316160, 1919122"]}],"id":[{"id":"10.13039\/100000143","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000143","name":"Division of Computing and Communication Foundations","doi-asserted-by":"publisher","award":["2316160, 1919122"],"award-info":[{"award-number":["2316160, 1919122"]}],"id":[{"id":"10.13039\/100000143","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"DOI":"10.1186\/s12859-025-06333-8","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T17:26:14Z","timestamp":1765301174000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Density-reducing Jaccard estimators for sketch-based long read applications"],"prefix":"10.1186","volume":"27","author":[{"given":"Tazin","family":"Rahman","sequence":"first","affiliation":[]},{"given":"Ananth","family":"Kalyanaraman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,9]]},"reference":[{"key":"6333_CR1","unstructured":"Broder AZ. On the resemblance and containment of documents. In: Proceedings. Compression and complexity of SEQUENCES 1997 (Cat. No. 97TB100171). IEEE; 1997. pp. 21\u201329."},{"issue":"18","key":"6333_CR2","doi-asserted-by":"publisher","first-page":"3363","DOI":"10.1093\/bioinformatics\/bth408","volume":"20","author":"M Roberts","year":"2004","unstructured":"Roberts M, Hayes W, Hunt BR, Mount SM, Yorke JA. Reducing storage requirements for biological sequence comparison. Bioinform. 2004;20(18):3363\u20139.","journal-title":"Bioinform"},{"issue":"12","key":"6333_CR3","doi-asserted-by":"publisher","first-page":"1251","DOI":"10.1089\/cmb.2023.0094","volume":"30","author":"H Zheng","year":"2023","unstructured":"Zheng H, Mar\u00e7ais G, Kingsford C. Creating and using minimizer sketches in computational genomics. J Comput Biol. 2023;30(12):1251\u201376.","journal-title":"J Comput Biol"},{"issue":"14","key":"6333_CR4","doi-asserted-by":"publisher","first-page":"2103","DOI":"10.1093\/bioinformatics\/btw152","volume":"32","author":"H Li","year":"2016","unstructured":"Li H. Minimap and miniasm: fast mapping and de novo assembly for noisy long sequences. Bioinform. 2016;32(14):2103\u201310.","journal-title":"Bioinform"},{"issue":"18","key":"6333_CR5","doi-asserted-by":"publisher","first-page":"3094","DOI":"10.1093\/bioinformatics\/bty191","volume":"34","author":"H Li","year":"2018","unstructured":"Li H. Minimap2: pairwise alignment for nucleotide sequences. Bioinform. 2018;34(18):3094\u2013100.","journal-title":"Bioinform"},{"issue":"17","key":"6333_CR6","doi-asserted-by":"publisher","first-page":"748","DOI":"10.1093\/bioinformatics\/bty597","volume":"34","author":"C Jain","year":"2018","unstructured":"Jain C, Koren S, Dilthey A, Phillippy AM, Aluru S. A fast adaptive algorithm for computing whole-genome homology maps. Bioinform. 2018;34(17):748\u201356.","journal-title":"Bioinform"},{"key":"6333_CR7","doi-asserted-by":"crossref","unstructured":"Nisa I, Pandey P, Ellis M, Oliker L., Bulu\u00e7 A, Yelick K. Distributed-memory k-mer counting on GPUs. In: 2021 IEEE international parallel and distributed processing symposium (IPDPS). IEEE; 2021. pp. 527\u201353.","DOI":"10.1109\/IPDPS49936.2021.00061"},{"issue":"10","key":"6333_CR8","doi-asserted-by":"publisher","first-page":"1569","DOI":"10.1093\/bioinformatics\/btv022","volume":"31","author":"S Deorowicz","year":"2015","unstructured":"Deorowicz S, Kokot M, Grabowski S, Debudaj-Grabysz A. KMC 2: fast and resource-frugal k-mer counting. Bioinform. 2015;31(10):1569\u201376.","journal-title":"Bioinform"},{"issue":"4","key":"6333_CR9","doi-asserted-by":"publisher","first-page":"1091","DOI":"10.1109\/TCBB.2017.2737999","volume":"16","author":"P Ghosh","year":"2017","unstructured":"Ghosh P, Kalyanaraman A. FastEtch: a fast sketch-based assembler for genomes. IEEE\/ACM Trans Comput Biol Bioinf. 2017;16(4):1091\u2013106.","journal-title":"IEEE\/ACM Trans Comput Biol Bioinf"},{"issue":"10","key":"6333_CR10","doi-asserted-by":"publisher","first-page":"958","DOI":"10.1016\/j.cels.2021.08.009","volume":"12","author":"B Ekim","year":"2021","unstructured":"Ekim B, Berger B, Chikhi R. Minimizer-space de Bruijn graphs: Whole-genome assembly of long reads in minutes on a personal computer. Cell Syst. 2021;12(10):958\u201368.","journal-title":"Cell Syst"},{"issue":"5","key":"6333_CR11","doi-asserted-by":"publisher","first-page":"722","DOI":"10.1101\/gr.215087.116","volume":"27","author":"S Koren","year":"2017","unstructured":"Koren S, Walenz BP, Berlin K, Miller JR, Bergman NH, Phillippy AM. Canu: scalable and accurate long-read assembly via adaptive k-mer weighting and repeat separation. Genome Res. 2017;27(5):722\u201336.","journal-title":"Genome Res"},{"issue":"Supplement-1","key":"6333_CR12","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1093\/bioinformatics\/btaa435","volume":"36","author":"C Jain","year":"2020","unstructured":"Jain C, Rhie A, Zhang H, Chu C, Walenz BP, Koren S, et al. Weighted minimizer sampling improves long read mapping. Bioinformatics. 2020;36(Supplement-1):111\u20138.","journal-title":"Bioinformatics"},{"key":"6333_CR13","doi-asserted-by":"publisher","first-page":"10805","DOI":"10.7717\/peerj.10805","volume":"9","author":"R Edgar","year":"2021","unstructured":"Edgar R. Syncmers are more sensitive than minimizers for selecting conserved k-mers in biological sequences. PeerJ. 2021;9:10805.","journal-title":"PeerJ"},{"issue":"11","key":"6333_CR14","doi-asserted-by":"publisher","first-page":"2080","DOI":"10.1101\/gr.275648.121","volume":"31","author":"K Sahlin","year":"2021","unstructured":"Sahlin K. Effective sequence similarity detection with strobemers. Genome Res. 2021;31(11):2080\u201394.","journal-title":"Genome Res"},{"issue":"22\u201323","key":"6333_CR15","first-page":"5344","volume":"36","author":"MC Frith","year":"2020","unstructured":"Frith MC, No\u00e9 L, Kucherov G. Minimally overlapping words for sequence similarity search. Bioinformatics. 2020;36(22\u201323):5344\u201350.","journal-title":"Bioinformatics"},{"issue":"Supplement-1","key":"6333_CR16","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1093\/bioinformatics\/btad218","volume":"39","author":"X Li","year":"2023","unstructured":"Li X, Shi Q, Chen K, Shao M. Seeding with minimized subsequence. Bioinformatics. 2023;39(Supplement-1):232\u201341.","journal-title":"Bioinformatics"},{"issue":"1","key":"6333_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13059-016-0997-x","volume":"17","author":"BD Ondov","year":"2016","unstructured":"Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, et al. Mash: fast genome and metagenome distance estimation using MinHash. Genome Biol. 2016;17(1):1\u201314.","journal-title":"Genome Biol"},{"issue":"20","key":"6333_CR18","doi-asserted-by":"publisher","first-page":"4659","DOI":"10.1093\/bioinformatics\/btab790","volume":"38","author":"J Shaw","year":"2022","unstructured":"Shaw J, Yu YW. Theory of local k-mer selection with applications to long-read alignment. Bioinform. 2022;38(20):4659\u201369.","journal-title":"Bioinform"},{"issue":"1","key":"6333_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13059-018-1605-z","volume":"20","author":"S Fu","year":"2019","unstructured":"Fu S, Wang A, Au KF. A comparative evaluation of hybrid error correction methods for error-prone long reads. Genome Biol. 2019;20(1):1\u201317.","journal-title":"Genome Biol"},{"issue":"1","key":"6333_CR20","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1186\/s13059-023-02972-3","volume":"24","author":"K Sahlin","year":"2023","unstructured":"Sahlin K, Baudeau T, Cazaux B, Marchet C. A survey of mapping algorithms in the long-reads era. Genome Biol. 2023;24(1):133.","journal-title":"Genome Biol"},{"issue":"1","key":"6333_CR21","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1186\/s13059-022-02831-7","volume":"23","author":"K Sahlin","year":"2022","unstructured":"Sahlin K. Strobealign: flexible seed size enables ultra-fast and accurate read alignment. Genome Biol. 2022;23(1):260.","journal-title":"Genome Biol"},{"key":"6333_CR22","doi-asserted-by":"crossref","unstructured":"Rahman T, Bhowmik O, Kalyanaraman A. An efficient parallel sketch-based algorithmic workflow for mapping long reads. In: IEEE\/ACM transactions on computational biology and bioinformatics. 2024.","DOI":"10.1101\/2023.11.28.569084"},{"issue":"10","key":"6333_CR23","doi-asserted-by":"publisher","first-page":"1010638","DOI":"10.1371\/journal.pcbi.1010638","volume":"18","author":"A Dutta","year":"2022","unstructured":"Dutta A, Pellow D, Shamir R. Parameterized syncmer schemes improve long-read mapping. PLoS Comput Biol. 2022;18(10):1010638.","journal-title":"PLoS Comput Biol"},{"issue":"1","key":"6333_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1471-2105-13-238","volume":"13","author":"MJ Chaisson","year":"2012","unstructured":"Chaisson MJ, Tesler G. Mapping single molecule sequencing reads using basic local alignment with successive refinement (BLASR): application and theory. BMC Bioinform. 2012;13(1):1\u201318.","journal-title":"BMC Bioinform"},{"issue":"1","key":"6333_CR25","doi-asserted-by":"publisher","first-page":"11307","DOI":"10.1038\/ncomms11307","volume":"7","author":"I Sovi\u0107","year":"2016","unstructured":"Sovi\u0107 I, \u0160iki\u0107 M, Wilm A, Fenlon SN, Chen S, Nagarajan N. Fast and sensitive mapping of nanopore sequencing reads with graphmap. Nat Commun. 2016;7(1):11307.","journal-title":"Nat Commun"},{"key":"6333_CR26","doi-asserted-by":"crossref","unstructured":"Rahman T, Bhowmik O, Kalyanaraman A. An efficient parallel sketch-based algorithm for mapping long reads to contigs. In: 2023 IEEE international parallel and distributed processing symposium workshops (IPDPSW). IEEE; 2023. pp. 157\u2013166.","DOI":"10.1109\/IPDPSW59300.2023.00037"},{"issue":"10","key":"6333_CR27","doi-asserted-by":"publisher","first-page":"1005777","DOI":"10.1371\/journal.pcbi.1005777","volume":"13","author":"Y Orenstein","year":"2017","unstructured":"Orenstein Y, Pellow D, Mar\u00e7ais G, Shamir R, Kingsford C. Designing small universal k-mer hitting sets for improved analysis of high-throughput sequencing. PLoS Comput Biol. 2017;13(10):1005777.","journal-title":"PLoS Comput Biol"},{"issue":"14","key":"6333_CR28","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1093\/bioinformatics\/btx235","volume":"33","author":"G Mar\u00e7ais","year":"2017","unstructured":"Mar\u00e7ais G, Pellow D, Bork D, Orenstein Y, Shamir R, Kingsford C. Improving the performance of minimizers and winnowing schemes. Bioinformatics. 2017;33(14):110\u20137.","journal-title":"Bioinformatics"},{"key":"6333_CR29","doi-asserted-by":"crossref","unstructured":"DeBlasio D, Gbosibo F, Kingsford C, Mar\u00e7ais G. Practical universal k-mer sets for minimizer schemes. In: Proceedings of the 10th ACM international conference on bioinformatics, computational biology and health informatics. 2019. pp. 167\u2013176.","DOI":"10.1145\/3307339.3342144"},{"issue":"Supplement\u20131","key":"6333_CR30","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1093\/bioinformatics\/btaa472","volume":"36","author":"H Zheng","year":"2020","unstructured":"Zheng H, Kingsford C, Mar\u00e7ais G. Improved design and analysis of practical minimizers. Bioinformatics. 2020;36(Supplement\u20131):119\u201327.","journal-title":"Bioinformatics"},{"key":"6333_CR31","doi-asserted-by":"crossref","unstructured":"Hoang M, Zheng H, Kingsford C. DeepMinimizer: a differentiable framework for optimizing sequence-specific minimizer schemes. In: International conference on research in computational molecular biology. Springer; 2022. pp. 52\u201369.","DOI":"10.1007\/978-3-031-04749-7_4"},{"key":"6333_CR32","doi-asserted-by":"crossref","unstructured":"Schleimer S, Wilkerson DS, Aiken A. Winnowing: local algorithms for document fingerprinting. In: Proceedings of the 2003 ACM SIGMOD international conference on management of data. 2003. pp. 76\u201385.","DOI":"10.1145\/872757.872770"},{"issue":"13","key":"6333_CR33","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1093\/bioinformatics\/bty258","volume":"34","author":"G Mar\u00e7ais","year":"2018","unstructured":"Mar\u00e7ais G, DeBlasio D, Kingsford C. Asymptotically optimal minimizers schemes. Bioinformatics. 2018;34(13):13\u201322.","journal-title":"Bioinformatics"},{"key":"6333_CR34","doi-asserted-by":"crossref","unstructured":"Ekim B, Berger B, Orenstein Y. A randomized parallel algorithm for efficiently finding near-optimal universal hitting sets. In: International conference on research in computational molecular biology. Springer; 2020. pp. 37\u201353.","DOI":"10.1007\/978-3-030-45257-5_3"},{"issue":"1","key":"6333_CR35","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1089\/cmb.2023.0212","volume":"31","author":"M Hoang","year":"2024","unstructured":"Hoang M, Mar\u00e7ais G, Kingsford C. Density and conservation optimization of the generalized masked-minimizer sketching scheme. J Comput Biol. 2024;31(1):2\u201320.","journal-title":"J Comput Biol"},{"key":"6333_CR36","unstructured":"Likic V. The Needleman-Wunsch algorithm for sequence alignment. Lecture given at the 7th Melbourne Bioinformatics Course, Bi021 Molecular Science and Biotechnology Institute, University of Melbourne. 2008. pp. 1\u201346."},{"issue":"1\u20132","key":"6333_CR37","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/0167-8191(87)90010-X","volume":"5","author":"RW Hockney","year":"1987","unstructured":"Hockney RW. Parametrization of computer performance. Parallel Comput. 1987;5(1\u20132):97\u2013103.","journal-title":"Parallel Comput"},{"issue":"1","key":"6333_CR38","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1186\/s12859-024-05878-4","volume":"25","author":"O Bhowmik","year":"2024","unstructured":"Bhowmik O, Rahman T, Kalyanaraman A. Maptcha: an efficient parallel workflow for hybrid genome scaffolding. BMC Bioinform. 2024;25(1):263.","journal-title":"BMC Bioinform"},{"issue":"4","key":"6333_CR39","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1002\/cpz1.733","volume":"3","author":"L Coombe","year":"2023","unstructured":"Coombe L, Warren RL, Wong J, Nikolic V, Birol I. ntLink: a toolkit for de novo genome assembly scaffolding and mapping using long reads. Curr Protoc. 2023;3(4):733.","journal-title":"Curr Protoc"},{"issue":"1","key":"6333_CR40","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12864-019-6337-2","volume":"20","author":"M Qin","year":"2019","unstructured":"Qin M, Wu S, Li A, Zhao F, Feng H, Ding L, et al. LRScaf: improving draft genomes using long noisy reads. BMC Genomics. 2019;20(1):1\u201312.","journal-title":"BMC Genomics"},{"key":"6333_CR41","doi-asserted-by":"crossref","unstructured":"Bhowmik O, Kalyanaraman A. Tile-X: A vertex reordering approach for scalable long read assembly. bioRxiv, 2025;2025\u201304.","DOI":"10.1101\/2025.04.21.649853"},{"issue":"4","key":"6333_CR42","first-page":"092","volume":"4","author":"Y Ono","year":"2022","unstructured":"Ono Y, Hamada M, Asai K. PBSIM3: a simulator for all types of PacBio and ONT long reads. NAR Genom Bioinform. 2022;4(4):092.","journal-title":"NAR Genom Bioinform"},{"issue":"D1","key":"6333_CR43","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1093\/nar\/gks1195","volume":"41","author":"DA Benson","year":"2012","unstructured":"Benson DA, Cavanaugh M, Clark K, Karsch-Mizrachi I, Lipman DJ, Ostell J, et al. Genbank. Nucleic Acids Res. 2012;41(D1):36\u201342.","journal-title":"Nucleic Acids Res"},{"key":"6333_CR44","unstructured":"Biosciences P. PacBio real-world HiFi long reads for O. sativa. https:\/\/downloads.pacbcloud.com\/public\/dataset\/Sequel-IIe-202104\/rice\/. (2021) Accessed Aug 2022."},{"issue":"4","key":"6333_CR45","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1093\/bioinformatics\/btr708","volume":"28","author":"W Huang","year":"2012","unstructured":"Huang W, Li L, Myers JR, Marth GT. ART: a next-generation sequencing read simulator. Bioinform. 2012;28(4):593\u20134.","journal-title":"Bioinform"},{"issue":"1","key":"6333_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1748-7188-8-22","volume":"8","author":"R Chikhi","year":"2013","unstructured":"Chikhi R, Rizk G. Space-efficient and exact de bruijn graph representation based on a bloom filter. Algorithms Mol Biol. 2013;8(1):1\u20139.","journal-title":"Algorithms Mol Biol"},{"key":"6333_CR47","doi-asserted-by":"publisher","unstructured":"Nethercote N, Seward J. Valgrind: A framework for heavyweight dynamic binary instrumentation. In: Proceedings of the 28th ACM SIGPLAN conference on programming language design and implementation (PLDI \u201907). Association for Computing Machinery, San Diego, California, USA; 2007. pp. 89\u2013100. https:\/\/doi.org\/10.1145\/1250734.1250746","DOI":"10.1145\/1250734.1250746"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06333-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s12859-025-06333-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06333-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T00:02:23Z","timestamp":1767916943000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1186\/s12859-025-06333-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":47,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["6333"],"URL":"https:\/\/doi.org\/10.1186\/s12859-025-06333-8","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,9]]},"assertion":[{"value":"23 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"5"}}