{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:31:25Z","timestamp":1773279085656,"version":"3.50.1"},"reference-count":21,"publisher":"Oxford University Press (OUP)","issue":"11","license":[{"start":{"date-parts":[[2020,10,28]],"date-time":"2020-10-28T00:00:00Z","timestamp":1603843200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61872309"],"award-info":[{"award-number":["61872309"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61922020"],"award-info":[{"award-number":["61922020"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,12]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>Removing duplicate and near-duplicate reads, generated by high-throughput sequencing technologies, is able to reduce computational resources in downstream applications. Here we develop minirmd, a de novo tool to remove duplicate reads via multiple rounds of clustering using different length of minimizer. Experiments demonstrate that minirmd removes more near-duplicate reads than existing clustering approaches and is faster than existing multi-core tools. To the best of our knowledge, minirmd is the first tool to remove near-duplicates on reverse-complementary strand.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/github.com\/yuansliu\/minirmd.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa915","type":"journal-article","created":{"date-parts":[[2020,10,14]],"date-time":"2020-10-14T12:19:12Z","timestamp":1602677952000},"page":"1604-1606","source":"Crossref","is-referenced-by-count":23,"title":["Minirmd: accurate and fast duplicate removal tool for short reads via multiple minimizers"],"prefix":"10.1093","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7680-3155","authenticated-orcid":false,"given":"Yuansheng","family":"Liu","sequence":"first","affiliation":[{"name":"College of Information Science and Engineering, Hunan University , Changsha, Hunan 410012, China"}]},{"given":"Xiaocai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Advanced Analytics Institute, University of Technology Sydney , Broadway, NSW 2007, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6406-1142","authenticated-orcid":false,"given":"Quan","family":"Zou","sequence":"additional","affiliation":[{"name":"Institute of Fundamental and Frontier Sciences, University of Electronic Science and Technology of China , Chengdu 610054, China"}]},{"given":"Xiangxiang","family":"Zeng","sequence":"additional","affiliation":[{"name":"College of Information Science and Engineering, Hunan University , Changsha, Hunan 410012, China"}]}],"member":"286","published-online":{"date-parts":[[2020,10,28]]},"reference":[{"key":"2023051709442685300_btaa915-B1","doi-asserted-by":"crossref","first-page":"1324","DOI":"10.1093\/bioinformatics\/bts123","article-title":"Fulcrum: condensing redundant reads from high-throughput sequencing studies","volume":"28","author":"Burriesci","year":"2012","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B2","doi-asserted-by":"crossref","first-page":"i884","DOI":"10.1093\/bioinformatics\/bty560","article-title":"fastp: an ultra-fast all-in-one FASTQ preprocessor","volume":"34","author":"Chen","year":"2018","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B3","doi-asserted-by":"crossref","first-page":"606.","DOI":"10.1186\/s12859-019-3280-9","article-title":"Gencore: an efficient tool to generate consensus reads for error suppressing and duplicate removing of NGS data","volume":"20","author":"Chen","year":"2019","journal-title":"BMC Bioinformatics"},{"key":"2023051709442685300_btaa915-B4","doi-asserted-by":"crossref","first-page":"3254","DOI":"10.1093\/bioinformatics\/btaa112","article-title":"Nubeam-dedup: a fast and RAM-efficient tool to de-duplicate sequencing reads without mapping","volume":"36","author":"Dai","year":"2020","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B5","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1038\/ng.806","article-title":"A framework for variation discovery and genotyping using next-generation DNA sequencing data","volume":"43","author":"DePristo","year":"2011","journal-title":"Nat. Genet"},{"key":"2023051709442685300_btaa915-B6","doi-asserted-by":"crossref","first-page":"2762","DOI":"10.1093\/bioinformatics\/btx307","article-title":"MarDRe: efficient MapReduce-based removal of duplicate DNA reads in the cloud","volume":"33","author":"Exp\u00f3sito","year":"2017","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B7","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41598-019-48242-w","article-title":"NGSReadsTreatment\u2014a Cuckoo Filter-based tool for removing duplicate reads in NGS data","volume":"9","author":"Gaia","year":"2019","journal-title":"Sci. Rep"},{"key":"2023051709442685300_btaa915-B8","doi-asserted-by":"crossref","first-page":"1562","DOI":"10.1093\/bioinformatics\/btw038","article-title":"ParDRe: faster parallel duplicated reads removal tool for sequencing studies","volume":"32","author":"Gonz\u00e1lez-Dom\u00ednguez","year":"2016","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B9","doi-asserted-by":"crossref","first-page":"1389","DOI":"10.1093\/bioinformatics\/btu844","article-title":"Disk-based compression of data from genome sequencing","volume":"31","author":"Grabowski","year":"2015","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B10","doi-asserted-by":"crossref","first-page":"593","DOI":"10.1093\/bioinformatics\/btr708","article-title":"ART: a next-generation sequencing read simulator","volume":"28","author":"Huang","year":"2012","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B11","doi-asserted-by":"crossref","first-page":"680","DOI":"10.1093\/bioinformatics\/btq003","article-title":"CD-HIT Suite: a web server for clustering and comparing biological sequences","volume":"26","author":"Huang","year":"2010","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B12","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1038\/nmeth.1311","article-title":"Amplification-free Illumina sequencing-library preparation facilitates improved mapping and assembly of (G+C)-biased genomes","volume":"6","author":"Kozarewa","year":"2009","journal-title":"Nat. Methods"},{"key":"2023051709442685300_btaa915-B13","doi-asserted-by":"crossref","first-page":"2078","DOI":"10.1093\/bioinformatics\/btp352","article-title":"The sequence alignment\/map format and SAMtools","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B14","doi-asserted-by":"crossref","first-page":"2066","DOI":"10.1093\/bioinformatics\/bty936","article-title":"Index suffix\u2013prefix overlaps by (w, k)-minimizer to generate long contigs for reads compression","volume":"35","author":"Liu","year":"2019","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B15","doi-asserted-by":"crossref","first-page":"2159","DOI":"10.1093\/bioinformatics\/btr325","article-title":"SEAL: a distributed short read mapping and duplicate removal tool","volume":"27","author":"Pireddu","year":"2011","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B16","doi-asserted-by":"crossref","first-page":"giaa086.","DOI":"10.1093\/gigascience\/giaa086","article-title":"A hybrid pipeline for reconstruction and analysis of viral genomes at multi-organ level","volume":"9","author":"Pratas","year":"2020","journal-title":"GigaScience"},{"key":"2023051709442685300_btaa915-B17","doi-asserted-by":"crossref","first-page":"1053","DOI":"10.1038\/nature09710","article-title":"Genetic history of an archaic hominin group from Denisova Cave in Siberia","volume":"468","author":"Reich","year":"2010","journal-title":"Nature"},{"key":"2023051709442685300_btaa915-B18","doi-asserted-by":"crossref","first-page":"3363","DOI":"10.1093\/bioinformatics\/bth408","article-title":"Reducing storage requirements for biological sequence comparison","volume":"20","author":"Roberts","year":"2004","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B19","doi-asserted-by":"crossref","first-page":"863","DOI":"10.1093\/bioinformatics\/btr026","article-title":"Quality control and preprocessing of metagenomic datasets","volume":"27","author":"Schmieder","year":"2011","journal-title":"Bioinformatics"},{"key":"2023051709442685300_btaa915-B20","doi-asserted-by":"crossref","first-page":"320","DOI":"10.1038\/nature10249","article-title":"Using the Acropora digitifera genome to understand coral responses to environmental change","volume":"476","author":"Shinzato","year":"2011","journal-title":"Nature"},{"key":"2023051709442685300_btaa915-B21","doi-asserted-by":"crossref","first-page":"2705","DOI":"10.1093\/bioinformatics\/btaa051","article-title":"BioSeqZip: a collapser of NGS redundant reads for the optimization of sequence analysis","volume":"36","author":"Urgese","year":"2020","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa915\/34160863\/btaa915.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/11\/1604\/50360721\/btaa915.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/11\/1604\/50360721\/btaa915.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,17]],"date-time":"2023-05-17T10:12:16Z","timestamp":1684318336000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/37\/11\/1604\/5942078"}},"subtitle":[],"editor":[{"given":"Robinson","family":"Peter","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,10,28]]},"references-count":21,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2021,7,12]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa915","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2021,6,1]]},"published":{"date-parts":[[2020,10,28]]}}}