{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T13:04:14Z","timestamp":1775826254454,"version":"3.50.1"},"reference-count":65,"publisher":"Oxford University Press (OUP)","issue":"3","license":[{"start":{"date-parts":[[2020,6,26]],"date-time":"2020-06-26T00:00:00Z","timestamp":1593129600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2018YFC0310602"],"award-info":[{"award-number":["2018YFC0310602"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2016YFA0501704"],"award-info":[{"award-number":["2016YFA0501704"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Sciences Foundation of China","doi-asserted-by":"crossref","award":["31771477"],"award-info":[{"award-number":["31771477"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Sciences Foundation of China","doi-asserted-by":"crossref","award":["31571366"],"award-info":[{"award-number":["31571366"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,20]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>DNA repeats are abundant in eukaryotic genomes and have been proved to play a vital role in genome evolution and regulation. A large number of approaches have been proposed to identify various repeats in the genome. Some de novo repeat identification tools can efficiently generate sequence repetitive scores based on k-mer counting for repeat detection. However, we noticed that these tools can still be improved in terms of repetitive score calculation, sensitivity to segmental duplications and detection specificity. Therefore, here, we present a new computational approach named Repeat Locator (RepLoc), which is based on weighted k-mer coverage to quantify the genome sequence repetitiveness and locate the repetitive sequences. According to the repetitiveness map of the human genome generated by RepLoc, we found that there may be relationships between sequence repetitiveness and genome structures. A comprehensive benchmark shows that RepLoc is a more efficient k-mer counting based tool for de novo repeat detection. The RepLoc software is freely available at http:\/\/bis.zju.edu.cn\/reploc.<\/jats:p>","DOI":"10.1093\/bib\/bbaa086","type":"journal-article","created":{"date-parts":[[2020,4,23]],"date-time":"2020-04-23T11:07:44Z","timestamp":1587640064000},"source":"Crossref","is-referenced-by-count":11,"title":["Sequence repetitiveness quantification and <i>de novo<\/i> repeat detection by weighted k-mer coverage"],"prefix":"10.1093","volume":"22","author":[{"given":"Cong","family":"Feng","sequence":"first","affiliation":[{"name":"Ming Chen\u2019s laboratory in Zhejiang University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min","family":"Dai","sequence":"additional","affiliation":[{"name":"Key Laboratory of Genetic Network Biology, Institute of Genetics and Developmental Biology, Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongjing","family":"Liu","sequence":"additional","affiliation":[{"name":"Ming Chen\u2019s laboratory in Zhejiang University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Bioinformatics, College of Life Sciences, Zhejiang University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2020,6,26]]},"reference":[{"key":"2021052109500852600_ref1","doi-asserted-by":"crossref","first-page":"1112","DOI":"10.1126\/science.1178534","article-title":"The B73 maize genome: complexity, diversity, and dynamics","volume":"326","author":"Schnable","year":"2009","journal-title":"Science"},{"key":"2021052109500852600_ref2","doi-asserted-by":"crossref","first-page":"e1002384","DOI":"10.1371\/journal.pgen.1002384","article-title":"Repetitive elements may comprise over two-thirds of the human genome","volume":"7","author":"de Koning","year":"2011","journal-title":"PLoS Genet"},{"key":"2021052109500852600_ref3","doi-asserted-by":"crossref","first-page":"860","DOI":"10.1038\/35057062","article-title":"Initial sequencing and analysis of the human genome","volume":"409","author":"Lander","year":"2001","journal-title":"Nature"},{"key":"2021052109500852600_ref4","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1038\/284604a0","article-title":"Selfish DNA: the ultimate parasite","volume":"284","author":"Orgel","year":"1980","journal-title":"Nature"},{"key":"2021052109500852600_ref5","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1038\/371215a0","article-title":"The evolutionary dynamics of repetitive DNA in eukaryotes","volume":"371","author":"Charlesworth","year":"1994","journal-title":"Nature"},{"key":"2021052109500852600_ref6","doi-asserted-by":"crossref","first-page":"1052","DOI":"10.1126\/science.451548","article-title":"Regulation of gene expression: possible role of repetitive sequences","volume":"204","author":"Davidson","year":"1979","journal-title":"Science"},{"key":"2021052109500852600_ref7","doi-asserted-by":"crossref","first-page":"932","DOI":"10.1038\/nature05977","article-title":"Expandable DNA repeats and human disease","volume":"447","author":"Mirkin","year":"2007","journal-title":"Nature"},{"key":"2021052109500852600_ref8","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1038\/s41592-018-0236-3","article-title":"Long-read sequence and assembly of segmental duplications","volume":"16","author":"Vollger","year":"2019","journal-title":"Nat Methods"},{"key":"2021052109500852600_ref9","doi-asserted-by":"crossref","first-page":"573","DOI":"10.1093\/nar\/27.2.573","article-title":"Tandem repeats finder: a program to analyze DNA sequences","volume":"27","author":"Benson","year":"1999","journal-title":"Nucleic Acids Res"},{"key":"2021052109500852600_ref10","doi-asserted-by":"crossref","first-page":"e23","DOI":"10.1093\/nar\/gkq1212","article-title":"A new repeat-masking method enables specific detection of homologous sequences","volume":"39","author":"Frith","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"2021052109500852600_ref11","doi-asserted-by":"crossref","first-page":"344","DOI":"10.1073\/pnas.36.6.344","article-title":"The origin and behavior of mutable loci in maize","volume":"36","author":"Mc","year":"1950","journal-title":"Proc Natl Acad Sci U S A"},{"key":"2021052109500852600_ref12","doi-asserted-by":"crossref","first-page":"370","DOI":"10.1038\/nrg798","article-title":"Alu repeats and human genomic diversity","volume":"3","author":"Batzer","year":"2002","journal-title":"Nat Rev Genet"},{"key":"2021052109500852600_ref13","volume-title":"RepeatMasker Open-4.0","author":"Smit","year":"2013\u20132015"},{"key":"2021052109500852600_ref14","doi-asserted-by":"crossref","first-page":"462","DOI":"10.1159\/000084979","article-title":"Repbase update, a database of eukaryotic repetitive elements","volume":"110","author":"Jurka","year":"2005","journal-title":"Cytogenet Genome Res"},{"key":"2021052109500852600_ref15","first-page":"1","article-title":"The Repeat Pattern Toolkit (RPT): analyzing the structure and evolution of the C. elegans genome","volume":"2","author":"Agarwal","year":"1994","journal-title":"Proc Int Conf Intell Syst Mol Biol"},{"key":"2021052109500852600_ref16","doi-asserted-by":"crossref","first-page":"1269","DOI":"10.1101\/gr.88502","article-title":"Automated de novo identification of repeat sequence families in sequenced genomes","volume":"12","author":"Bao","year":"2002","journal-title":"Genome Res"},{"issue":"Suppl 1","key":"2021052109500852600_ref17","doi-asserted-by":"crossref","first-page":"i152","DOI":"10.1093\/bioinformatics\/bti1003","article-title":"PILER: identification and classification of genomic repeats","volume":"21","author":"Edgar","year":"2005","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref18","doi-asserted-by":"crossref","first-page":"426","DOI":"10.1093\/bioinformatics\/15.5.426","article-title":"REPuter: fast computation of maximal repeats in complete genomes","volume":"15","author":"Kurtz","year":"1999","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref19","doi-asserted-by":"crossref","first-page":"2369","DOI":"10.1093\/nar\/27.11.2369","article-title":"Alignment of whole genomes","volume":"27","author":"Delcher","year":"1999","journal-title":"Nucleic Acids Res"},{"key":"2021052109500852600_ref20","first-page":"297","article-title":"The Vmatch large scale sequence analysis software","volume":"412","author":"Kurtz","year":"2003","journal-title":"Computer Program"},{"key":"2021052109500852600_ref21","doi-asserted-by":"crossref","first-page":"RESEARCH0027","DOI":"10.1186\/gb-2001-2-8-research0027","article-title":"A clustering method for repeat analysis in DNA sequences","volume":"2","author":"Volfovsky","year":"2001","journal-title":"Genome Biol"},{"issue":"Suppl 1","key":"2021052109500852600_ref22","doi-asserted-by":"crossref","first-page":"i351","DOI":"10.1093\/bioinformatics\/bti1018","article-title":"De novo identification of repeat families in large genomes","volume":"21","author":"Price","year":"2005","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref23","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1093\/bioinformatics\/btl519","article-title":"Repseek, a tool to retrieve approximate repeats from large DNA sequences","volume":"23","author":"Achaz","year":"2007","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref24","doi-asserted-by":"crossref","first-page":"1405","DOI":"10.1093\/bioinformatics\/bth103","article-title":"Spectral Repeat Finder (SRF): identification of repetitive sequences using Fourier transformation","volume":"20","author":"Sharma","year":"2004","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref25","doi-asserted-by":"crossref","first-page":"1786","DOI":"10.1101\/gr.2395204","article-title":"De novo repeat classification and fragment assembly","volume":"14","author":"Pevzner","year":"2004","journal-title":"Genome Res"},{"key":"2021052109500852600_ref26","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1093\/bioinformatics\/btf843","article-title":"FORRepeats: detects repeats on entire chromosomes and between genomes","volume":"19","author":"Lefebvre","year":"2003","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref27","first-page":"170","volume-title":"Brazilian Symposium on Bioinformatics","author":"Figueroa","year":"2013"},{"key":"2021052109500852600_ref28","doi-asserted-by":"crossref","first-page":"i209","DOI":"10.1093\/bioinformatics\/btw258","article-title":"phRAIDER: pattern-hunter based rapid ab initio detection of elementary repeats","volume":"32","author":"Schaeffer","year":"2016","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref29","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1109\/BIBE.2005.23","volume-title":"Fifth IEEE Symposium on Bioinformatics and Bioengineering (BIBE'05)","author":"Zheng","year":"2005"},{"key":"2021052109500852600_ref30","doi-asserted-by":"crossref","first-page":"1803","DOI":"10.1104\/pp.19.00386","article-title":"Generic Repeat Finder: a high-sensitivity tool for genome-wide de novo repeat detection","volume":"180","author":"Shi","year":"2019","journal-title":"Plant Physiol"},{"key":"2021052109500852600_ref31","doi-asserted-by":"crossref","first-page":"2306","DOI":"10.1101\/gr.1350803","article-title":"Annotating large genomes with exact word matches","volume":"13","author":"Healy","year":"2003","journal-title":"Genome Res"},{"key":"2021052109500852600_ref32","doi-asserted-by":"crossref","first-page":"134","DOI":"10.1093\/bioinformatics\/bti774","article-title":"WindowMasker: window-based masker for sequenced genomes","volume":"22","author":"Morgulis","year":"2006","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref33","doi-asserted-by":"crossref","first-page":"517","DOI":"10.1186\/1471-2164-9-517","article-title":"A new method to compute K-mer frequencies and its application to annotate large repetitive plant genomes","volume":"9","author":"Kurtz","year":"2008","journal-title":"BMC Genomics"},{"key":"2021052109500852600_ref34","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1186\/s12859-015-0654-5","article-title":"Red: an intelligent, rapid, accurate tool for detecting repeats de-novo on the genomic scale","volume":"16","author":"Girgis","year":"2015","journal-title":"BMC Bioinformatics"},{"key":"2021052109500852600_ref35","doi-asserted-by":"crossref","first-page":"582","DOI":"10.1093\/bioinformatics\/bti039","article-title":"RAP: a new computer program for de novo identification of repeated sequences in whole genomes","volume":"21","author":"Campagna","year":"2004","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref36","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1016\/j.ab.2008.05.015","article-title":"Identification of repeat structure in large genomes using repeat probability clouds","volume":"380","author":"Gu","year":"2008","journal-title":"Anal Biochem"},{"key":"2021052109500852600_ref37","doi-asserted-by":"crossref","first-page":"764","DOI":"10.1093\/bioinformatics\/btr011","article-title":"A fast, lock-free approach for efficient parallel counting of occurrences of k-mers","volume":"27","author":"Marcais","year":"2011","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref38","doi-asserted-by":"crossref","first-page":"e80","DOI":"10.1093\/nar\/gku210","article-title":"RepARK\u2014de novo creation of repeat libraries from whole-genome NGS reads","volume":"42","author":"Koch","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"2021052109500852600_ref39","doi-asserted-by":"crossref","first-page":"705","DOI":"10.1038\/nature11650","article-title":"Analysis of the bread wheat genome using whole-genome shotgun sequencing","volume":"491","author":"Brenchley","year":"2012","journal-title":"Nature"},{"key":"2021052109500852600_ref40","doi-asserted-by":"crossref","first-page":"1005","DOI":"10.1101\/gr.GR-1871R","article-title":"Segmental duplications: organization and impact within the current human genome project assembly","volume":"11","author":"Bailey","year":"2001","journal-title":"Genome Res"},{"key":"2021052109500852600_ref41","doi-asserted-by":"crossref","first-page":"1003","DOI":"10.1126\/science.1072047","article-title":"Recent segmental duplications in the human genome","volume":"297","author":"Bailey","year":"2002","journal-title":"Science"},{"key":"2021052109500852600_ref42","doi-asserted-by":"crossref","first-page":"i706","DOI":"10.1093\/bioinformatics\/bty586","article-title":"Fast characterization of segmental duplications in genome assemblies","volume":"34","author":"Numanagic","year":"2018","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref43","doi-asserted-by":"crossref","first-page":"1760","DOI":"10.1101\/gr.135350.111","article-title":"GENCODE: the reference human genome annotation for the ENCODE project","volume":"22","author":"Harrow","year":"2012","journal-title":"Genome Res"},{"key":"2021052109500852600_ref44","doi-asserted-by":"crossref","first-page":"2759","DOI":"10.1093\/bioinformatics\/btx304","article-title":"KMC 3: counting and manipulating k-mer statistics","volume":"33","author":"Kokot","year":"2017","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref45","doi-asserted-by":"crossref","first-page":"652","DOI":"10.1093\/bioinformatics\/btt020","article-title":"DSK: k-mer counting with very low memory usage","volume":"29","author":"Rizk","year":"2013","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref46","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1186\/s13015-017-0097-9","article-title":"Gerbil: a fast and memory-efficient k-mer counter with GPU-support","volume":"12","author":"Erbert","year":"2017","journal-title":"Algorithms Mol Biol"},{"key":"2021052109500852600_ref47","article-title":"A benchmark study of k-mer counting methods for high-throughput sequencing","volume":"7","author":"Manekar","year":"2018","journal-title":"Gigascience"},{"key":"2021052109500852600_ref48","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1038\/nrg3642","article-title":"Sequencing depth and coverage: key considerations in genomic analyses","volume":"15","author":"Sims","year":"2014","journal-title":"Nat Rev Genet"},{"key":"2021052109500852600_ref49","doi-asserted-by":"crossref","first-page":"R137","DOI":"10.1186\/gb-2008-9-9-r137","article-title":"Model-based analysis of ChIP-Seq (MACS)","volume":"9","author":"Zhang","year":"2008","journal-title":"Genome Biol"},{"key":"2021052109500852600_ref50","doi-asserted-by":"crossref","first-page":"841","DOI":"10.1093\/bioinformatics\/btq033","article-title":"BEDTools: a flexible suite of utilities for comparing genomic features","volume":"26","author":"Quinlan","year":"2010","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref51","doi-asserted-by":"crossref","first-page":"1420","DOI":"10.1093\/molbev\/msg153","article-title":"Nonrandom distribution of Alu elements in genes of various functional categories: insight from analysis of human chromosomes 21 and 22","volume":"20","author":"Grover","year":"2003","journal-title":"Mol Biol and Evol"},{"key":"2021052109500852600_ref52","doi-asserted-by":"crossref","first-page":"e1003470","DOI":"10.1371\/journal.pgen.1003470","article-title":"Transposable elements are major contributors to the origin, diversification, and regulation of vertebrate long noncoding RNAs","volume":"9","author":"Kapusta","year":"2013","journal-title":"PLoS Genet"},{"key":"2021052109500852600_ref53","doi-asserted-by":"crossref","first-page":"119","DOI":"10.2183\/pjab.90.119","article-title":"Ribosomal RNA gene repeats, their stability and cellular senescence","volume":"90","author":"Kobayashi","year":"2014","journal-title":"Proc Jpn Acad Ser B"},{"key":"2021052109500852600_ref54","doi-asserted-by":"crossref","first-page":"1466","DOI":"10.1101\/gr.331902","article-title":"Identification and analysis of over 2000 ribosomal protein pseudogenes in the human genome","volume":"12","author":"Zhang","year":"2002","journal-title":"Genome Res"},{"key":"2021052109500852600_ref55","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1101\/gr.214202","article-title":"The human ribosomal protein genes: sequencing and comparative analysis of 73 genes","volume":"12","author":"Yoshihama","year":"2002","journal-title":"Genome Res"},{"key":"2021052109500852600_ref56","doi-asserted-by":"crossref","first-page":"2708","DOI":"10.1093\/bioinformatics\/bty172","article-title":"ASGART: fast and parallel genome scale segmental duplications mapping","volume":"34","author":"Delehelle","year":"2018","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref57","doi-asserted-by":"crossref","first-page":"901","DOI":"10.1101\/gr.228718.117","article-title":"Detection and analysis of ancient segmental duplications in mammalian genomes","volume":"28","author":"Pu","year":"2018","journal-title":"Genome Res"},{"key":"2021052109500852600_ref58","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1038\/nbt.1518","article-title":"PeakSeq enables systematic scoring of ChIP-seq experiments relative to controls","volume":"27","author":"Rozowsky","year":"2009","journal-title":"Nat Biotechnol"},{"key":"2021052109500852600_ref59","doi-asserted-by":"crossref","first-page":"e103","DOI":"10.1093\/nar\/gkr425","article-title":"Systematic bias in high-throughput sequencing data and its correction by BEADS","volume":"39","author":"Cheung","year":"2011","journal-title":"Nucleic Acids Res"},{"key":"2021052109500852600_ref60","doi-asserted-by":"crossref","first-page":"e30377","DOI":"10.1371\/journal.pone.0030377","article-title":"Fast computation and applications of genome mappability","volume":"7","author":"Derrien","year":"2012","journal-title":"PLoS One"},{"key":"2021052109500852600_ref61","first-page":"e120","article-title":"Umap and Bismap: quantifying genome and methylome mappability","volume":"46","author":"Karimzadeh","year":"2018","journal-title":"Nucleic Acids Res"},{"key":"2021052109500852600_ref62","doi-asserted-by":"crossref","first-page":"568","DOI":"10.1093\/bioinformatics\/btx636","article-title":"Squeakr: an exact and approximate k-mer counting system","volume":"34","author":"Pandey","year":"2018","journal-title":"Bioinformatics"},{"key":"2021052109500852600_ref63","doi-asserted-by":"crossref","first-page":"1639","DOI":"10.1101\/gr.092759.109","article-title":"Circos: an information aesthetic for comparative genomics","volume":"19","author":"Krzywinski","year":"2009","journal-title":"Genome Res"},{"key":"2021052109500852600_ref64","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1093\/bib\/bbs017","article-title":"Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration","volume":"14","author":"Thorvaldsdottir","year":"2013","journal-title":"Brief Bioinform"},{"key":"2021052109500852600_ref65","doi-asserted-by":"crossref","first-page":"W187","DOI":"10.1093\/nar\/gku365","article-title":"deepTools: a flexible platform for exploring deep-sequencing data","volume":"42","author":"Ramirez","year":"2014","journal-title":"Nucleic Acids Res"}],"container-title":["Briefings in Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bib\/article-pdf\/22\/3\/bbaa086\/37966047\/bbaa086.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"http:\/\/academic.oup.com\/bib\/article-pdf\/22\/3\/bbaa086\/37966047\/bbaa086.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,21]],"date-time":"2021-05-21T09:51:24Z","timestamp":1621590684000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bib\/article\/doi\/10.1093\/bib\/bbaa086\/5855256"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,26]]},"references-count":65,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,5,20]]}},"URL":"https:\/\/doi.org\/10.1093\/bib\/bbaa086","relation":{},"ISSN":["1477-4054"],"issn-type":[{"value":"1477-4054","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2021,5]]},"published":{"date-parts":[[2020,6,26]]},"article-number":"bbaa086"}}