{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T00:33:42Z","timestamp":1773275622599,"version":"3.50.1"},"reference-count":31,"publisher":"Oxford University Press (OUP)","issue":"14","license":[{"start":{"date-parts":[[2017,7,12]],"date-time":"2017-07-12T00:00:00Z","timestamp":1499817600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["BBSRC-NSF\/BIO-1564917"],"award-info":[{"award-number":["BBSRC-NSF\/BIO-1564917"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1247726"],"award-info":[{"award-number":["IIS-1247726"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1251137"],"award-info":[{"award-number":["IIS-1251137"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1408695"],"award-info":[{"award-number":["CNS-1408695"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1439084"],"award-info":[{"award-number":["CCF-1439084"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1617618"],"award-info":[{"award-number":["CCF-1617618"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006234","name":"Sandia National Laboratories","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006234","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Almost all de novo short-read genome and transcriptome assemblers start by building a representation of the de Bruijn Graph of the reads they are given as input. Even when other approaches are used for subsequent assembly (e.g. when one is using \u2018long read\u2019 technologies like those offered by PacBio or Oxford Nanopore), efficient k-mer processing is still crucial for accurate assembly, and state-of-the-art long-read error-correction methods use de Bruijn Graphs. Because of the centrality of de Bruijn Graphs, researchers have proposed numerous methods for representing de Bruijn Graphs compactly. Some of these proposals sacrifice accuracy to save space. Further, none of these methods store abundance information, i.e. the number of times that each k-mer occurs, which is key in transcriptome assemblers.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We present a method for compactly representing the weighted de Bruijn Graph (i.e. with abundance information) with essentially no errors. Our representation yields zero errors while increasing the space requirements by less than 18\u201328% compared to the approximate de Bruijn graph representation in Squeakr. Our technique is based on a simple invariant that all weighted de Bruijn Graphs must satisfy, and hence is likely to be of general interest and applicable in most weighted de Bruijn Graph-based systems.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/github.com\/splatlab\/debgr.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btx261","type":"journal-article","created":{"date-parts":[[2017,4,20]],"date-time":"2017-04-20T07:52:13Z","timestamp":1492674733000},"page":"i133-i141","source":"Crossref","is-referenced-by-count":33,"title":["deBGR: an efficient and near-exact representation of the weighted de Bruijn graph"],"prefix":"10.1093","volume":"33","author":[{"given":"Prashant","family":"Pandey","sequence":"first","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, NY, USA"}]},{"given":"Michael A","family":"Bender","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, NY, USA"}]},{"given":"Rob","family":"Johnson","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, NY, USA"},{"name":"VMWare, Inc., Palo Alto, CA"}]},{"given":"Rob","family":"Patro","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Stony Brook University, Stony Brook, NY, USA"}]}],"member":"286","published-online":{"date-parts":[[2017,7,12]]},"reference":[{"key":"2023051506483284100_btx261-B1","first-page":"145","volume-title":"Fully Dynamic de Bruijn Graphs","author":"Belazzougui","year":"2016"},{"key":"2023051506483284100_btx261-B2","doi-asserted-by":"crossref","DOI":"10.14778\/2350229.2350275","article-title":"Don\u2019t thrash: how to cache your hash on flash","volume":"5","author":"Bender","year":"2012","journal-title":"Proc. VLDB Endowment"},{"key":"2023051506483284100_btx261-B3","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1145\/362686.362692","article-title":"Spacetime trade-offs in hash coding with allowable errors","volume":"13","author":"Bloom","year":"1970","journal-title":"Commun. ACM"},{"key":"2023051506483284100_btx261-B4","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1007\/978-3-642-33122-0_18","volume-title":"Proceedings of the International Workshop on Algorithms in Bioinformatics","author":"Bowe","year":"2012"},{"key":"2023051506483284100_btx261-B5","doi-asserted-by":"crossref","first-page":"1710","DOI":"10.1101\/gr.209247.116","article-title":"Improved assembly of noisy long reads by k-mer validation","volume":"26","author":"Carvalho","year":"2016","journal-title":"Genome Res"},{"key":"2023051506483284100_btx261-B6","doi-asserted-by":"crossref","first-page":"30.","DOI":"10.1186\/s13059-015-0596-2","article-title":"Bridger: a new framework for de novo transcriptome assembly using RNA-seq data","volume":"16","author":"Chang","year":"2015","journal-title":"Genome Biol"},{"key":"2023051506483284100_btx261-B7","doi-asserted-by":"crossref","first-page":"1.","DOI":"10.1186\/1748-7188-8-22","article-title":"Space-efficient and exact de Bruijn graph representation based on a Bloom filter","volume":"8","author":"Chikhi","year":"2013","journal-title":"Algorith. Mol. Biol"},{"key":"2023051506483284100_btx261-B8","first-page":"35","volume-title":"Proceedings of the International Conference on Research in Computational Molecular Biology","author":"Chikhi","year":"2014"},{"key":"2023051506483284100_btx261-B9","doi-asserted-by":"crossref","first-page":"987","DOI":"10.1038\/nbt.2023","article-title":"How to apply de Bruijn graphs to genome assembly","volume":"29","author":"Compeau","year":"2011","journal-title":"Nat. Biotechnol"},{"key":"2023051506483284100_btx261-B10","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1016\/j.jalgor.2003.12.001","article-title":"An improved data stream summary: the count-min sketch and its applications","volume":"55","author":"Cormode","year":"2005","journal-title":"J. Algorith"},{"key":"2023051506483284100_btx261-B100"},{"key":"2023051506483284100_btx261-B11","doi-asserted-by":"crossref","first-page":"644","DOI":"10.1038\/nbt.1883","article-title":"Full-length transcriptome assembly from RNA-seq data without a reference genome","volume":"29","author":"Grabherr","year":"2011","journal-title":"Nat. Biotechnol"},{"key":"2023051506483284100_btx261-B12","doi-asserted-by":"crossref","first-page":"593.","DOI":"10.1093\/bioinformatics\/btr708","article-title":"ART: a next-generation sequencing read simulator","volume":"28","author":"Huang","year":"2012","journal-title":"Bioinformatics"},{"key":"2023051506483284100_btx261-B13","article-title":"Shannon: an information-optimal de novo RNA-seq assembler","author":"Kannan","year":"2016","journal-title":"bioRxiv"},{"key":"2023051506483284100_btx261-B14","article-title":"Canu: scalable and accurate long-read assembly via adaptive k-mer weighting and repeat separation","author":"Koren","year":"2017","journal-title":"bioRxiv"},{"key":"2023051506483284100_btx261-B15","doi-asserted-by":"crossref","first-page":"e1004772.","DOI":"10.1371\/journal.pcbi.1004772","article-title":"Binpacker: packing-based de novo transcriptome assembly from RNA-seq data","volume":"12","author":"Liu","year":"2016","journal-title":"PLOS Comput. Biol"},{"key":"2023051506483284100_btx261-B16","doi-asserted-by":"crossref","first-page":"1.","DOI":"10.1186\/1471-2105-12-333","article-title":"Efficient counting of k-mers in DNA sequences using a Bloom filter","volume":"12","author":"Melsted","year":"2011","journal-title":"BMC Bioinform"},{"key":"2023051506483284100_btx261-B17","article-title":"kWIP: the k-mer weighted inner product, a de novo estimator of genetic similarity","author":"Murray","year":"2016","journal-title":"bioRxiv"},{"key":"2023051506483284100_btx261-B18","doi-asserted-by":"crossref","DOI":"10.1145\/3035918.3035963","article-title":"A General-Purpose Counting Filter: Making Every Bit Count","author":"Pandey","year":"2017"},{"key":"2023051506483284100_btx261-B19","author":"Pandey","year":"2017"},{"key":"2023051506483284100_btx261-B20","doi-asserted-by":"crossref","first-page":"13272","DOI":"10.1073\/pnas.1121464109","article-title":"Scaling metagenome sequence assembly with probabilistic de Bruijn graphs","volume":"109","author":"Pell","year":"2012","journal-title":"Proc. Natl. Acad. Sci"},{"key":"2023051506483284100_btx261-B21","first-page":"137","volume-title":"International Conference on Research in Computational Molecular Biology","author":"Pellow","year":"2016"},{"key":"2023051506483284100_btx261-B22","doi-asserted-by":"crossref","first-page":"9748","DOI":"10.1073\/pnas.171285098","article-title":"An Eulerian path approach to DNA fragment assembly","volume":"98","author":"Pevzner","year":"2001","journal-title":"Proc. Natl. Acad. Sci"},{"key":"2023051506483284100_btx261-B23","doi-asserted-by":"crossref","first-page":"364","DOI":"10.1007\/978-3-642-40453-5_28","volume-title":"Algorithms in Bioinformatics","author":"Salikhov","year":"2013"},{"key":"2023051506483284100_btx261-B24","first-page":"btw321","article-title":"Accurate self-correction of errors in long reads using de Bruijn graphs","author":"Salmela","year":"2016","journal-title":"Bioinformatics"},{"key":"2023051506483284100_btx261-B25","doi-asserted-by":"crossref","first-page":"1086","DOI":"10.1093\/bioinformatics\/bts094","article-title":"Oases: robust de novo RNA-seq assembly across the dynamic range of expression levels","volume":"28","author":"Schulz","year":"2012","journal-title":"Bioinformatics"},{"key":"2023051506483284100_btx261-B26","doi-asserted-by":"crossref","first-page":"1117","DOI":"10.1101\/gr.089532.108","article-title":"ABySS: a parallel assembler for short read sequence data","volume":"19","author":"Simpson","year":"2009","journal-title":"Genome Res"},{"key":"2023051506483284100_btx261-B27","doi-asserted-by":"crossref","DOI":"10.1038\/nbt.3442","article-title":"Fast search of thousands of short-read sequencing experiments","author":"Solomon","year":"2016","journal-title":"Nat. Biotechnol"},{"key":"2023051506483284100_btx261-B28","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1093\/bioinformatics\/btg005","article-title":"Alignment-free sequence comparison\u2013a review","volume":"19","author":"Vinga","year":"2003","journal-title":"Bioinformatics"},{"key":"2023051506483284100_btx261-B29","doi-asserted-by":"crossref","first-page":"821","DOI":"10.1101\/gr.074492.107","article-title":"Velvet: algorithms for de novo short read assembly using de Bruijn graphs","volume":"18","author":"Zerbino","year":"2008","journal-title":"Genome Res"},{"key":"2023051506483284100_btx261-B30","doi-asserted-by":"crossref","first-page":"e101271.","DOI":"10.1371\/journal.pone.0101271","article-title":"These are not the k-mers you are looking for: efficient online k-mer counting using a probabilistic data structure","volume":"9","author":"Zhang","year":"2014","journal-title":"PloS One"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/14\/i133\/50315096\/bioinformatics_33_14_i133.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/14\/i133\/50315096\/bioinformatics_33_14_i133.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,15]],"date-time":"2023-05-15T06:49:07Z","timestamp":1684133347000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/33\/14\/i133\/3953976"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,12]]},"references-count":31,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2017,7,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btx261","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2017,7,15]]},"published":{"date-parts":[[2017,7,12]]}}}