{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T20:34:49Z","timestamp":1772138089720,"version":"3.50.1"},"reference-count":5,"publisher":"Oxford University Press (OUP)","issue":"1","license":[{"start":{"date-parts":[[2018,7,13]],"date-time":"2018-07-13T00:00:00Z","timestamp":1531440000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"name":"Institute for Human Infections and Immunity"},{"DOI":"10.13039\/100007865","name":"University of Texas Medical Branch","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007865","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Aerosol Sciences Department"},{"DOI":"10.13039\/100006234","name":"Sandia National Laboratories","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006234","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,1,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>The data generation capabilities of high throughput sequencing (HTS) instruments have exponentially increased over the last few years, while the cost of sequencing has dramatically decreased allowing this technology to become widely used in biomedical studies. For small labs and individual researchers, however, storage and transfer of large amounts of HTS data present a significant challenge. The recent trends in increased sequencing quality and genome coverage can be used to reconsider HTS data storage strategies.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We present Broom, a stand-alone application designed to select and store only high-quality sequencing reads at extremely high compression rates. Written in C++, the application accepts single and paired-end reads in FASTQ and FASTA formats and decompresses data in FASTA format.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>C++ code available at https:\/\/scsb.utmb.edu\/labgroups\/fofanov\/broom.asp.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/bty580","type":"journal-article","created":{"date-parts":[[2018,7,13]],"date-time":"2018-07-13T07:26:06Z","timestamp":1531466766000},"page":"143-145","source":"Crossref","is-referenced-by-count":2,"title":["Broom: application for non-redundant storage of high throughput sequencing data"],"prefix":"10.1093","volume":"35","author":[{"given":"Levent","family":"Albayrak","sequence":"first","affiliation":[{"name":"Department of Pharmacology and Toxicology, University of Texas Medical Branch - Galveston, Galveston, TX, USA"},{"name":"Sealy Center for Structural Biology and Molecular Biophysics, University of Texas Medical Branch - Galveston, Galveston, TX, USA"}]},{"given":"Kamil","family":"Khanipov","sequence":"additional","affiliation":[{"name":"Department of Pharmacology and Toxicology, University of Texas Medical Branch - Galveston, Galveston, TX, USA"},{"name":"Sealy Center for Structural Biology and Molecular Biophysics, University of Texas Medical Branch - Galveston, Galveston, TX, USA"},{"name":"Department of Computer Science, University of Houston, Houston, TX, USA"}]},{"given":"George","family":"Golovko","sequence":"additional","affiliation":[{"name":"Department of Pharmacology and Toxicology, University of Texas Medical Branch - Galveston, Galveston, TX, USA"},{"name":"Sealy Center for Structural Biology and Molecular Biophysics, University of Texas Medical Branch - Galveston, Galveston, TX, USA"}]},{"given":"Yuriy","family":"Fofanov","sequence":"additional","affiliation":[{"name":"Department of Pharmacology and Toxicology, University of Texas Medical Branch - Galveston, Galveston, TX, USA"},{"name":"Sealy Center for Structural Biology and Molecular Biophysics, University of Texas Medical Branch - Galveston, Galveston, TX, USA"}]}],"member":"286","published-online":{"date-parts":[[2018,7,13]]},"reference":[{"key":"2023013107195342100_bty580-B1","author":"Adler","year":"2007"},{"key":"2023013107195342100_bty580-B2","doi-asserted-by":"crossref","first-page":"e171","DOI":"10.1093\/nar\/gks754","article-title":"Compression of next-generation sequencing reads aided by highly efficient de novo assembly","volume":"40","author":"Jones","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2023013107195342100_bty580-B3","volume-title":"The Art of Computer Programming: Sorting and Searching","author":"Knuth","year":"1998"},{"key":"2023013107195342100_bty580-B4","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1016\/0923-5965(92)90019-C","article-title":"The MPEG video compression algorithm","volume":"4","author":"Le Gall","year":"1992","journal-title":"Signal Process Image Commun"},{"key":"2023013107195342100_bty580-B5","doi-asserted-by":"crossref","first-page":"D19","DOI":"10.1093\/nar\/gkq1019","article-title":"The sequence read archive","volume":"39","author":"Leinonen","year":"2011","journal-title":"Nucleic Acids Res"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/1\/143\/48962182\/bioinformatics_35_1_143.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/1\/143\/48962182\/bioinformatics_35_1_143.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T04:59:16Z","timestamp":1675141156000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/35\/1\/143\/5053314"}},"subtitle":[],"editor":[{"given":"Alfonso","family":"Valencia","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2018,7,13]]},"references-count":5,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,1,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/bty580","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/312306","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2019,1,1]]},"published":{"date-parts":[[2018,7,13]]}}}