{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T11:44:18Z","timestamp":1753875858038,"version":"3.41.2"},"reference-count":9,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2023,5,2]],"date-time":"2023-05-02T00:00:00Z","timestamp":1682985600000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Icelandic Research","award":["218111-051"],"award-info":[{"award-number":["218111-051"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,4]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Summary<\/jats:title>\n                  <jats:p>We describe a compression scheme for BUS files and an implementation of the algorithm in the BUStools software. Our compression algorithm yields smaller file sizes than gzip, at significantly faster compression and decompression speeds. We evaluated our algorithm on 533 BUS files from scRNA-seq experiments with a total size of 1TB. Our compression is 2.2\u00d7 faster than the fastest gzip option 35% slower than the fastest zstd option and results in 1.5\u00d7 smaller files than both methods. This amounts to an 8.3\u00d7 reduction in the file size, resulting in a compressed size of 122GB for the dataset.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>A complete description of the format is available at https:\/\/github.com\/BUStools\/BUSZ-format and an implementation at https:\/\/github.com\/BUStools\/bustools. The code to reproduce the results of this article is available at https:\/\/github.com\/pmelsted\/BUSZ_paper.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad295","type":"journal-article","created":{"date-parts":[[2023,5,2]],"date-time":"2023-05-02T15:00:40Z","timestamp":1683039640000},"source":"Crossref","is-referenced-by-count":2,"title":["BUSZ: compressed BUS files"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1665-7459","authenticated-orcid":false,"given":"P\u00e9tur Helgi","family":"Einarsson","sequence":"first","affiliation":[{"name":"Faculty of Industrial Engineering, Mechanical Engineering, and Computer Science, University of Iceland , Reykjav\u00edk, Iceland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8418-6724","authenticated-orcid":false,"given":"P\u00e1ll","family":"Melsted","sequence":"additional","affiliation":[{"name":"Faculty of Industrial Engineering, Mechanical Engineering, and Computer Science, University of Iceland , Reykjav\u00edk, Iceland"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2023,5,2]]},"reference":[{"year":"2022","author":"Booeshaghi","key":"2023051522074755000_btad295-B1"},{"year":"2021","author":"Collet","key":"2023051522074755000_btad295-B2"},{"key":"2023051522074755000_btad295-B3","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1016\/0166-218X(93)00116-H","article-title":"Robust universal complete codes for transmission and compression","volume":"64","author":"Fraenkel","year":"1996","journal-title":"Discrete Appl Math"},{"key":"2023051522074755000_btad295-B4","doi-asserted-by":"crossref","first-page":"734","DOI":"10.1101\/gr.114819.110","article-title":"Efficient storage of high throughput DNA sequencing data using reference-based compression","volume":"21","author":"Fritz","year":"2011","journal-title":"Genome Res"},{"key":"2023051522074755000_btad295-B5","doi-asserted-by":"crossref","first-page":"813","DOI":"10.1038\/s41587-021-00870-2","article-title":"Modular, efficient and constant-memory single-cell RNA-seq preprocessing","volume":"39","author":"Melsted","year":"2021","journal-title":"Nat Biotechnol"},{"key":"2023051522074755000_btad295-B6","doi-asserted-by":"crossref","first-page":"4472","DOI":"10.1093\/bioinformatics\/btz279","article-title":"The barcode, UMI, set format and BUStools","volume":"35","author":"Melsted","year":"2019","journal-title":"Bioinformatics"},{"key":"2023051522074755000_btad295-B7","doi-asserted-by":"crossref","first-page":"eabl4896","DOI":"10.1126\/science.abl4896","article-title":"The tabula sapiens: a multiple-organ, single-cell transcriptomic atlas of humans","volume":"376","author":"The Tabula Sapiens Consortium","year":"2022","journal-title":"Science"},{"first-page":"401","year":"2009","author":"Yan","key":"2023051522074755000_btad295-B8"},{"first-page":"59","year":"2006","author":"Zukowski","key":"2023051522074755000_btad295-B9"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad295\/50169813\/btad295.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/5\/btad295\/50333341\/btad295.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/5\/btad295\/50333341\/btad295.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,15]],"date-time":"2023-05-15T22:08:08Z","timestamp":1684188488000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad295\/7147899"}},"subtitle":[],"editor":[{"given":"Christina","family":"Kendziorski","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2023,5,1]]},"references-count":9,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,5,4]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad295","relation":{},"ISSN":["1367-4811"],"issn-type":[{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2023,5,1]]},"published":{"date-parts":[[2023,5,1]]},"article-number":"btad295"}}