{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T11:44:54Z","timestamp":1753875894922,"version":"3.41.2"},"reference-count":12,"publisher":"Oxford University Press (OUP)","issue":"1","license":[{"start":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T00:00:00Z","timestamp":1704412800000},"content-version":"vor","delay-in-days":4,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,1,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Managing data and code in open scientific research is complicated by two key problems: large datasets often cannot be stored alongside code in repository platforms like GitHub, and iterative analysis can lead to unnoticed changes to data, increasing the risk that analyses are based on older versions of data.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>SciDataFlow is a fast, concurrent command-line tool paired with a simple Data Manifest specification that streamlines tracking data changes, uploading data to remote repositories, and pulling in all data necessary to reproduce a computational analysis.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>SciDataFlow is available at https:\/\/github.com\/vsbuffalo\/scidataflow.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad754","type":"journal-article","created":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T17:49:48Z","timestamp":1704476988000},"source":"Crossref","is-referenced-by-count":0,"title":["SciDataFlow: a tool for improving the flow of data through science"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4510-1609","authenticated-orcid":false,"given":"Vince","family":"Buffalo","sequence":"first","affiliation":[{"name":"Department of Integrative Biology, University of California , CA 94720, United States"}]}],"member":"286","published-online":{"date-parts":[[2024,1,5]]},"reference":[{"key":"2024011300223996000_btad754-B1","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1145\/2723872.2723882","article-title":"An introduction to docker for reproducible research","volume":"49","author":"Boettiger","year":"2015","journal-title":"SIGOPS Oper Syst Rev"},{"volume-title":"Bioinformatics Data Skills: Reproducible and robust research with open source tools","year":"2015","author":"Buffalo","key":"2024011300223996000_btad754-B2"},{"key":"2024011300223996000_btad754-B3","doi-asserted-by":"crossref","first-page":"2151","DOI":"10.1038\/s41467-021-22381-z","article-title":"Go get data (GGD) is a framework that facilitates reproducible access to genomic data","volume":"12","author":"Cormier","year":"2021","journal-title":"Nat Commun"},{"key":"2024011300223996000_btad754-B4","doi-asserted-by":"crossref","first-page":"D988","DOI":"10.1093\/nar\/gkab1049","article-title":"Ensembl 2022","volume":"50","author":"Cunningham","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2024011300223996000_btad754-B5","doi-asserted-by":"crossref","first-page":"101132","DOI":"10.1016\/j.ecoinf.2020.101132","article-title":"Toward reliable biodiversity dataset references","volume":"59","author":"Elliott","year":"2020","journal-title":"Ecol. Inform"},{"key":"2024011300223996000_btad754-B6","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1038\/s41597-023-02230-y","article-title":"Signing data citations enables data verification and citation persistence","volume":"10","author":"Elliott","year":"2023","journal-title":"Sci Data"},{"year":"2000","author":"Fielding","key":"2024011300223996000_btad754-B7"},{"key":"2024011300223996000_btad754-B8","doi-asserted-by":"crossref","first-page":"2520","DOI":"10.1093\/bioinformatics\/bts480","article-title":"Snakemake\u2013a scalable bioinformatics workflow engine","volume":"28","author":"K\u00f6ster","year":"2012","journal-title":"Bioinformatics"},{"year":"2018","author":"Kunze","key":"2024011300223996000_btad754-B9"},{"key":"2024011300223996000_btad754-B10","doi-asserted-by":"crossref","first-page":"D1188","DOI":"10.1093\/nar\/gkac1072","article-title":"The UCSC genome browser database: 2023 update","volume":"51","author":"Nassar","year":"2023","journal-title":"Nucleic Acids Res"},{"key":"2024011300223996000_btad754-B11","doi-asserted-by":"crossref","DOI":"10.1186\/1751-0473-8-7","article-title":"Git can facilitate greater reproducibility and increased transparency in science","volume":"8","author":"Ram","year":"2013","journal-title":"Source Code Biol. Med"},{"key":"2024011300223996000_btad754-B12","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/gigascience\/giz149","article-title":"Refgenie: a reference genome resource manager","volume":"9","author":"Stolarczyk","year":"2020","journal-title":"Gigascience"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad754\/55117267\/btad754.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/1\/btad754\/55612864\/btad754.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/1\/btad754\/55612864\/btad754.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,13]],"date-time":"2024-01-13T00:22:50Z","timestamp":1705105370000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad754\/7511845"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,1,1]]},"references-count":12,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,1,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad754","relation":{},"ISSN":["1367-4811"],"issn-type":[{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2024,1,1]]},"published":{"date-parts":[[2024,1,1]]},"article-number":"btad754"}}