{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T19:57:41Z","timestamp":1772913461397,"version":"3.50.1"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,1]],"date-time":"2021-05-01T00:00:00Z","timestamp":1619827200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001871","name":"Fundao para a Cincia e a Tecnologia","doi-asserted-by":"publisher","award":["PTDC\/EEISCR\/6970\/2014"],"award-info":[{"award-number":["PTDC\/EEISCR\/6970\/2014"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Fundao para a Cincia e a Tecnologia","doi-asserted-by":"publisher","award":["UIDB\/00408\/2020"],"award-info":[{"award-number":["UIDB\/00408\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Fundao para a Cincia e a Tecnologia","doi-asserted-by":"publisher","award":["UIDB\/50014\/2020"],"award-info":[{"award-number":["UIDB\/50014\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Fundao para a Cincia e a Tecnologia","doi-asserted-by":"publisher","award":["UIDP\/00408\/2020"],"award-info":[{"award-number":["UIDP\/00408\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["H2020-ICT-643964"],"award-info":[{"award-number":["H2020-ICT-643964"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2021,5,1]]},"DOI":"10.1109\/tc.2020.2994774","type":"journal-article","created":{"date-parts":[[2020,5,16]],"date-time":"2020-05-16T00:12:14Z","timestamp":1589587934000},"page":"669-681","source":"Crossref","is-referenced-by-count":13,"title":["GenoDedup: Similarity-Based Deduplication and Delta-Encoding for Genome Sequencing Data"],"prefix":"10.1109","volume":"70","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1299-8950","authenticated-orcid":false,"given":"Vinicius","family":"Cogo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9752-2822","authenticated-orcid":false,"given":"Joao","family":"Paulo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8386-1628","authenticated-orcid":false,"given":"Alysson","family":"Bessani","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-015-0628-7"},{"key":"ref38","first-page":"111","article-title":"Sparse indexing: Large scale, inline deduplication using sampling and locality","author":"lillibridge","year":"2009","journal-title":"Proc 7th Conf File Storage Technol"},{"key":"ref33","first-page":"26","article-title":"Evaluation criteria for data de-dupe","author":"freeman","year":"2007"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-15-S15-S10"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btv384"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.gene.2015.12.053"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391246"},{"key":"ref36","first-page":"269","article-title":"Avoiding the disk bottleneck in the data domain deduplication file system","author":"zhu","year":"2008","journal-title":"Proc 6th USENIX Conf File Storage Technol"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2009.09.011"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.5902\/2448190421133"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.genom.9.081307.164359"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/276698.276876"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.1974"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2611778"},{"key":"ref13","first-page":"113","article-title":"Application-specific delta-encoding via resemblance detection","author":"douglis","year":"2003","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035938"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty1015"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCBB.2012.160"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM.2011.110"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1101\/gr.8.3.175"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.2010.0253"},{"key":"ref28","first-page":"153","article-title":"Persona: A high-performance bioinformatics framework","author":"byma","year":"2017","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1155\/2012\/251364"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty205"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MSPEC.2018.8449046"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"2078","DOI":"10.1093\/bioinformatics\/btp352","article-title":"The sequence alignment\/map format and SAMtools","volume":"25","author":"lietal","year":"2009","journal-title":"Bioinformatics"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btq346"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"1767","DOI":"10.1093\/nar\/gkp1137","article-title":"The Sanger FASTQ file format for sequences with quality scores, and the Solexa\/Illumina FASTQ variants","volume":"38","author":"cocketal","year":"2010","journal-title":"Nucleic Acids Res"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0132460"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"1304","DOI":"10.1126\/science.1058040","article-title":"The sequence of the human genome","volume":"291","author":"venteretal","year":"2001","journal-title":"Science"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1038\/498255a","article-title":"Biology: The big challenges of big data","volume":"498","author":"marx","year":"2013","journal-title":"Nature"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbq015"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1186\/s12920-015-0108-y"},{"key":"ref20","article-title":"NGS mapped","author":"hadfield","year":"2020"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/BIBMW.2010.5703863"},{"key":"ref22","article-title":"Data compression explained","author":"mahoney","year":"2013"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bts593"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772759"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btr014"},{"key":"ref41","first-page":"21","article-title":"On the resemblance and containment of documents","author":"broder","year":"1997","journal-title":"Proc IEEE Compression Complexity Sequences"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0059190"},{"key":"ref44","first-page":"183","article-title":"Effect of lossy compression of quality scores on variant calling","volume":"18","author":"ochoa","year":"2017","journal-title":"Briefings Bioinf"},{"key":"ref26","doi-asserted-by":"crossref","DOI":"10.1093\/nar\/gks754","article-title":"Compression of next-generation sequencing reads aided by highly efficient de novo assembly","volume":"40","author":"jones","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1186\/1751-0473-9-13"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btu208"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/12\/9397750\/09094002.pdf?arnumber=9094002","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T12:29:37Z","timestamp":1643286577000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9094002\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,1]]},"references-count":45,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tc.2020.2994774","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"value":"0018-9340","type":"print"},{"value":"1557-9956","type":"electronic"},{"value":"2326-3814","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,5,1]]}}}