{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T03:45:43Z","timestamp":1775187943726,"version":"3.50.1"},"reference-count":219,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2016,9,1]],"date-time":"2016-09-01T00:00:00Z","timestamp":1472688000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2016,9,1]],"date-time":"2016-09-01T00:00:00Z","timestamp":1472688000000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2016,9,1]],"date-time":"2016-09-01T00:00:00Z","timestamp":1472688000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2016,9,1]],"date-time":"2016-09-01T00:00:00Z","timestamp":1472688000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Proc. IEEE"],"published-print":{"date-parts":[[2016,9]]},"DOI":"10.1109\/jproc.2016.2571298","type":"journal-article","created":{"date-parts":[[2016,8,2]],"date-time":"2016-08-02T18:13:53Z","timestamp":1470161633000},"page":"1681-1710","source":"Crossref","is-referenced-by-count":279,"title":["A Comprehensive Study of the Past, Present, and Future of Data Deduplication"],"prefix":"10.1109","volume":"104","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4093-6391","authenticated-orcid":false,"given":"Wen","family":"Xia","sequence":"first","affiliation":[]},{"given":"Hong","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Dan","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Fred","family":"Douglis","sequence":"additional","affiliation":[]},{"given":"Philip","family":"Shilane","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Hua","sequence":"additional","affiliation":[]},{"given":"Min","family":"Fu","sequence":"additional","affiliation":[]},{"given":"Yucheng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yukun","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1109\/CCP.2011.41"},{"key":"ref172","first-page":"181","article-title":"Estimating duplication by content-based sampling","author":"xie","year":"0","journal-title":"Proc USENIX Conf Annu Tech Conf"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2010.5461965"},{"key":"ref174","first-page":"175","article-title":"File recipe compression in data deduplication systems","author":"meister","year":"0","journal-title":"Proc 11th USENIX Conf File Storage Technol"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2012.6232381"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.5244\/C.28.55"},{"key":"ref175","first-page":"1","article-title":"ViDeDup: An application-aware framework for video de-duplication","author":"katiyar","year":"0","journal-title":"Proc 3rd USENIX Conf on Hot Topics in Storage and File Syst"},{"key":"ref178","article-title":"Storage efficiency opportunities and analysis for video repositories","author":"dewakar","year":"0","journal-title":"Proceedings of 4th USENIX Workshop on Hot Topics in Storage and File Systems"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.67"},{"key":"ref168","author":"li","year":"2004","journal-title":"Emerging Technology DD200 Restorer"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/SRDS.2011.18"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/584091.584093"},{"key":"ref38","volume":"1","author":"marcellin","year":"2002","journal-title":"JPEG2000 Image Compression Fun-damentals Standards and Practice Image Compression Fundamentals Standards and Practice"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-31164-2","author":"christen","year":"2012","journal-title":"Data Matching Concepts and Techniques for Record Linkage Entity Resolution and Duplicate Detection"},{"key":"ref32","first-page":"320","article-title":"A survey on deduplication in cloud storage","volume":"13","author":"neelaveni","year":"2014","journal-title":"Asian J Inf Technol"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2611778"},{"key":"ref30","first-page":"430","article-title":"Data deduplication techniques","volume":"1","author":"he","year":"0","journal-title":"Proc Int Conf Future Inf Technol Manage Eng"},{"key":"ref37","author":"gailly","year":"1991","journal-title":"The GZIP compressor"},{"key":"ref36","year":"0","journal-title":"Theory of data compression"},{"key":"ref35","author":"storer","year":"1988","journal-title":"Data Compression Methods and Theory"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.2200\/S00262ED1V01Y201003DTM003"},{"key":"ref181","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1145\/844128.844146","article-title":"Memory resource management in VMware ESX server","volume":"36","author":"waldspurger","year":"2002","journal-title":"ACM SIGOPS Oper Syst Rev"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1145\/1629080.1629084"},{"key":"ref185","first-page":"1","article-title":"CAFTL: A content-aware flash translation layer enhancing the lifespan of flash memory based solid state drives","author":"chen","year":"0","journal-title":"Proc 9th USENIX Conf File Storage Technol"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1145\/1594977.1592580"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1145\/347057.347408"},{"key":"ref182","first-page":"1","article-title":"Decentralized deduplication in SAN cluster file systems","author":"clements","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref189","first-page":"7","article-title":"Data deduplication with linux","volume":"2011","author":"koutoupis","year":"2011","journal-title":"Linux J"},{"key":"ref188","year":"0","journal-title":"Opendedup"},{"key":"ref187","first-page":"143","article-title":"Fast, inexpensive content-addressed storage in foundation","author":"rhea","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref186","first-page":"501","article-title":"Nitro: A capacity-optimized SSD cache for primary storage","author":"li","year":"0","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1462735.1462739"},{"key":"ref27","author":"riveria","year":"2011","journal-title":"Advanced dedupe concepts"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1145\/1837915.1837921"},{"key":"ref29","first-page":"364","article-title":"A survey on deduplication methods","volume":"3","author":"banu","year":"2012","journal-title":"International Journal of Computer Trends & Technology"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.17487\/rfc1951"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/362686.362692"},{"key":"ref21","first-page":"257","article-title":"Migratory compression: Coarse-grained data reordering to improve compressibility","author":"lin","year":"0","journal-title":"Proc USENIX Conf File Storage Technol"},{"key":"ref24","author":"hamilton","year":"2008","journal-title":"Deduplication-methods for achieving data efficiency"},{"key":"ref23","first-page":"183","article-title":"Improving restore speed for backup systems that use inline chunk-based deduplication","author":"lillibridge","year":"0","journal-title":"Proc 11th USENIX Conf File Storage Technol"},{"key":"ref26","author":"brinkmann","year":"2011","journal-title":"Data Deduplication"},{"key":"ref25","author":"riveria","year":"2009","journal-title":"Understanding data deduplication"},{"key":"ref50","author":"tridgell","year":"1996","journal-title":"The rsync algorithm"},{"key":"ref51","first-page":"281","article-title":"TAPER: Tiered approach for eliminating redundancy in replica synchronization","author":"jain","year":"0","journal-title":"Proc USENIX Conf File Storage Technol"},{"key":"ref154","first-page":"1","article-title":"Secure deduplication with efficient and reliable convergent key management","volume":"25","author":"li","year":"2013","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"ref153","first-page":"195","article-title":"Fast and secure laptop backups with encrypted de-duplication","author":"anderson","year":"0","journal-title":"Proc 23th Int Conf Large Installation Syst Admin Strategies Tools Tech"},{"key":"ref156","first-page":"1","article-title":"DupLESS: Server-aided encryption for deduplicated storage","author":"bellare","year":"0","journal-title":"Proc 22nd USENIX Security Symp"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-38348-9_18"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2002.1022312"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1145\/1456469.1456471"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1145\/844128.844155"},{"key":"ref146","first-page":"1","article-title":"Dark clouds on the horizon: Using cloud storage as attack vector and online slack space","author":"mulazzani","year":"0","journal-title":"20th USENIX Security Symp"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2010.187"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1145\/2046707.2046765"},{"key":"ref149","article-title":"System for backing up files from disk volumes on multiple nodes of a computer network","author":"dilatush","year":"1998"},{"key":"ref59","first-page":"197","article-title":"HYDRAstor: A scalable secondary storage","volume":"9","author":"dubnicki","year":"0","journal-title":"Proc USENIX Conf File Storage Technol"},{"key":"ref58","first-page":"111","article-title":"Sparse indexing: Large scale, inline deduplication using sampling and locality","volume":"9","author":"lillibridge","year":"0","journal-title":"Proc 7th USENIX Conf File Storage Technol"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/1534530.1534541"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/345063.339345"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2014.38"},{"key":"ref54","first-page":"201","article-title":"Delta compressed and deduplicated storage using stream-informed locality","author":"shilane","year":"0","journal-title":"Proc 4th USENIX Conf Hot Topics Storage File Syst"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/266220.266223"},{"key":"ref52","author":"suel","year":"2002","journal-title":"Algorithms for Delta Compression and Remote File Synchronization"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/JRPROC.1952.273898"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542327"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1145\/1925019.1925021"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2006.42"},{"key":"ref164","first-page":"1","article-title":"Convergent dispersal: Toward storage-efficient security in a cloud-of-clouds","author":"li","year":"0","journal-title":"Proceedings of 4th USENIX Workshop on Hot Topics in Storage and File Systems"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/MIC.2016.45"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2015.7208297"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1145\/2484313.2484340"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1145\/2133601.2133603"},{"key":"ref4","year":"2014","journal-title":"The digital universe of opportunities Rich data and the increasing value of the internet of things"},{"key":"ref3","article-title":"The digital universe in 2020: Big data, bigger digital shadows, biggest growth in the far east","author":"gantz","year":"2012","journal-title":"IDC IView IDC Analyze the Future"},{"key":"ref6","first-page":"1","article-title":"Primary data deduplication-large scale study and system design","author":"el-shimi","year":"0","journal-title":"Proc Conf USENIX Annu Tech Conf"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2078861.2078864"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2385603.2385606"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1145\/2414456.2414504"},{"key":"ref7","first-page":"1","article-title":"Characteristics of backup workloads in production systems","author":"wallace","year":"0","journal-title":"Proc 10th USENIX Conf File Storage Technol"},{"key":"ref49","author":"macdonald","year":"2000","journal-title":"File system support for delta compression"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/NTMS.2012.6208705"},{"key":"ref9","author":"dubois","year":"2011","journal-title":"Key Considerations As Deduplication Evolves into Primary Storage"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1007\/0-387-34805-0_21"},{"key":"ref46","author":"burrows","year":"1994","journal-title":"A block-sorting lossless data compression algorithm"},{"key":"ref45","first-page":"559","article-title":"Parallel data compression with bzip2","volume":"16","author":"gilchrist","year":"0","journal-title":"Proc 16th Int Conf Parallel Distrib Comput Syst"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/279310.279321"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1002\/spe.4380150703"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/214762.214771"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1147\/rd.282.0135"},{"key":"ref44","year":"0","journal-title":"7zip"},{"key":"ref43","year":"1991","journal-title":"Deflate compression"},{"key":"ref73","year":"0","journal-title":"ZfS"},{"key":"ref72","first-page":"1","article-title":"Disk failures in the real world: What does an MTTF of 1,000,000 hours mean to you?","volume":"7","author":"schroeder","year":"0","journal-title":"Proc 5th USENIX Conf File Storage Technol"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/20.703881"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.14"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/NAS.2012.46"},{"key":"ref77","author":"rabin","year":"1981","journal-title":"Fingerprinting by random polynomials"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/2398776.2398827"},{"key":"ref75","first-page":"1","article-title":"Bimodal content defined chunking for backup streams","author":"kruus","year":"0","journal-title":"Proc 7th USENIX Conf File Storage Technol"},{"key":"ref78","author":"eshghi","year":"2005","journal-title":"A framework for analyzing and improving content-based chunking algorithms"},{"key":"ref79","first-page":"1","article-title":"Fingerdiff: Improved duplicate elimination in storage systems","author":"bobbarjung","year":"0","journal-title":"Proc Mass Storage Syst Technol"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOT.2009.5366623"},{"key":"ref62","first-page":"1","article-title":"Building a high-performance deduplication system","author":"guo","year":"0","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref61","first-page":"1","article-title":"ChunkStash: Speeding up inline storage deduplication using flash memory","author":"debnath","year":"0","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref63","first-page":"285","article-title":"Silo: A similarity-locality based near-exact deduplication scheme with low ram overhead and high throughput","author":"xia","year":"0","journal-title":"Proc USENIX Conf USENIX Annu Tech Conf"},{"key":"ref64","first-page":"24","article-title":"iDedup: Latency-aware, inline data deduplication for primary storage","author":"srinivasan","year":"0","journal-title":"Proc 10th USENIX Conf File Storage Technol"},{"key":"ref65","first-page":"13","article-title":"An analysis of compare-by-hash","author":"henson","year":"0","journal-title":"Proc 9th Workshop on Hot Topics in Operating Systems"},{"key":"ref66","first-page":"85","article-title":"Compare-by-hash: A reasoned analysis","author":"black","year":"0","journal-title":"Proc General Track USENIX Annual Technical Conf"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/P2P.2010.5570004"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1016\/j.peva.2014.07.016"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.17487\/rfc3174"},{"key":"ref197","first-page":"1","article-title":"Exploiting similarity for multi-source downloads using file handprints","author":"pucha","year":"0","journal-title":"Proc 7th USENIX Conf Netw Syst Design Implementation"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1145\/2348543.2348565"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1145\/2486001.2491714"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2010.26"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-25821-3_5"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2010.17"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1145\/1996130.1996151"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1145\/1383422.1383443"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2012.6232377"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1145\/2504730.2504762"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2009.103"},{"key":"ref191","first-page":"279","article-title":"XLH: More effective memory deduplication scanners through cross-layer hints","author":"miller","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2010.263"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1145\/1534530.1534540"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/1210596.1210599"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2009.06.004"},{"key":"ref98","article-title":"Metadata considered harmful&#x2026;to deduplication","author":"lin","year":"0","journal-title":"Proceedings of 4th USENIX Workshop on Hot Topics in Storage and File Systems"},{"key":"ref99","first-page":"2","article-title":"Opendedup: Open-source deduplication put to the test","volume":"2013","author":"bowling","year":"2013","journal-title":"Linux J"},{"key":"ref96","first-page":"1","article-title":"Shredder: GPU-accelerated incremental storage and computation","author":"bhatotia","year":"0","journal-title":"Proc 10th USENIX Conf File Storage Technol"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1145\/2141702.2141705"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2015.7218510"},{"key":"ref81","first-page":"14","article-title":"EndRE: An end-system redundancy elimination service for enterprises","author":"aggarwal","year":"0","journal-title":"Proc 7th USENIX Conf Netw Syst Design Implementation"},{"key":"ref84","first-page":"1","article-title":"Anchor-driven subchunk deduplication","author":"roma?ski","year":"0","journal-title":"Proc the 4th Annual Int'l Conf Systems and Storage"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2015.7208290"},{"key":"ref80","author":"teodosiu","year":"2006","journal-title":"Optimizing file replication over limited bandwidth networks using remote differential compression"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1145\/1555349.1555355"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2010.37"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.48"},{"key":"ref87","first-page":"1","article-title":"Some applications of Rabin&#x2019;s fingerprinting method","author":"broder","year":"1993","journal-title":"Sequences II Methods in Communication Security and Computer Science"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/2789168.2790094"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.51"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1145\/1851476.1851497"},{"key":"ref100","first-page":"1","article-title":"Accelerating data deduplication by exploiting pipelining and parallelism with multicore or manycore processors","author":"xia","year":"0","journal-title":"Proc 10th USENIX Conf File Storage Technol"},{"key":"ref209","year":"2006","journal-title":"Symantec looks beyond vrtual tape"},{"key":"ref203","year":"2014","journal-title":"Eliminate the boundaries of traditional backup and archive"},{"key":"ref204","year":"2014","journal-title":"HYDRAstor&#x2014;Scale-out Grid Storage Platform"},{"key":"ref201","first-page":"91","article-title":"Leveraging value locality in optimizing NAND flash-based SSDs","author":"gupta","year":"0","journal-title":"Proc 9th USENIX Conf File Storage Technol"},{"key":"ref202","first-page":"319","article-title":"Secure deduplication of general computations","author":"tang","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref207","year":"2014","journal-title":"Is DDBoost a &#x2018;standard&#x2019;"},{"key":"ref208","year":"0","journal-title":"Oracle Database Backup and Recovery User's Guide"},{"key":"ref205","year":"0","journal-title":"Commvault Simpana software"},{"key":"ref206","year":"2014","journal-title":"Worldwide Purpose-Built Backup Appliance (PBBA) Market Posts 9 7% Year-Over-Year Revenue Growth in Fourth Quarter of 2013"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2014.6855536"},{"key":"ref210","year":"0","journal-title":"Avamar deduplication backup software and system"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544846"},{"key":"ref213","first-page":"24","article-title":"Generating realistic datasets for deduplication analysis","author":"tarasov","year":"0","journal-title":"Proc USENIX Conf Annu Tech Conf"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33615-7_9"},{"key":"ref215","first-page":"317","article-title":"SDGen: Mimicking datasets for content generation in storage benchmarks","author":"gracia-tinedo","year":"0","journal-title":"Proc USENIX Conf File Storage Technol"},{"key":"ref216","year":"2006","journal-title":"Symantec OpenStorage"},{"key":"ref217","author":"cohen","year":"2008","journal-title":"The BitTorrent Protocol Specification"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2012.49"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2014.6855555"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2015.2456015"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749736"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2005.47"},{"key":"ref124","first-page":"113","article-title":"Application-specific delta-encoding via resemblance detection","author":"douglis","year":"0","journal-title":"Proc General Track USENIX Annual Technical Conf"},{"key":"ref129","first-page":"181","article-title":"Accelerating restore and garbage collection in deduplication-based backup systems via exploiting historical information","author":"fu","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2012.32"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.84"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.76"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/NAS.2012.48"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1145\/2367589.2367600"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1145\/2500727.2500731"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC.2011.82"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11197-1_35"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2410781"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/2641572"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1147\/sj.52.0078"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1145\/2512348"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1145\/2485732.2485753"},{"key":"ref142","first-page":"161","article-title":"Concurrent deletion in a distributed content-addressable storage system with global deduplication","author":"strzelczak","year":"0","journal-title":"Proc 11th USENIX Conf File Storage Technol"},{"key":"ref143","first-page":"83","article-title":"Dmdedup: Device mapper target for data deduplication","author":"tarasov","year":"0","journal-title":"Proc Ottawa Linux Symp"},{"key":"ref2","year":"2010","journal-title":"2011 Digital Universe Study"},{"key":"ref144","first-page":"81","article-title":"Memory efficient sanitization of a deduplicated storage system","author":"botelho","year":"0","journal-title":"Proc 11th USENIX Conf File Storage Technol"},{"key":"ref1","article-title":"The data deluge","year":"2010","journal-title":"The Economist"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2009.12.003"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2012.6232390"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2010.5496992"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1145\/1534530.1534539"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1145\/2485732.2485748"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2010.69"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2010.5496987"},{"key":"ref103","first-page":"331","article-title":"Design tradeoffs for data deduplication performance in backup workloads","author":"fu","year":"0","journal-title":"Proc USENIX Conf File Storage Technol"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2014.2308181"},{"key":"ref111","first-page":"15","article-title":"Tradeoffs in scalable data routing for deduplication clusters","author":"dong","year":"0","journal-title":"Proc 9th USENIX Conf File Storage Technol"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35170-9_18"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470468"},{"key":"ref10","first-page":"13","article-title":"Single instance storage in Windows 2000","author":"bolosky","year":"0","journal-title":"Proc 4th Usenix Windows Systems Symp"},{"key":"ref11","first-page":"1","article-title":"Venti: A new approach to archival storage","author":"quinlan","year":"0","journal-title":"Proc USENIX Conf File Storage Technol"},{"key":"ref12","first-page":"73","article-title":"Alternatives for detecting redundancy in storage systems data","author":"policroniades","year":"0","journal-title":"Proc General Track USENIX Annual Technical Conf"},{"key":"ref13","first-page":"1","article-title":"Avoiding the disk bottleneck in the data domain deduplication file system","volume":"8","author":"zhu","year":"0","journal-title":"Proc 6th USENIX Conf File Storage Technol"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/502034.502052"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1996130.1996153"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1977.1055714"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1016\/j.jalgor.2003.12.002"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1978.1055934"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989327"},{"key":"ref18","author":"oberhumer","year":"1997","journal-title":"LZO Real-Time Data Compression Library"},{"key":"ref19","first-page":"29","article-title":"LZW data compression","volume":"14","author":"nelson","year":"1989","journal-title":"Dr Dobb&#x2019;s J"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45123-4_1"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391246"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2012.6232380"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1921015"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/SEQUEN.1997.666900"},{"key":"ref120","first-page":"1","article-title":"Redundancy elimination within large collections of files","author":"kulkarni","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1145\/1831407.1831429"},{"key":"ref122","author":"trendafilov","year":"2002","journal-title":"Zdelta An Efficient Delta Compression Tool"},{"key":"ref123","first-page":"1","article-title":"Finding similar files in a large file system","volume":"94","author":"manber","year":"0","journal-title":"Proc USENIX Winter"}],"container-title":["Proceedings of the IEEE"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielaam\/5\/7547344\/7529062-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5\/7547344\/07529062.pdf?arnumber=7529062","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T11:30:56Z","timestamp":1749036656000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7529062\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,9]]},"references-count":219,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/jproc.2016.2571298","relation":{},"ISSN":["0018-9219","1558-2256"],"issn-type":[{"value":"0018-9219","type":"print"},{"value":"1558-2256","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,9]]}}}