{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T06:12:08Z","timestamp":1747807928014},"reference-count":25,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,3]]},"DOI":"10.1109\/infcom.2010.5461965","type":"proceedings-article","created":{"date-parts":[[2010,5,12]],"date-time":"2010-05-12T20:54:40Z","timestamp":1273697680000},"page":"1-9","source":"Crossref","is-referenced-by-count":16,"title":["Efficient Similarity Estimation for Systems Exploiting Data Redundancy"],"prefix":"10.1109","author":[{"given":"Kanat","family":"Tangwongsan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Himabindu","family":"Pucha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David G.","family":"Andersen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Kaminsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.2172\/10178066","article-title":"Large deviation inequalities for sums of indicator variables","author":"janson","year":"1994","journal-title":"Technical Report"},{"key":"ref11","article-title":"Redundancy elimination within large collections of files","author":"kulkarni","year":"2004","journal-title":"Proc USENIX Annual Technical Conference"},{"key":"ref12","first-page":"1","article-title":"Finding similar files in a large file system","author":"manber","year":"1994","journal-title":"Proc Winter USENIX Conference"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1145\/502034.502052"},{"key":"ref14","article-title":"Supporting practical content-addressable caching with CZIP compression","author":"park","year":"2007","journal-title":"Proceedings of the USENIX Annual Technical Conference"},{"key":"ref15","article-title":"Alternatives for detecting redundancy in storage systems data","author":"policroniades","year":"2004","journal-title":"Proc USENIX Annual Technical Conference"},{"key":"ref16","article-title":"Exploiting similarity for multi-source downloads using file handprints","author":"pucha","year":"2007","journal-title":"Proc 4th USENIX NSDI"},{"key":"ref17","article-title":"Adaptive file transfers for diverse environments","author":"pucha","year":"2008","journal-title":"Proc USENIX Annual Technical Conference"},{"key":"ref18","first-page":"89","article-title":"Venti: A new approach to archival storage","author":"quinlan","year":"2002","journal-title":"Proc USENIX Conference on File and Storage Technologies (FAST)"},{"key":"ref19","article-title":"Fingerprinting by random polynomials","author":"rabin","year":"1981","journal-title":"Technical Report TR-15&#x2013;81"},{"key":"ref4","article-title":"Incentives build robustness in BitTorrent","author":"cohen","year":"2003","journal-title":"Workshop on Economics of Peer-to-Peer Systems"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1016\/S0169-7552(97)00031-7"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1145\/502051.502054"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1145\/1060289.1060316"},{"key":"ref8","article-title":"Application-specific delta-encoding via resemblance detection","author":"douglis","year":"2003","journal-title":"Proceedings of the USENIX Annual Technical Conference"},{"key":"ref7","article-title":"Duplicate management for reference data","author":"denehy","year":"2003","journal-title":"Research Report RJ10305"},{"key":"ref2","first-page":"21","article-title":"On the resemblance and containment of documents","author":"broder","year":"1997","journal-title":"SEQUENCES '97 Proceedings of the Compression and Complexity of Sequences 1997"},{"key":"ref1","article-title":"Shark: Scaling file servers via cooperative caching","author":"annapureddy","year":"2005","journal-title":"Proc 2nd USENIX NSDI"},{"year":"2003","journal-title":"EMC Centera Content addressed storage system","key":"ref9"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1145\/775152.775239"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.1145\/347057.347408","article-title":"A protocol-independent technique for eliminating redundant network traffic","author":"spring","year":"2000","journal-title":"Proc ACM SIG-COMM"},{"year":"0","journal-title":"Riverbed","key":"ref21"},{"key":"ref24","article-title":"An architecture for Internet data transfer","author":"tolia","year":"2006","journal-title":"Proc 3rd Symposium on Networked Systems Design and Implementation (NSDI)"},{"key":"ref23","article-title":"Optimizing file replication over limited bandwidth networks using remote differential compression","author":"teodosiu","year":"2006","journal-title":"Technical Report MSR-TR-2006&#x2013;157"},{"key":"ref25","article-title":"The rsync algorithm","author":"tridgell","year":"1996","journal-title":"Technical Report TR-CS-96&#x2013;05"}],"event":{"name":"IEEE INFOCOM 2010 - IEEE Conference on Computer Communications","start":{"date-parts":[[2010,3,14]]},"location":"San Diego, CA, USA","end":{"date-parts":[[2010,3,19]]}},"container-title":["2010 Proceedings IEEE INFOCOM"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5461675\/5461899\/05461965.pdf?arnumber=5461965","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T04:11:47Z","timestamp":1559103107000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5461965\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,3]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/infcom.2010.5461965","relation":{},"subject":[],"published":{"date-parts":[[2010,3]]}}}