{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T09:03:11Z","timestamp":1775638991652,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2015,8,27]],"date-time":"2015-08-27T00:00:00Z","timestamp":1440633600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Intel Science and Technology Center for Cloud Computing"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2015,8,27]]},"DOI":"10.1145\/2806777.2806840","type":"proceedings-article","created":{"date-parts":[[2015,8,24]],"date-time":"2015-08-24T14:09:20Z","timestamp":1440425360000},"page":"222-235","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["Reducing replication bandwidth for distributed document databases"],"prefix":"10.1145","author":[{"given":"Lianghong","family":"Xu","sequence":"first","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew","family":"Pavlo","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sudipta","family":"Sengupta","sequence":"additional","affiliation":[{"name":"Microsoft Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jin","family":"Li","sequence":"additional","affiliation":[{"name":"Microsoft Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gregory R.","family":"Ganger","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2015,8,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Linux SDFS. www.opendedup.org.  Linux SDFS. www.opendedup.org."},{"key":"e_1_3_2_1_2_1","unstructured":"MongoDB. http:\/\/www.mongodb.org.  MongoDB. http:\/\/www.mongodb.org."},{"key":"e_1_3_2_1_3_1","unstructured":"MongoDB Monitoring Service. https:\/\/mms.mongodb.com.  MongoDB Monitoring Service. https:\/\/mms.mongodb.com."},{"key":"e_1_3_2_1_4_1","unstructured":"MurmurHash. https:\/\/sites.google.com\/site\/murmurhash.  MurmurHash. https:\/\/sites.google.com\/site\/murmurhash."},{"key":"e_1_3_2_1_5_1","unstructured":"NetApp Deduplication and Compression. www.netapp.com\/us\/products\/platform-os\/dedupe.html.  NetApp Deduplication and Compression. www.netapp.com\/us\/products\/platform-os\/dedupe.html."},{"key":"e_1_3_2_1_6_1","unstructured":"Ocarina Networks. www.ocarinanetworks.com.  Ocarina Networks. www.ocarinanetworks.com."},{"key":"e_1_3_2_1_7_1","unstructured":"Permabit Data Optimization. www.permabit.com.  Permabit Data Optimization. www.permabit.com."},{"key":"e_1_3_2_1_8_1","unstructured":"Stack Exchange Data Archive. https:\/\/archive.org\/details\/stackexchange.  Stack Exchange Data Archive. https:\/\/archive.org\/details\/stackexchange."},{"key":"e_1_3_2_1_9_1","unstructured":"Wikimedia Downloads. https:\/\/dumps.wikimedia.org.  Wikimedia Downloads. https:\/\/dumps.wikimedia.org."},{"key":"e_1_3_2_1_10_1","unstructured":"Windows Storage Server. technet.microsoft.com\/en-us\/library\/gg232683 (WS.10).aspx.  Windows Storage Server. technet.microsoft.com\/en-us\/library\/gg232683 (WS.10).aspx."},{"key":"e_1_3_2_1_11_1","unstructured":"ZFS Deduplication. blogs.oracle.com\/bonwick\/entry\/zfs_dedup.  ZFS Deduplication. blogs.oracle.com\/bonwick\/entry\/zfs_dedup."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1534530.1534539"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/789086.789698"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOT.2009.5366623"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1281192.1281207"},{"key":"e_1_3_2_1_16_1","volume-title":"Compression and Complexity of Sequences","author":"Broder A.","year":"1997","unstructured":"A. Broder . On the resemblance and containment of documents . Compression and Complexity of Sequences , 1997 . A. Broder. On the resemblance and containment of documents. Compression and Complexity of Sequences, 1997."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/647819.736184"},{"key":"e_1_3_2_1_18_1","volume-title":"USENIX ATC","author":"Clements A.","year":"2009","unstructured":"A. Clements , I. Ahmad , M. Vilayannur , and J. Li . Decentralized Deduplication in SAN Cluster File Systems . In USENIX ATC , 2009 . A. Clements, I. Ahmad, M. Vilayannur, and J. Li. Decentralized Deduplication in SAN Cluster File Systems. In USENIX ATC, 2009."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/876875.878941"},{"key":"e_1_3_2_1_20_1","volume-title":"USENIX Annual Technical Conference","author":"Debnath B.","year":"2010","unstructured":"B. Debnath , S. Sengupta , and J. Li . Chunkstash: Speeding up inline storage deduplication using flash memory . In USENIX Annual Technical Conference , 2010 . B. Debnath, S. Sengupta, and J. Li. Chunkstash: Speeding up inline storage deduplication using flash memory. In USENIX Annual Technical Conference, 2010."},{"key":"e_1_3_2_1_21_1","volume-title":"FAST","author":"Dubnicki C.","year":"2009","unstructured":"C. Dubnicki , L. Gryz , L. Heldt , M. Kaczmarczyk , W. Kilian , P. Strzelczak , J. Szczepkowski , C. Ungureanu , and M. Welnicki . HYDRAstor: a Scalable Secondary Storage . In FAST , 2009 . C. Dubnicki, L. Gryz, L. Heldt, M. Kaczmarczyk, W. Kilian, P. Strzelczak, J. Szczepkowski, C. Ungureanu, and M. Welnicki. HYDRAstor: a Scalable Secondary Storage. In FAST, 2009."},{"key":"e_1_3_2_1_22_1","volume-title":"USENIX Annual Technical Conference","author":"El-Shimi A.","year":"2012","unstructured":"A. El-Shimi , R. Kalach , A. K. Adi , O. J. Li , and S. Sengupta . Primary data deduplication-large scale study and system design . In USENIX Annual Technical Conference , 2012 . A. El-Shimi, R. Kalach, A. K. Adi, O. J. Li, and S. Sengupta. Primary data deduplication-large scale study and system design. In USENIX Annual Technical Conference, 2012."},{"key":"e_1_3_2_1_23_1","volume-title":"Data Sheet","author":"EMC Corporation. EMC Center","year":"2002","unstructured":"EMC Corporation. EMC Center a : Content Addresses Storage System , Data Sheet , April 2002 . EMC Corporation. EMC Centera: Content Addresses Storage System, Data Sheet, April 2002."},{"key":"e_1_3_2_1_24_1","volume-title":"A framework for analyzing and improving content-based chunking algorithms","author":"Eshghi K.","year":"2005","unstructured":"K. Eshghi and H. K. Tang . A framework for analyzing and improving content-based chunking algorithms . 2005 . K. Eshghi and H. K. Tang. A framework for analyzing and improving content-based chunking algorithms. 2005."},{"key":"e_1_3_2_1_25_1","volume-title":"FAST","author":"Jain N.","year":"2005","unstructured":"N. Jain , M. Dahlin , and R. Tewari . Taper: Tiered approach for eliminating redundancy in replica synchronization . In FAST , 2005 . N. Jain, M. Dahlin, and R. Tewari. Taper: Tiered approach for eliminating redundancy in replica synchronization. In FAST, 2005."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2534169.2486019"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.5555\/1247415.1247420"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1247480.1247633"},{"key":"e_1_3_2_1_29_1","volume-title":"FAST","author":"Lillibridge M.","year":"2009","unstructured":"M. Lillibridge , K. Eshghi , D. Bhagwat , V. Deolalikar , G. Trezise , and P. Camble . Sparse indexing: Large scale, inline deduplication using sampling and locality . In FAST , 2009 . M. Lillibridge, K. Eshghi, D. Bhagwat, V. Deolalikar, G. Trezise, and P. Camble. Sparse indexing: Large scale, inline deduplication using sampling and locality. In FAST, 2009."},{"key":"e_1_3_2_1_30_1","volume-title":"Master's thesis","author":"MacDonald J. P.","year":"2000","unstructured":"J. P. MacDonald . File system support for delta compression. Master's thesis , University of California , Berkeley , 2000 . J. P. MacDonald. File system support for delta compression. Master's thesis, University of California, Berkeley, 2000."},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the USENIX","author":"Manber U.","year":"1994","unstructured":"U. Manber Finding similar files in a large file system . In Proceedings of the USENIX Winter 1994 Technical Conference , 1994. U. Manber et al. Finding similar files in a large file system. In Proceedings of the USENIX Winter 1994 Technical Conference, 1994."},{"key":"e_1_3_2_1_32_1","volume-title":"FAST","author":"Meyer D. T.","year":"2011","unstructured":"D. T. Meyer and W. J. Bolosky . A study of practical deduplication . In FAST , 2011 . D. T. Meyer and W. J. Bolosky. A study of practical deduplication. In FAST, 2011."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/502034.502052"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jalgor.2003.12.002"},{"key":"e_1_3_2_1_35_1","volume-title":"NSDI","author":"Pucha H.","year":"2007","unstructured":"H. Pucha , D. G. Andersen , and M. Kaminsky . Exploiting similarity for multi-source downloads using file handprints . In NSDI , 2007 . H. Pucha, D. G. Andersen, and M. Kaminsky. Exploiting similarity for multi-source downloads using file handprints. In NSDI, 2007."},{"key":"e_1_3_2_1_36_1","volume-title":"FAST","author":"Quinlan S.","year":"2002","unstructured":"S. Quinlan and S. Dorward . Venti: A new approach to archival storage . In FAST , 2002 . S. Quinlan and S. Dorward. Venti: A new approach to archival storage. In FAST, 2002."},{"key":"e_1_3_2_1_37_1","unstructured":"M. O. Rabin. Fingerprinting by random polynomials.  M. O. Rabin. Fingerprinting by random polynomials."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2009.01.004"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/2208461.2208466"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/347057.347408"},{"key":"e_1_3_2_1_41_1","volume-title":"FAST","author":"Srinivasan K.","year":"2012","unstructured":"K. Srinivasan , T. Bisson , G. Goodson , and K. Voruganti . id-edup: Latency-aware, inline data deduplication for primary storage . In FAST , 2012 . K. Srinivasan, T. Bisson, G. Goodson, and K. Voruganti. id-edup: Latency-aware, inline data deduplication for primary storage. In FAST, 2012."},{"key":"e_1_3_2_1_42_1","volume-title":"Lossless Compression Handbook","author":"Suel T.","year":"2002","unstructured":"T. Suel and N. Memon . Algorithms for delta compression and remote file synchronization . Lossless Compression Handbook , 2002 . T. Suel and N. Memon. Algorithms for delta compression and remote file synchronization. Lossless Compression Handbook, 2002."},{"key":"e_1_3_2_1_45_1","volume-title":"FAST","author":"Wallace G.","year":"2012","unstructured":"G. Wallace , F. Douglis , H. Qian , P. Shilane , S. Smaldone , M. Chamness , and W. Hsu . Characteristics of backup workloads in production systems . In FAST , 2012 . G. Wallace, F. Douglis, H. Qian, P. Shilane, S. Smaldone, M. Chamness, and W. Hsu. Characteristics of backup workloads in production systems. In FAST, 2012."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2003.1260818"},{"key":"e_1_3_2_1_47_1","volume-title":"FAST","author":"Zhu B.","year":"2008","unstructured":"B. Zhu , K. Li , and R. H. Patterson . Avoiding the disk bottleneck in the data domain deduplication file system . In FAST , 2008 . B. Zhu, K. Li, and R. H. Patterson. Avoiding the disk bottleneck in the data domain deduplication file system. In FAST, 2008."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1977.1055714"}],"event":{"name":"SoCC '15: ACM Symposium on Cloud Computing","location":"Kohala Coast Hawaii","acronym":"SoCC '15","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Sixth ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2806777.2806840","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2806777.2806840","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T05:07:22Z","timestamp":1750223242000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2806777.2806840"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,8,27]]},"references-count":46,"alternative-id":["10.1145\/2806777.2806840","10.1145\/2806777"],"URL":"https:\/\/doi.org\/10.1145\/2806777.2806840","relation":{},"subject":[],"published":{"date-parts":[[2015,8,27]]},"assertion":[{"value":"2015-08-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}