{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T09:03:15Z","timestamp":1775638995214,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,5,9]],"date-time":"2017-05-09T00:00:00Z","timestamp":1494288000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,5,9]]},"DOI":"10.1145\/3035918.3035938","type":"proceedings-article","created":{"date-parts":[[2017,5,10]],"date-time":"2017-05-10T18:09:00Z","timestamp":1494439740000},"page":"1355-1368","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":31,"title":["Online Deduplication for Databases"],"prefix":"10.1145","author":[{"given":"Lianghong","family":"Xu","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"given":"Andrew","family":"Pavlo","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"given":"Sudipta","family":"Sengupta","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]},{"given":"Gregory R.","family":"Ganger","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]}],"member":"320","published-online":{"date-parts":[[2017,5,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Baidu Baike. http:\/\/baike.baidu.com\/.  Baidu Baike. http:\/\/baike.baidu.com\/."},{"key":"e_1_3_2_1_2_1","unstructured":"Enron Email Dataset. https:\/\/www.cs.cmu.edu\/~.\/enron\/.  Enron Email Dataset. https:\/\/www.cs.cmu.edu\/~.\/enron\/."},{"key":"e_1_3_2_1_3_1","unstructured":"InnoDB Compression. http:\/\/dev.mysql.com\/doc\/refman\/5.6\/en\/innodb-compression-internals.html.  InnoDB Compression. http:\/\/dev.mysql.com\/doc\/refman\/5.6\/en\/innodb-compression-internals.html."},{"key":"e_1_3_2_1_4_1","unstructured":"Linux SDFS. www.opendedup.org.  Linux SDFS. www.opendedup.org."},{"key":"e_1_3_2_1_5_1","unstructured":"MongoDB. http:\/\/www.mongodb.org.  MongoDB. http:\/\/www.mongodb.org."},{"key":"e_1_3_2_1_6_1","unstructured":"MurmurHash. https:\/\/sites.google.com\/site\/murmurhash.  MurmurHash. https:\/\/sites.google.com\/site\/murmurhash."},{"key":"e_1_3_2_1_7_1","unstructured":"Ocarina Networks. www.ocarinanetworks.com.  Ocarina Networks. www.ocarinanetworks.com."},{"key":"e_1_3_2_1_8_1","unstructured":"Permabit Data Optimization. www.permabit.com.  Permabit Data Optimization. www.permabit.com."},{"key":"e_1_3_2_1_9_1","unstructured":"Snappy. http:\/\/google.github.io\/snappy\/.  Snappy. http:\/\/google.github.io\/snappy\/."},{"key":"e_1_3_2_1_10_1","unstructured":"Stack Exchange Data Archive. https:\/\/archive.org\/details\/stackexchange.  Stack Exchange Data Archive. https:\/\/archive.org\/details\/stackexchange."},{"key":"e_1_3_2_1_11_1","unstructured":"Bulletin. https:\/\/www.vbulletin.com.  Bulletin. https:\/\/www.vbulletin.com."},{"key":"e_1_3_2_1_12_1","unstructured":"W3Techs. http:\/\/www.w3techs.com.  W3Techs. http:\/\/www.w3techs.com."},{"key":"e_1_3_2_1_13_1","unstructured":"Wikimedia Downloads. https:\/\/dumps.wikimedia.org.  Wikimedia Downloads. https:\/\/dumps.wikimedia.org."},{"key":"e_1_3_2_1_14_1","unstructured":"Wikipedia. https:\/\/www.wikipedia.org\/.  Wikipedia. https:\/\/www.wikipedia.org\/."},{"key":"e_1_3_2_1_15_1","unstructured":"Windows Storage Server. technet.microsoft.com\/en-us\/library\/gg232683(WS.10).aspx.  Windows Storage Server. technet.microsoft.com\/en-us\/library\/gg232683(WS.10).aspx."},{"key":"e_1_3_2_1_16_1","unstructured":"WiredTiger. http:\/\/www.wiredtiger.com\/.  WiredTiger. http:\/\/www.wiredtiger.com\/."},{"key":"e_1_3_2_1_17_1","unstructured":"ZFS Deduplication. blogs.oracle.com\/bonwick\/entry\/zfs_dedup.  ZFS Deduplication. blogs.oracle.com\/bonwick\/entry\/zfs_dedup."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1142473.1142548"},{"key":"e_1_3_2_1_19_1","volume-title":"NetApp deduplication for FAS and V-Series deployment and implementation guide","author":"Alvarez C.","year":"2010","unstructured":"C. Alvarez . NetApp deduplication for FAS and V-Series deployment and implementation guide . 2010 . C. Alvarez. NetApp deduplication for FAS and V-Series deployment and implementation guide. 2010."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1534530.1534539"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/789086.789698"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOT.2009.5366623"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1559845.1559877"},{"key":"e_1_3_2_1_24_1","volume-title":"Compression and Complexity of Sequences","author":"Broder A.","year":"1997","unstructured":"A. Broder . On the resemblance and containment of documents . Compression and Complexity of Sequences , 1997 . A. Broder. On the resemblance and containment of documents. Compression and Complexity of Sequences, 1997."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/647819.736184"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/266220.266223"},{"key":"e_1_3_2_1_27_1","volume-title":"USENIX ATC","author":"Clements A.","year":"2009","unstructured":"A. Clements , I. Ahmad , M. Vilayannur , and J. Li . Decentralized Deduplication in SAN Cluster File Systems . In USENIX ATC , 2009 . A. Clements, I. Ahmad, M. Vilayannur, and J. Li. Decentralized Deduplication in SAN Cluster File Systems. In USENIX ATC, 2009."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/876875.878941"},{"key":"e_1_3_2_1_29_1","volume-title":"Version control with subversion","author":"Collins-Sussman B.","year":"2004","unstructured":"B. Collins-Sussman , B. Fitzpatrick , and M. Pilato . Version control with subversion . 2004 . B. Collins-Sussman, B. Fitzpatrick, and M. Pilato. Version control with subversion. 2004."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/214956.214963"},{"key":"e_1_3_2_1_31_1","volume-title":"USENIX Annual Technical Conference","author":"Debnath B.","year":"2010","unstructured":"B. Debnath , S. Sengupta , and J. Li . Chunkstash: Speeding up inline storage deduplication using flash memory . In USENIX Annual Technical Conference , 2010 . B. Debnath, S. Sengupta, and J. Li. Chunkstash: Speeding up inline storage deduplication using flash memory. In USENIX Annual Technical Conference, 2010."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.17487\/RFC1950"},{"key":"e_1_3_2_1_33_1","volume-title":"FAST","author":"Dubnicki C.","year":"2009","unstructured":"C. Dubnicki , L. Gryz , L. Heldt , M. Kaczmarczyk , W. Kilian , P. Strzelczak , and J. Szczepkowski . Hydrastor: A scalable secondary storage . In FAST , 2009 . C. Dubnicki, L. Gryz, L. Heldt, M. Kaczmarczyk, W. Kilian, P. Strzelczak, and J. Szczepkowski. Hydrastor: A scalable secondary storage. In FAST, 2009."},{"key":"e_1_3_2_1_34_1","volume-title":"FAST","author":"Dubnicki C.","year":"2009","unstructured":"C. Dubnicki , L. Gryz , L. Heldt , M. Kaczmarczyk , W. Kilian , P. Strzelczak , J. Szczepkowski , C. Ungureanu , , and M. Welnicki . HYDRAstor: a Scalable Secondary Storage . In FAST , 2009 . C. Dubnicki, L. Gryz, L. Heldt, M. Kaczmarczyk, W. Kilian, P. Strzelczak, J. Szczepkowski, C. Ungureanu, , and M. Welnicki. HYDRAstor: a Scalable Secondary Storage. In FAST, 2009."},{"key":"e_1_3_2_1_35_1","volume-title":"USENIX Annual Technical Conference","author":"El-Shimi A.","year":"2012","unstructured":"A. El-Shimi , R. Kalach , A. K. Adi , O. J. Li , and S. Sengupta . Primary data deduplication-large scale study and system design . In USENIX Annual Technical Conference , 2012 . A. El-Shimi, R. Kalach, A. K. Adi, O. J. Li, and S. Sengupta. Primary data deduplication-large scale study and system design. In USENIX Annual Technical Conference, 2012."},{"key":"e_1_3_2_1_36_1","first-page":"487","volume-title":"VLDB","author":"Harizopoulos S.","year":"2006","unstructured":"S. Harizopoulos , V. Liang , D. J. Abadi , and S. Madden . Performance tradeoffs in read-optimized databases . In VLDB , pages 487 -- 498 , 2006 . S. Harizopoulos, V. Liang, D. J. Abadi, and S. Madden. Performance tradeoffs in read-optimized databases. In VLDB, pages 487--498, 2006."},{"key":"e_1_3_2_1_37_1","volume-title":"Data compression support in databases","author":"Iyer B.","year":"1994","unstructured":"B. Iyer and D. Wilhite . Data compression support in databases . 1994 . B. Iyer and D. Wilhite. Data compression support in databases. 1994."},{"key":"e_1_3_2_1_38_1","volume-title":"FAST","author":"Jain N.","year":"2005","unstructured":"N. Jain , M. Dahlin , and R. Tewari . Taper: Tiered approach for eliminating redundancy in replica synchronization . In FAST , 2005 . N. Jain, M. Dahlin, and R. Tewari. Taper: Tiered approach for eliminating redundancy in replica synchronization. In FAST, 2005."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1247480.1247633"},{"key":"e_1_3_2_1_40_1","volume-title":"FAST","author":"Lillibridge M.","year":"2009","unstructured":"M. Lillibridge , K. Eshghi , D. Bhagwat , V. Deolalikar , G. Trezise , and P. Camble . Sparse indexing: Large scale, inline deduplication using sampling and locality . In FAST , 2009 . M. Lillibridge, K. Eshghi, D. Bhagwat, V. Deolalikar, G. Trezise, and P. Camble. Sparse indexing: Large scale, inline deduplication using sampling and locality. In FAST, 2009."},{"key":"e_1_3_2_1_41_1","volume-title":"Version control with git: Powerful tools and techniques for collaborative software development","author":"Loeliger J.","year":"2009","unstructured":"J. Loeliger . Version control with git: Powerful tools and techniques for collaborative software development . 2009 . J. Loeliger. Version control with git: Powerful tools and techniques for collaborative software development. 2009."},{"key":"e_1_3_2_1_42_1","volume-title":"Master's thesis","author":"MacDonald J. P.","year":"2000","unstructured":"J. P. MacDonald . File system support for delta compression. Master's thesis , University of California , Berkeley , 2000 . J. P. MacDonald. File system support for delta compression. Master's thesis, University of California, Berkeley, 2000."},{"key":"e_1_3_2_1_43_1","volume-title":"SQL Server Technical Article","author":"Mishra S.","year":"2009","unstructured":"S. Mishra . Data compression : Strategy, capacity planning and best practices . SQL Server Technical Article , 2009 . S. Mishra. Data compression: Strategy, capacity planning and best practices. SQL Server Technical Article, 2009."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/502034.502052"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jalgor.2003.12.002"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.5555\/1315451.1315531"},{"key":"e_1_3_2_1_47_1","volume-title":"NSDI","author":"Pucha H.","year":"2007","unstructured":"H. Pucha , D. G. Andersen , and M. Kaminsky . Exploiting similarity for multi-source downloads using file handprints . In NSDI , 2007 . H. Pucha, D. G. Andersen, and M. Kaminsky. Exploiting similarity for multi-source downloads using file handprints. In NSDI, 2007."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.5555\/645928.672387"},{"key":"e_1_3_2_1_49_1","volume-title":"FAST","author":"Quinlan S.","year":"2002","unstructured":"S. Quinlan and S. Dorward . Venti: A new approach to archiva storage . In FAST , 2002 . S. Quinlan and S. Dorward. Venti: A new approach to archiva storage. In FAST, 2002."},{"key":"e_1_3_2_1_50_1","unstructured":"M. O. Rabin. Fingerprinting by random polynomials.  M. O. Rabin. Fingerprinting by random polynomials."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536222.2536233"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2009.01.004"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.5555\/2208461.2208466"},{"key":"e_1_3_2_1_54_1","volume-title":"USENIX Hot Storage","author":"Shilane P.","year":"2012","unstructured":"P. Shilane , G. Wallace , M. Huang , and W. Hsu . Delta compressed and deduplicated storage using stream-informed locality . USENIX Hot Storage , 2012 . P. Shilane, G. Wallace, M. Huang, and W. Hsu. Delta compressed and deduplicated storage using stream-informed locality. USENIX Hot Storage, 2012."},{"key":"e_1_3_2_1_55_1","volume-title":"FAST","author":"Srinivasan K.","year":"2012","unstructured":"K. Srinivasan , T. Bisson , G. Goodson , and K. Voruganti . idedup: Latency-aware, inline data deduplication for primary storage . In FAST , 2012 . K. Srinivasan, T. Bisson, G. Goodson, and K. Voruganti. idedup: Latency-aware, inline data deduplication for primary storage. In FAST, 2012."},{"key":"e_1_3_2_1_56_1","first-page":"553","volume-title":"VLDB","author":"Stonebraker M.","year":"2005","unstructured":"M. Stonebraker , D. J. Abadi , A. Batkin , X. Chen , M. Cherniack , M. Ferreira , E. Lau , A. Lin , S. Madden , E. O'Neil , : a column-oriented dbms . In VLDB , pages 553 -- 564 , 2005 . M. Stonebraker, D. J. Abadi, A. Batkin, X. Chen, M. Cherniack, M. Ferreira, E. Lau, A. Lin, S. Madden, E. O'Neil, et al. C-store: a column-oriented dbms. In VLDB, pages 553--564, 2005."},{"key":"e_1_3_2_1_57_1","volume-title":"Lossless Compression Handbook","author":"Suel T.","year":"2002","unstructured":"T. Suel and N. Memon . Algorithms for delta compression and remote file synchronization . Lossless Compression Handbook , 2002 . T. Suel and N. Memon. Algorithms for delta compression and remote file synchronization. Lossless Compression Handbook, 2002."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.4380150703"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2009.02.019"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2003.1260818"},{"key":"e_1_3_2_1_64_1","volume-title":"USENIX Annual Technical Conference","author":"Xia W.","year":"2011","unstructured":"W. Xia , H. Jiang , D. Feng , and Y. Hua . Silo: A similarity-locality based near-exact deduplication scheme with low ram overhead and high throughput . In USENIX Annual Technical Conference , 2011 . W. Xia, H. Jiang, D. Feng, and Y. Hua. Silo: A similarity-locality based near-exact deduplication scheme with low ram overhead and high throughput. In USENIX Annual Technical Conference, 2011."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806777.2806840"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2005.47"},{"key":"e_1_3_2_1_67_1","volume-title":"FAST","author":"Zhu B.","year":"2008","unstructured":"B. Zhu , K. Li , and R. H. Patterson . Avoiding the disk bottleneck in the data domain deduplication file system . In FAST , 2008 . B. Zhu, K. Li, and R. H. Patterson. Avoiding the disk bottleneck in the data domain deduplication file system. In FAST, 2008."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1977.1055714"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2006.150"}],"event":{"name":"SIGMOD\/PODS'17: International Conference on Management of Data","location":"Chicago Illinois USA","acronym":"SIGMOD\/PODS'17","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2017 ACM International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3035918.3035938","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3035918.3035938","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:36:48Z","timestamp":1750217808000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3035918.3035938"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5,9]]},"references-count":66,"alternative-id":["10.1145\/3035918.3035938","10.1145\/3035918"],"URL":"https:\/\/doi.org\/10.1145\/3035918.3035938","relation":{},"subject":[],"published":{"date-parts":[[2017,5,9]]},"assertion":[{"value":"2017-05-09","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}