{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:46:38Z","timestamp":1773153998830,"version":"3.50.1"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,4]]},"DOI":"10.1109\/msst.2012.6232381","type":"proceedings-article","created":{"date-parts":[[2012,7,19]],"date-time":"2012-07-19T23:41:39Z","timestamp":1342741299000},"page":"1-11","source":"Crossref","is-referenced-by-count":25,"title":["Estimation of deduplication ratios in large data sets"],"prefix":"10.1109","author":[{"given":"Danny","family":"Harnik","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Oded","family":"Margalit","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dalit","family":"Naor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dmitry","family":"Sotnikov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gil","family":"Vernik","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","first-page":"281","article-title":"Estimating simple functions on the union of data streams","author":"gibbons","year":"2001","journal-title":"SPAA"},{"key":"17","first-page":"541","article-title":"Distinct sampling for highly-accurate answers to distinct values queries and event reports","author":"gibbons","year":"2001","journal-title":"VLDB"},{"key":"18","author":"gibbons","year":"2009","journal-title":"Distinct-values Estimation over Data Streams"},{"key":"15","author":"dutch","year":"2009","journal-title":"Understanding Data De-duplication 
Ratios"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1016\/0022-0000(85)90041-8"},{"key":"13","author":"domain","year":"0"},{"key":"14","first-page":"15","article-title":"Tradeoffs in scalable data routing for deduplication clusters","author":"dong","year":"2011","journal-title":"FAST"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/CCP.2011.41"},{"key":"12","first-page":"16","article-title":"Chunkstash: Speeding up inline storage deduplication using flash memory","author":"debnath","year":"2010","journal-title":"Proceedings of the 2010 USENIX Conference on USENIX Annual Technical Conference"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1963.10500830"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1016\/j.dam.2008.06.020"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1145\/1534530.1534540"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1145\/1807085.1807094"},{"key":"24","first-page":"111","article-title":"Sparse indexing: Large scale, inline deduplication using sampling and locality","author":"lillibridge","year":"2009","journal-title":"FAST"},{"key":"25","first-page":"1","article-title":"A study of practical deduplication","author":"meyer","year":"2011","journal-title":"FAST"},{"key":"26","year":"0","journal-title":"Netapp"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1137\/070701649"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOT.2004.1348296"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2010.5461965"},{"key":"3","year":"0","journal-title":"Acronis"},{"key":"2","year":"0","journal-title":"IBM ProtecTIER"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2011.46"},{"key":"1","year":"0","journal-title":"EMC Data Domain"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1109\/P2P.2010.5570004"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOT.2009.5366623"},{"key":"6","first-page":"1","article-title":"Counting distinct elements 
in a data stream","author":"bar-yossef","year":"2002","journal-title":"RANDOM"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1977.1055714"},{"key":"5","year":"0","journal-title":"Avamar"},{"key":"31","first-page":"269","article-title":"Avoiding the disk bottleneck in the data domain deduplication file system","author":"zhu","year":"2008","journal-title":"FAST"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1006\/jcss.1997.1545"},{"key":"9","year":"0","journal-title":"Commvault"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1145\/335168.335230"}],"event":{"name":"2012 IEEE 28th Symposium on Mass Storage Systems and Technologies (MSST)","location":"Pacific Grove, CA, USA","start":{"date-parts":[[2012,4,16]]},"end":{"date-parts":[[2012,4,20]]}},"container-title":["2012 IEEE 28th Symposium on Mass Storage Systems and Technologies (MSST)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6224229\/6232364\/06232381.pdf?arnumber=6232381","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,21]],"date-time":"2017-03-21T16:25:41Z","timestamp":1490113541000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6232381\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,4]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/msst.2012.6232381","relation":{},"subject":[],"published":{"date-parts":[[2012,4]]}}}