{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T00:02:34Z","timestamp":1755993754786,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T00:00:00Z","timestamp":1720396800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Research Grants Council of Hong Kong","award":["GRF 11209122"],"award-info":[{"award-number":["GRF 11209122"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,8]]},"DOI":"10.1145\/3655038.3665940","type":"proceedings-article","created":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T00:19:48Z","timestamp":1719447588000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Is Low Similarity Threshold A Bad Idea in Delta Compression?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5531-474X","authenticated-orcid":false,"given":"Hongming","family":"Huang","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6431-9868","authenticated-orcid":false,"given":"Chun Jason","family":"Xue","sequence":"additional","affiliation":[{"name":"Mohamed bin Zayed University of Artificial Intelligence, Masdar City, Abu Dhabi, United Arab Emirates"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3775-911X","authenticated-orcid":false,"given":"Nan","family":"Guan","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9359-9571","authenticated-orcid":false,"given":"Hong","family":"Xu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"ACM Symposium on Operating Systems Principles (SOSP). 174--187","author":"Athicha Muthitacharoen","year":"2001","unstructured":"Muthitacharoen Athicha, Benjie Chen, and David Mazi\u00e8res. 2001. A low-bandwidth network file system. In ACM Symposium on Operating Systems Principles (SOSP). 174--187."},{"key":"e_1_3_2_1_2_1","unstructured":"Andrei Z Broder. 1997. On the resemblance and containment of documents. In Compression and Complexity of Sequences (SEQUENCES). 21--29."},{"key":"e_1_3_2_1_3_1","volume-title":"Identifying and Filtering Near-Duplicate Documents. In 11th Annual Symposium on Combinatorial Pattern Matching (CPM)","author":"Broder Andrei Z.","year":"2000","unstructured":"Andrei Z. Broder. 2000. Identifying and Filtering Near-Duplicate Documents. In 11th Annual Symposium on Combinatorial Pattern Matching (CPM), Montreal, Canada, June 21-23. 1--10."},{"key":"e_1_3_2_1_4_1","volume-title":"Chunk2vec: A novel resemblance detection scheme based on Sentence-BERT for post-deduplication delta compression in network transmission. IET Communications","author":"Wang Keguan","year":"2024","unstructured":"ChunzhiWang, Keguan Wang, Min Li, Feifei Wei, and Neal Xiong. 2024. Chunk2vec: A novel resemblance detection scheme based on Sentence-BERT for post-deduplication delta compression in network transmission. IET Communications (2024), 145--159."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Yann Collet and Murray Kucherawy. 2021. Zstandard Compression and the 'application\/zstd' Media Type. Technical Report. https:\/\/www.rfc-editor.org\/rfc\/rfc8878","DOI":"10.17487\/RFC8878"},{"key":"e_1_3_2_1_6_1","volume-title":"USENIX Annual Technical Conference (ATC). 113--126","author":"Douglis Fred","year":"2003","unstructured":"Fred Douglis and Arun Iyengar. 2003. Application-specific Delta-encoding via Resemblance Detection. In USENIX Annual Technical Conference (ATC). 113--126."},{"key":"e_1_3_2_1_7_1","volume-title":"Primary Data Deduplication---Large Scale Study and System Design. In USENIX Annual Technical Conference (ATC). 285--296","author":"El-Shimi Ahmed","year":"2012","unstructured":"Ahmed El-Shimi, Ran Kalach, Ankit Kumar, Adi Ottean, Jin Li, and Sudipta Sengupta. 2012. Primary Data Deduplication---Large Scale Study and System Design. In USENIX Annual Technical Conference (ATC). 285--296."},{"key":"e_1_3_2_1_8_1","volume-title":"Palantir: Hierarchical Similarity Detection for Post-Deduplication Delta Compression. In ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS).","author":"Huang Hongming","year":"2024","unstructured":"Hongming Huang, Peng Wang, Qiang Su, Hong Xu, Chun Jason Xue, and Andr\u00e9 Brinkmann. 2024. Palantir: Hierarchical Similarity Detection for Post-Deduplication Delta Compression. In ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2012.44"},{"key":"e_1_3_2_1_10_1","volume-title":"USENIX Conference on File and Storage Technologies (FAST). 257--271","author":"Lin Xing","year":"2014","unstructured":"Xing Lin, Guanlin Lu, Fred Douglis, Philip Shilane, and Grant Wallace. 2014. Migratory compression: coarse-grained data reordering to improve compressibility. In USENIX Conference on File and Storage Technologies (FAST). 257--271."},{"key":"e_1_3_2_1_11_1","unstructured":"Joshua P. MacDonald. 2000. File system support for delta compression. http:\/\/www.xmailserver.com\/xdfs.pdf."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.14"},{"key":"e_1_3_2_1_13_1","volume":"201","author":"Meyer Dutch T.","unstructured":"Dutch T. Meyer and William J. Bolosky. 2012. A study of practical deduplication. ACM Transactions on Storage (ToS) (2012), 14:1--14:20.","journal-title":"William J. Bolosky."},{"key":"e_1_3_2_1_14_1","volume-title":"Montesano Nicolo, Tariq Muhammad Imran, De la Hoz-Franco Emiro, and De-La-Hoz-Valdiris Ethel.","author":"Muhammad Naeem","year":"2022","unstructured":"Naeem Muhammad, Jamal Tauseef, Diaz-Martinez Jorge, Butt Shariq Aziz, Montesano Nicolo, Tariq Muhammad Imran, De la Hoz-Franco Emiro, and De-La-Hoz-Valdiris Ethel. 2022. Trends and future perspective challenges in big data. In Advances in Intelligent Data Analysis and Applications. 309--325."},{"key":"e_1_3_2_1_15_1","volume-title":"DeepSketch: A New Machine Learning-Based Reference Search Technique for Post-Deduplication Delta Compression. In USENIX Conference on File and Storage Technologies (FAST). 247--264","author":"Park Jisung","year":"2022","unstructured":"Jisung Park, Jeonggyun Kim, Yeseong Kim, Sungjin Lee, and Onur Mutlu. 2022. DeepSketch: A New Machine Learning-Based Reference Search Technique for Post-Deduplication Delta Compression. In USENIX Conference on File and Storage Technologies (FAST). 247--264."},{"key":"e_1_3_2_1_16_1","volume-title":"Alternatives for Detecting Redundancy in Storage Systems Data. In USENIX Annual Technical Conference (ATC). 73--86","author":"Policroniades Calicrates","year":"2004","unstructured":"Calicrates Policroniades and Ian Pratt. 2004. Alternatives for Detecting Redundancy in Storage Systems Data. In USENIX Annual Technical Conference (ATC). 73--86."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Claude E. Shannon. 1948. A mathematical theory of communication. The Bell system technical journal (1948) 379--423.","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"},{"key":"e_1_3_2_1_18_1","volume-title":"WAN-optimized replication of backup datasets using stream-informed delta compression. ACM Transactions on Storage (ToS)","author":"Shilane Philip","year":"2012","unstructured":"Philip Shilane, Mark Huang, Grant Wallace, and Windsor Hsu. 2012. WAN-optimized replication of backup datasets using stream-informed delta compression. ACM Transactions on Storage (ToS) (2012), 13:1--13:26."},{"key":"e_1_3_2_1_19_1","volume-title":"Delta Compressed and Deduplicated Storage Using Stream-Informed Locality. In USENIX Workshop on Hot Topics in Storage and File Systems (HotStorage). 1--10","author":"Shilane Philip","year":"2012","unstructured":"Philip Shilane, Grant Wallace, Mark Huang, and Windsor Hsu. 2012. Delta Compressed and Deduplicated Storage Using Stream-Informed Locality. In USENIX Workshop on Hot Topics in Storage and File Systems (HotStorage). 1--10."},{"key":"e_1_3_2_1_20_1","volume-title":"USENIX Conference on File and Storage Technologies (FAST). 33--48","author":"Wallace Grant","year":"2012","unstructured":"Grant Wallace, Fred Douglis, Hangwei Qian, Philip Shilane, Stephen Smaldone, Mark Chamness, and Windsor Hsu. 2012. Characteristics of backup workloads in production systems. In USENIX Conference on File and Storage Technologies (FAST). 33--48."},{"key":"e_1_3_2_1_21_1","volume-title":"USENIX Annual Technical Conference (ATC). 101--114","author":"Xia Wen","year":"2016","unstructured":"Wen Xia, Yukun Zhou, Hong Jiang, Dan Feng, Yu Hua, Yuchong Hu, Liu, and Yucheng Zhang. 2016. FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data Deduplication. In USENIX Annual Technical Conference (ATC). 101--114."},{"key":"e_1_3_2_1_22_1","volume-title":"USENIX Conference on File and Storage Technologies (FAST). 121--128","author":"Zhang Yucheng","year":"2019","unstructured":"Yucheng Zhang, Wen Xia, Dan Feng, Hong Jiang, Yu Hua, and Qiang Wang. 2019. Finesse: Fine-Grained Feature Locality based Fast Resemblance Detection for Post-Deduplication Delta Compression. In USENIX Conference on File and Storage Technologies (FAST). 121--128."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1978.1055934"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00048"}],"event":{"name":"HOTSTORAGE '24: 16th ACM Workshop on Hot Topics in Storage and File Systems","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Santa Clara CA USA","acronym":"HOTSTORAGE '24"},"container-title":["Proceedings of the 16th ACM Workshop on Hot Topics in Storage and File Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3655038.3665940","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3655038.3665940","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:10:00Z","timestamp":1755915000000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3655038.3665940"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,8]]},"references-count":24,"alternative-id":["10.1145\/3655038.3665940","10.1145\/3655038"],"URL":"https:\/\/doi.org\/10.1145\/3655038.3665940","relation":{},"subject":[],"published":{"date-parts":[[2024,7,8]]},"assertion":[{"value":"2024-07-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}