{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T23:08:30Z","timestamp":1774307310480,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3730424","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"580-595","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["DEDUPKV: A Space-Efficient and High-Performance Key-Value Store via Fine-Grained Deduplication"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9011-6431","authenticated-orcid":false,"given":"Safdar","family":"Jamil","sequence":"first","affiliation":[{"name":"Sogang University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2603-3516","authenticated-orcid":false,"given":"Awais","family":"Khan","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, TN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5071-2861","authenticated-orcid":false,"given":"Xubin","family":"He","sequence":"additional","affiliation":[{"name":"Temple University, Philadelphia, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8786-3850","authenticated-orcid":false,"given":"Youngjae","family":"Kim","sequence":"additional","affiliation":[{"name":"Sogang University, Seoul, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Mohammadamin Ajdari Pyeongsu Park Dongup Kwon Joonsung Kim and Jangwoo Kim. 2017. A scalable HW-based inline deduplication for SSD arrays. 
IEEE Computer Architecture Letters 17 1 (2017) 47\u201350.","DOI":"10.1109\/LCA.2017.2753258"},{"key":"e_1_3_3_1_3_2","first-page":"281","volume-title":"20th USENIX Conference on File and Storage Technologies (FAST 22)","author":"Bacs Andrei","year":"2022","unstructured":"Andrei Bacs, Saidgani Musaev, Kaveh Razavi, Cristiano Giuffrida, and Herbert Bos. 2022. DUPEFS: Leaking Data Over the Network With Filesystem Deduplication Side Channels. In 20th USENIX Conference on File and Storage Technologies (FAST 22). USENIX Association, Santa Clara, CA, 281\u2013296."},{"key":"e_1_3_3_1_4_2","first-page":"363","volume-title":"2017 USENIX Annual Technical Conference (USENIX ATC 17)","author":"Balmau Oana","year":"2017","unstructured":"Oana Balmau, Diego Didona, Rachid Guerraoui, Willy Zwaenepoel, Huapeng Yuan, Aashray Arora, Karan Gupta, and Pavan Konka. 2017. TRIAD: Creating Synergies Between Memory, Disk and Log in Log Structured Key-Value Stores. In 2017 USENIX Annual Technical Conference (USENIX ATC 17). USENIX Association, Santa Clara, CA, 363\u2013375."},{"key":"e_1_3_3_1_5_2","first-page":"753","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Balmau Oana","year":"2019","unstructured":"Oana Balmau, Florin Dinu, Willy Zwaenepoel, Karan Gupta, Ravishankar Chandhiramoorthi, and Diego Didona. 2019. SILK: Preventing Latency Spikes in Log-Structured Merge Key-Value Stores. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). USENIX Association, Renton, WA, 753\u2013766."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.5555\/3386691.3386712"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Zhichao Cao Hao Wen Xiongzi Ge Jingwei Ma Jim Diehl and David\u00a0HC Du. 2019. TDDFS: A tier-aware data deduplication-based file system. 
ACM Transactions on Storage (TOS) 15 1 (2019) 1\u201326.","DOI":"10.1145\/3295461"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Fay Chang Jeffrey Dean Sanjay Ghemawat Wilson\u00a0C Hsieh Deborah\u00a0A Wallach Mike Burrows Tushar Chandra Andrew Fikes and Robert\u00a0E Gruber. 2008. Bigtable: A distributed storage system for structured data. ACM Transactions on Computer Systems (TOCS) 26 2 (2008) 1\u201326.","DOI":"10.1145\/1365815.1365816"},{"key":"e_1_3_3_1_9_2","unstructured":"Tianqi Chen Mu Li Yutian Li Min Lin Naiyan Wang Minjie Wang Tianjun Xiao Bing Xu Chiyuan Zhang and Zheng Zhang. 2015. MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. ArXiv abs\/1512.01274 (2015)."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Wande Chen Zhenke Chen Dingding Li Hai Liu and Yong Tang. 2021. Low-overhead inline deduplication for persistent memory. Transactions on Emerging Telecommunications Technologies 32 8 (2021) e4079.","DOI":"10.1002\/ett.4079"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3465998.3466004"},{"key":"e_1_3_3_1_12_2","unstructured":"William Cohen. 2004. Enron Email Dataset. https:\/\/www.cs.cmu.edu\/~enron\/. Accessed: 2024-10-15."},{"key":"e_1_3_3_1_13_2","unstructured":"Yann Collet. 2011. LZ4 Compression. https:\/\/github.com\/lz4\/lz4. Accessed: 2024-10-16."},{"key":"e_1_3_3_1_14_2","unstructured":"Yann Collet. 2011. LZ4HC Compression. https:\/\/github.com\/lz4\/lz4. LZ4 high-compression mode Accessed: 2024-10-16."},{"key":"e_1_3_3_1_15_2","unstructured":"Yann Collet. 2016. Zstandard Compression. https:\/\/github.com\/facebook\/zstd. Accessed: 2024-10-16."},{"key":"e_1_3_3_1_16_2","unstructured":"Brian\u00a0F. Cooper Adam Silberstein Erwin Tam Raghu Ramakrishnan and Russell Sears. 2010. YCSB: Yahoo! Cloud Serving Benchmark. https:\/\/github.com\/brianfrankcooper\/YCSB. 
Accessed: 2024-10-15."},{"key":"e_1_3_3_1_17_2","first-page":"402","volume-title":"Experimental IR Meets Multilinguality, Multimodality, and Interaction: 10th International Conference of the CLEF Association, CLEF 2019, Lugano, Switzerland, September 9\u201312, 2019, Proceedings","author":"Daelemans Walter","year":"2019","unstructured":"Walter Daelemans, Mike Kestemont, Enrique Manjavacas, Martin Potthast, Francisco Rangel, Paolo Rosso, G\u00fcnther Specht, Efstathios Stamatatos, Benno Stein, Michael Tschuggnall, Matti Wiegmann, and Eva Zangerle. 2019. Overview of PAN 2019: Bots and Gender Profiling, Celebrity Profiling, Cross-Domain Authorship Attribution and Style Change Detection. In Experimental IR Meets Multilinguality, Multimodality, and Interaction: 10th International Conference of the CLEF Association, CLEF 2019, Lugano, Switzerland, September 9\u201312, 2019, Proceedings (Lugano, Switzerland). Springer-Verlag, Berlin, Heidelberg, 402\u2013416."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"D. Eastlake and P. Jones. 2001. RFC3174: US Secure Hash Algorithm 1 (SHA1).","DOI":"10.17487\/rfc3174"},{"key":"e_1_3_3_1_19_2","unstructured":"Inc. Facebook. 2013. RocksDB. https:\/\/rocksdb.org. Version 6.29 Accessed: 2024-10-15."},{"key":"e_1_3_3_1_20_2","unstructured":"Inc. Facebook. 2018. BlobDB. https:\/\/rocksdb.org. Version 6.29 Accessed: 2024-10-15."},{"key":"e_1_3_3_1_21_2","unstructured":"Inc. Facebook. 2018. Cassandra on RocksDB at Instagram. https:\/\/developers.facebook.com\/videos\/f8-2018\/cassandra-on-rocksdb-at-instagram\/. Accessed: 2024-10-15."},{"key":"e_1_3_3_1_22_2","unstructured":"Inc. Facebook. 2024. Facebook. https:\/\/www.facebook.com. Accessed: 2024-10-19."},{"key":"e_1_3_3_1_23_2","unstructured":"Apache\u00a0Software Foundation. 2008. Apache Cassandra: A Highly Scalable Distributed Database. https:\/\/cassandra.apache.org\/. Accessed: 2024-10-15."},{"key":"e_1_3_3_1_24_2","unstructured":"Google. 2011. 
Snappy Compression. https:\/\/github.com\/google\/snappy. Accessed: 2024-10-16."},{"key":"e_1_3_3_1_25_2","unstructured":"Inc. Google. 2011. LevelDB. https:\/\/github.com\/google\/leveldb. Accessed: 2024-10-15."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Safdar Jamil Joseph Ro Joo-Young Hwang and Youngjae Kim. 2024. Efficient Data Placement in Deduplication Enabled ZenFS via CRC-Based Prediction. IEEE Access (2024).","DOI":"10.1109\/ACCESS.2024.3520184"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Charles Jaranilla and Jongmoo Choi. 2023. Requirements and Trade-Offs of Compression Techniques in Key\u2013Value Stores: A Survey. Electronics 12 20 (2023).","DOI":"10.3390\/electronics12204280"},{"key":"e_1_3_3_1_28_2","first-page":"993","volume-title":"Proceedings of the 2018 USENIX Annual Technical Conference (USENIX ATC 18)","author":"Kannan Sudarsun","year":"2018","unstructured":"Sudarsun Kannan, Nitish Bhat, Ada Gavrilovska, Andrea Arpaci-Dusseau, and Remzi Arpaci-Dusseau. 2018. Redesigning LSMs for nonvolatile memory with NoveLSM. In Proceedings of the 2018 USENIX Annual Technical Conference (USENIX ATC 18). 993\u20131005."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Awais Khan Prince Hamandawana and Youngjae Kim. 2020. A Content Fingerprint-Based Cluster-Wide Inline Deduplication for Shared-Nothing Storage Systems. IEEE Access 8 (2020) 209163\u2013209180.","DOI":"10.1109\/ACCESS.2020.3039056"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2018.00016"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2012.6232379"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00134"},{"key":"e_1_3_3_1_33_2","unstructured":"ByteDance Ltd. 2024. ByteDance. https:\/\/www.bytedance.com. 
Accessed: 2024-10-19."},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Lanyue Lu Thanumalayan\u00a0Sankaranarayana Pillai Hariharan Gopalakrishnan Andrea\u00a0C. Arpaci-Dusseau and Remzi\u00a0H. Arpaci-Dusseau. 2017. WiscKey: Separating Keys from Values in SSD-Conscious Storage. ACM Trans. Storage 13 1 Article 5 (March 2017) 28\u00a0pages.","DOI":"10.1145\/3033273"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3591195.3595273"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.5555\/2388996.2389006"},{"key":"e_1_3_3_1_37_2","first-page":"247","volume-title":"20th USENIX Conference on File and Storage Technologies (FAST 22)","author":"Park Jisung","year":"2022","unstructured":"Jisung Park, Jeonggyun Kim, Yeseong Kim, Sungjin Lee, and Onur Mutlu. 2022. DeepSketch: A New Machine Learning-Based Reference Search Technique for Post-Deduplication Delta Compression. In 20th USENIX Conference on File and Storage Technologies (FAST 22). USENIX Association, Santa Clara, CA, 247\u2013264."},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"crossref","unstructured":"Jo\u00e3o Paulo and Jos\u00e9 Pereira. 2014. A Survey and Classification of Storage Deduplication Systems. ACM Comput. Surv. 47 1 Article 11 (June 2014) 30\u00a0pages.","DOI":"10.1145\/2611778"},{"key":"e_1_3_3_1_39_2","first-page":"101","volume-title":"Proceedings of the 2023 USENIX Annual Technical Conference (USENIX ATC 23)","author":"Qiu Jiansheng","year":"2023","unstructured":"Jiansheng Qiu, Yanqi Pan, Wen Xia, Xiaojia Huang, Wenjun Wu, Xiangyu Zou, Shiyi Li, and Yu Hua. 2023. Light-Dedup: A Light-weight Inline Deduplication Framework for Non-Volatile Memory File Systems. In Proceedings of the 2023 USENIX Annual Technical Conference (USENIX ATC 23). 
USENIX Association, Boston, MA, 101\u2013116."},{"key":"e_1_3_3_1_40_2","series-title":"(FAST\u201912)","first-page":"24","volume-title":"Proceedings of the 10th USENIX Conference on File and Storage Technologies","author":"Srinivasan Kiran","year":"2012","unstructured":"Kiran Srinivasan, Tim Bisson, Garth Goodson, and Kaladhar Voruganti. 2012. iDedup: latency-aware, inline data deduplication for primary storage. In Proceedings of the 10th USENIX Conference on File and Storage Technologies (San Jose, CA) (FAST\u201912). USENIX Association, USA, 24."},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Chundong Wang Qingsong Wei Jun Yang Cheng Chen Yechao Yang and Mingdi Xue. 2017. NV-Dedup: High-performance inline deduplication for non-volatile memory. IEEE Trans. Comput. 67 5 (2017) 658\u2013671.","DOI":"10.1109\/TC.2017.2774270"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Hao Wang Jiaxin Ou Ming Zhao Sheng Qiu Yizheng Jiao Yi Wang Qizhong Mao Zhengyu Yang Yang Liu Jianshun Zhang et\u00a0al. 2024. LavaStore: ByteDance\u2019s Purpose-built High-performance Cost-effective Local Storage Engine for Cloud Services. Proceedings of the VLDB Endowment 17 12 (2024) 3799 \u2013 3812.","DOI":"10.14778\/3685800.3685807"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/MSST.2019.00010"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035938"},{"key":"e_1_3_3_1_45_2","series-title":"(USENIX ATC \u201919)","first-page":"633","volume-title":"Proceedings of the 2019 USENIX Conference on Usenix Annual Technical Conference","author":"Yang Qirui","year":"2019","unstructured":"Qirui Yang, Runyu Jin, and Ming Zhao. 2019. SmartDedup: optimizing deduplication for resource-constrained devices. In Proceedings of the 2019 USENIX Conference on Usenix Annual Technical Conference (Renton, WA, USA) (USENIX ATC \u201919). 
USENIX Association, USA, 633\u2013646."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"crossref","unstructured":"Jianwei Yin Yan Tang Shuiguang Deng Ying Li and Albert\u00a0Y Zomaya. 2017. D3: A dynamic dual-phase deduplication framework for distributed primary storage. IEEE Trans. Comput. 67 2 (2017) 193\u2013207.","DOI":"10.1109\/TC.2017.2743199"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"crossref","unstructured":"Hongliang Yu Xu Zhang Wei Huang and Weimin Zheng. 2017. PDFS: Partially Dedupped File System for Primary Workloads. IEEE Transactions on Parallel and Distributed Systems 28 3 (2017) 863\u2013876.","DOI":"10.1109\/TPDS.2016.2594070"},{"key":"e_1_3_3_1_48_2","first-page":"65","volume-title":"21st USENIX Conference on File and Storage Technologies (FAST 23)","author":"Yu Jinghuan","year":"2023","unstructured":"Jinghuan Yu, Sam\u00a0H. Noh, Young ri Choi, and Chun\u00a0Jason Xue. 2023. ADOC: Automatically Harmonizing Dataflow Between Components in Log-Structured Key-Value Stores for Improved Performance. In 21st USENIX Conference on File and Storage Technologies (FAST 23). USENIX Association, Santa Clara, CA, 65\u201380."},{"key":"e_1_3_3_1_49_2","first-page":"769","volume-title":"2020 USENIX annual technical conference (USENIX ATC 20)","author":"Zhao Nannan","year":"2020","unstructured":"Nannan Zhao, Hadeel Albahar, Subil Abraham, Keren Chen, Vasily Tarasov, Dimitrios Skourtis, Lukas Rupprecht, Ali Anwar, and Ali\u00a0R Butt. 2020. DupHunter: Flexible High-Performance Deduplication for Docker Registries. In 2020 USENIX annual technical conference (USENIX ATC 20). 
769\u2013783."}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3730424","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:59:26Z","timestamp":1755867566000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3730424"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":48,"alternative-id":["10.1145\/3721145.3730424","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3730424","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}