{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:14:53Z","timestamp":1766441693803,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","funder":[{"name":"NSFC","award":["92270204, 62202275"],"award-info":[{"award-number":["92270204, 62202275"]}]},{"name":"CAS Project for Young Scientists in Basic Research","award":["YSBR-118"],"award-info":[{"award-number":["YSBR-118"]}]},{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2022QF012"],"award-info":[{"award-number":["ZR2022QF012"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3719027.3744813","type":"proceedings-article","created":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T23:32:38Z","timestamp":1763854358000},"page":"1709-1723","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["RAG-WM: An Efficient Black-Box Watermarking Approach for Retrieval-Augmented Generation of Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2671-4314","authenticated-orcid":false,"given":"Peizhuo","family":"Lv","sequence":"first","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2042-9855","authenticated-orcid":false,"given":"Mengjie","family":"Sun","sequence":"additional","affiliation":[{"name":"IIE, CAS, Beijing, China and School of Cyber Security, UCAS, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7455-6175","authenticated-orcid":false,"given":"Hao","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Technology, Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0607-4946","authenticated-orcid":false,"given":"XiaoFeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9432-9779","authenticated-orcid":false,"given":"Shengzhi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Metropolitan College, Boston University, Boston, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0000-5031","authenticated-orcid":false,"given":"Yuxuan","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Technology, Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5624-2987","authenticated-orcid":false,"given":"Kai","family":"Chen","sequence":"additional","affiliation":[{"name":"IIE, CAS, Beijing, China and School of Cyber Security, UCAS, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2745-7521","authenticated-orcid":false,"given":"Limin","family":"Sun","sequence":"additional","affiliation":[{"name":"IIE, CAS, Beijing, China and School of Cyber Security, UCAS, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,22]]},"reference":[{"volume-title":"27th USENIX security symposium (USENIX Security 18), 1615- 1631.","author":"Adi Yossi","key":"e_1_3_2_1_1_1","unstructured":"Yossi Adi, Carsten Baum, Moustapha Cisse, Benny Pinkas, and Joseph Keshet. 2018. Turning your weakness into a strength: watermarking deep neural networks by backdooring. In 27th USENIX security symposium (USENIX Security 18), 1615- 1631."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872865"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-155860869-6\/50022-6"},{"key":"e_1_3_2_1_4_1","unstructured":"Chroma AI. 2025. Chroma. https:\/\/docs.trychroma.com\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Gabriel Alon and Michael Kamfonas. 2023. Detecting language model attacks with perplexity. arXiv preprint arXiv:2308.14132."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Maya Anderson Guy Amit and Abigail Goldsteen. 2024. Is my data in your retrieval database? membership inference attacks against retrieval augmented generation. arXiv preprint arXiv:2405.20446.","DOI":"10.5220\/0013108300003899"},{"key":"e_1_3_2_1_7_1","unstructured":"Akari Asai Zeqiu Wu Yizhong Wang Avirup Sil and Hannaneh Hajishirzi. 2023. Self-rag: learning to retrieve generate and critique through self-reflection. arXiv preprint arXiv:2310.11511."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45496-9_14"},{"key":"e_1_3_2_1_9_1","unstructured":"Payal Bajaj et al. 2016. Ms marco: a human generated machine reading comprehension dataset. arXiv preprint arXiv:1611.09268."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.3390\/info11020110"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Sukriti Bhattacharya Agostino Cortesi et al. 2009. A distortion free watermark framework for relational databases. In ICSOFT (2). Citeseer 229- 234.","DOI":"10.5220\/0002256402290234"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-30671-1_58"},{"key":"e_1_3_2_1_13_1","volume-title":"The Thirty Seventh Annual Conference on Learning Theory. PMLR, 1125- 1139","author":"Christ Miranda","year":"2024","unstructured":"Miranda Christ, Sam Gunn, and Or Zamir. 2024. Undetectable watermarks for language models. In The Thirty Seventh Annual Conference on Learning Theory. PMLR, 1125- 1139."},{"key":"e_1_3_2_1_14_1","unstructured":"2025. Code of RAG-WM. https:\/\/github.com\/873984419\/ragwm."},{"key":"e_1_3_2_1_15_1","unstructured":"DBpedia Community. 2024. DBpedia. https:\/\/www.dbpedia.org\/."},{"key":"e_1_3_2_1_16_1","unstructured":"Mintplex Labs Inc. 2025. Anything LLM AI. https:\/\/anythingllm.com\/."},{"key":"e_1_3_2_1_17_1","unstructured":"Gautier Izacard Mathilde Caron Lucas Hosseini Sebastian Riedel Piotr Bojanowski Armand Joulin and Edouard Grave. 2021. Unsupervised dense information retrieval with contrastive learning. arXiv preprint arXiv:2112.09118."},{"key":"e_1_3_2_1_18_1","unstructured":"Neel Jain et al. 2023. Baseline defenses for adversarial attacks against aligned language models. arXiv preprint arXiv:2309.00614."},{"key":"e_1_3_2_1_19_1","unstructured":"Eric Jang Shixiang Gu and Ben Poole. 2016. Categorical reparameterization with gumbel-softmax. arXiv preprint arXiv:1611.01144."},{"key":"e_1_3_2_1_20_1","volume-title":"30th USENIX security symposium (USENIX Security 21)","author":"Jia Hengrui","year":"1937","unstructured":"Hengrui Jia, Christopher A Choquette-Choo, Varun Chandrasekaran, and Nicolas Papernot. 2021. Entangled watermarks as a defense against model extraction. In 30th USENIX security symposium (USENIX Security 21), 1937- 1954."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Zhengbao Jiang Frank F Xu Luyu Gao Zhiqing Sun Qian Liu Jane Dwivedi-Yu Yiming Yang Jamie Callan and Graham Neubig. 2023. Active retrieval augmented generation. arXiv preprint arXiv:2305.06983.","DOI":"10.18653\/v1\/2023.emnlp-main.495"},{"key":"e_1_3_2_1_22_1","unstructured":"Nikola Jovanovic Robin Staab and et al. 2024. Ward: provable rag dataset inference via llm watermarks. arXiv preprint arXiv:2410.03537."},{"key":"e_1_3_2_1_23_1","unstructured":"Muhammad Kamran and Muddassar Farooq. 2018. A comprehensive survey of watermarking relational databases research. arXiv preprint arXiv:1801.08271."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 17061- 17084","author":"Kirchenbauer John","year":"2023","unstructured":"John Kirchenbauer, Jonas Geiping, Yuxin Wen, Jonathan Katz, Ian Miers, and Tom Goldstein. 2023. A watermark for large language models. In International Conference on Machine Learning. PMLR, 17061- 17084."},{"key":"e_1_3_2_1_25_1","unstructured":"John Kirchenbauer et al. 2023. On the reliability of watermarks for large language models. arXiv preprint arXiv:2306.04634."},{"key":"e_1_3_2_1_26_1","unstructured":"Kalpesh Krishna Yixiao Song Marzena Karpinska John Wieting and Mohit Iyyer. 2024. Paraphrasing evades detectors of ai-generated text but retrieval is an effective defense. Advances in Neural Information Processing Systems 36."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_28_1","unstructured":"John Snow Labs. 2025. John Snow Labs. https:\/\/www.johnsnowlabs.com\/healthcare-llm\/."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/219717.219745"},{"key":"e_1_3_2_1_30_1","unstructured":"Mingchen Li Halil Kilicoglu Hua Xu and Rui Zhang. 2024. Biomedrag: a retrieval augmented large language model for biomedicine. arXiv preprint arXiv:2405.00465."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1029146.1029159"},{"key":"e_1_3_2_1_32_1","unstructured":"Yuying Li Gaoyang Liu Chen Wang and Yang Yang. 2024. Generating is believing: membership inference attacks against retrieval-augmented generation. arXiv preprint arXiv:2406.19234."},{"key":"e_1_3_2_1_33_1","unstructured":"Peizhuo Lv Mengjie Sun Hao Wang Xiaofeng Wang Shengzhi Zhang Yuxuan Chen Kai Chen and Limin Sun. 2025. Extended Version of RAG-WM. https:\/\/sites.google.com\/view\/lvpeizhuo\/publication\/."},{"volume-title":"Proceedings of the 2024 Annual Network and Distributed System Security Symposium, NDSS'24.","author":"Peizhuo","key":"e_1_3_2_1_34_1","unstructured":"Peizhuo Lv et al. 2024. Ssl-wm: a black-box watermarking approach for encoders pre-trained by self-supervised learning. In Proceedings of the 2024 Annual Network and Distributed System Security Symposium, NDSS'24."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2008.04.001"},{"key":"e_1_3_2_1_36_1","unstructured":"Meta. 2025. Llama. https:\/\/www.llama.com\/."},{"key":"e_1_3_2_1_37_1","unstructured":"Meta. 2025. Llama RAG. https:\/\/ai.meta.com\/blog\/meta-llama-3- 1\/."},{"key":"e_1_3_2_1_38_1","unstructured":"Microsoft. 2025. Azure. https:\/\/learn.microsoft.com\/zh-cn\/azure\/ai-studio\/concepts\/retrieval-augmented-generation."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Travis Munyer Abdullah Tanvir Arjon Das and Xin Zhong. 2023. Deep-textmark: a deep learning-driven text watermarking approach for identifying large language model generated text. arXiv preprint arXiv:2305.05773.","DOI":"10.1109\/ACCESS.2024.3376693"},{"key":"e_1_3_2_1_40_1","unstructured":"OpenAI. 2025. GPT. https:\/\/openai.com\/index\/gpt-4\/."},{"key":"e_1_3_2_1_41_1","volume-title":"IEEE International Semantic Web Conference.","author":"Paulheim Heiko","year":"2018","unstructured":"Heiko Paulheim. 2018. How much is a triple. In IEEE International Semantic Web Conference."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Saksham Rastogi and Danish Pruthi. 2024. Revisiting the robustness of watermarking to paraphrasing attacks. arXiv preprint arXiv:2411.05277.","DOI":"10.18653\/v1\/2024.emnlp-main.1005"},{"key":"e_1_3_2_1_43_1","unstructured":"Ryoma Sato Yuki Takezawa Han Bao Kenta Niwa and Makoto Yamada. 2023. Embarrassingly simple text watermarks. arXiv preprint arXiv:2310.08920."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2007.190668"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872772"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00530"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE55347.2025.00034"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1161366.1161397"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Harsh Trivedi Niranjan Balasubramanian Tushar Khot and Ashish Sabharwal. 2022. Interleaving retrieval with chain-of-thought reasoning for knowledge-intensive multi-step questions. arXiv preprint arXiv:2212.10509.","DOI":"10.18653\/v1\/2023.acl-long.557"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/IIHMSP.2007.4457595"},{"volume-title":"ACM SIGIR Forum number 1.","author":"Voorhees Ellen","key":"e_1_3_2_1_51_1","unstructured":"Ellen Voorhees, Tasmeer Alam, Steven Bedrick, Dina Demner-Fushman, William R Hersh, Kyle Lo, Kirk Roberts, Ian Soboroff, and Lucy Lu Wang. 2021. Treccovid: constructing a pandemic information retrieval test collection. In ACM SIGIR Forum number 1. Vol. 54. ACM New York, NY, USA, 1- 12."},{"key":"e_1_3_2_1_52_1","unstructured":"2024. Wikipedia. https:\/\/www.wikidata.org\/wiki\/Wikidata:Main_Page."},{"key":"e_1_3_2_1_53_1","unstructured":"Wikipedia. 2024. Statistical Hypothesis Test. https:\/\/en.wikipedia.org\/wiki\/Statistical_hypothesis_test."},{"key":"e_1_3_2_1_54_1","unstructured":"Lee Xiong Chenyan Xiong Ye Li Kwok-Fung Tang Jialin Liu Paul Bennett Junaid Ahmed and Arnold Overwijk. 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Shi-Qi Yan Jia-Chen Gu Yun Zhu and Zhen-Hua Ling. 2024. Corrective retrieval augmented generation. arXiv preprint arXiv:2401.15884.","DOI":"10.2139\/ssrn.5267341"},{"key":"e_1_3_2_1_56_1","unstructured":"Xi Yang Kejiang Chen Weiming Zhang Chang Liu Yuang Qi Jie Zhang Han Fang and Nenghai Yu. 2023. Watermarking text generated by black-box language models. arXiv preprint arXiv:2305.08883."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Zhilin Yang Peng Qi Saizheng Zhang Yoshua Bengio William W Cohen Ruslan Salakhutdinov and Christopher D Manning. 2018. Hotpotqa: a dataset for diverse explainable multi-hop question answering. arXiv preprint arXiv:1809.09600.","DOI":"10.18653\/v1\/D18-1259"},{"key":"e_1_3_2_1_58_1","unstructured":"Hanlin Zhang Benjamin L Edelman Danilo Francati Daniele Venturi Giuseppe Ateniese and Boaz Barak. 2023. Watermarks in the sand: impossibility of strong watermarking for generative models. arXiv preprint arXiv:2311.04378."},{"key":"e_1_3_2_1_59_1","first-page":"1813","volume-title":"33rd USENIX Security Symposium (USENIX Security 24)","author":"Zhang Ruisi","year":"2024","unstructured":"Ruisi Zhang, Shehzeen Samarah Hussain, Paarth Neekhara, and Farinaz Koushanfar. 2024. {Remark-llm}: a robust and efficient watermarking framework for generative large language models. In 33rd USENIX Security Symposium (USENIX Security 24), 1813- 1830."},{"key":"e_1_3_2_1_60_1","unstructured":"Yue Zhang et al. 2023. Siren's song in the ai ocean: a survey on hallucination in large language models. arXiv preprint arXiv:2309.01219."},{"key":"e_1_3_2_1_61_1","first-page":"1739","volume-title":"Proceedings of 2004 International Conference on Machine Learning and Cybernetics (IEEE Cat. No. 04EX826)","volume":"3","author":"Jin Xiao-Ming","year":"2004","unstructured":"Zhi-hao Zhang, Xiao-Ming Jin, Jian-Min Wang, and De-Yi Li. 2004. Watermarking relational database using image. In Proceedings of 2004 International Conference on Machine Learning and Cybernetics (IEEE Cat. No. 04EX826). Vol. 3. IEEE, 1739- 1744."},{"key":"e_1_3_2_1_62_1","unstructured":"Huaqin Zhao et al. 2024. Revolutionizing finance with llms: an overview of applications and insights. arXiv preprint arXiv:2401.11641."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Pengyuan Zhou Lin Wang Zhi Liu Yanbin Hao Pan Hui Sasu Tarkoma and Jussi Kangasharju. 2024. A survey on generative ai and llm for video generation understanding and streaming. arXiv preprint arXiv:2404.16038.","DOI":"10.36227\/techrxiv.171172801.19993069\/v1"},{"key":"e_1_3_2_1_64_1","unstructured":"Wei Zou Runpeng Geng Binghui Wang and Jinyuan Jia. 2024. Poisonedrag: knowledge poisoning attacks to retrieval-augmented generation of large language models. arXiv preprint arXiv:2402.07867."}],"event":{"name":"CCS '25: ACM SIGSAC Conference on Computer and Communications Security","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"],"location":"Taipei Taiwan","acronym":"CCS '25"},"container-title":["Proceedings of the 2025 ACM SIGSAC Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719027.3744813","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:08:21Z","timestamp":1766441301000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719027.3744813"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":64,"alternative-id":["10.1145\/3719027.3744813","10.1145\/3719027"],"URL":"https:\/\/doi.org\/10.1145\/3719027.3744813","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2025-11-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}