{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T16:55:18Z","timestamp":1776444918296,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":148,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,25]]},"DOI":"10.1145\/3708821.3733888","type":"proceedings-article","created":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T06:30:56Z","timestamp":1755066656000},"page":"425-441","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["SoK: The Privacy Paradox of Large Language Models: Advancements, Privacy Risks, and Mitigation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6414-9416","authenticated-orcid":false,"given":"Yashothara","family":"Shanmugarasa","sequence":"first","affiliation":[{"name":"CSIRO's Data61, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3690-0321","authenticated-orcid":false,"given":"Ming","family":"Ding","sequence":"additional","affiliation":[{"name":"CSIRO's Data61, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4286-3774","authenticated-orcid":false,"given":"Chamikara Mahawaga","family":"Arachchige","sequence":"additional","affiliation":[{"name":"CSIRO's Data61, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7698-6214","authenticated-orcid":false,"given":"Thierry","family":"Rakotoarivelo","sequence":"additional","affiliation":[{"name":"CSIRO's Data61, Sydney, Australia"}]}],"member":"320","published-online":{"date-parts":[[2025,8,24]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Accountability Act. 1996. Health insurance portability and accountability act of 1996. Public law 104 (1996) 191."},{"key":"e_1_3_3_2_3_2","volume-title":"Inspect AI: Framework for Large Language Model Evaluations","author":"AI\u00a0Safety\u00a0Institute UK","year":"2024","unstructured":"UK AI\u00a0Safety\u00a0Institute. 2024. Inspect AI: Framework for Large Language Model Evaluations. UK AI Safety Institute. https:\/\/github.com\/UKGovernmentBEIS\/inspect_ai"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305406"},{"key":"e_1_3_3_2_5_2","unstructured":"Shubhi Asthana Ruchi Mahindru Bing Zhang and Jorge Sanz. 2025. Adaptive PII Mitigation Framework for Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.12465 (2025)."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Eugene Bagdasaryan Ren Yi Sahra Ghalebikesabi Peter Kairouz Marco Gruteser Sewoong Oh Borja Balle and Daniel Ramage. 2024. Air Gap: Protecting Privacy-Conscious Conversational Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.05175 (2024).","DOI":"10.1145\/3658644.3690350"},{"key":"e_1_3_3_2_7_2","unstructured":"Yuntao Bai Saurav Kadavath Sandipan Kundu Amanda Askell Jackson Kernion Andy Jones Anna Chen Anna Goldie Azalia Mirhoseini Cameron McKinnon et\u00a0al. 2022. Constitutional ai: Harmlessness from ai feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.08073 (2022)."},{"key":"e_1_3_3_2_8_2","unstructured":"Yuntao Bai Saurav Kadavath Sandipan Kundu and Amanda\u00a0Askell et al. 2022. Constitutional AI: Harmlessness from AI Feedback. arxiv:https:\/\/arXiv.org\/abs\/2212.08073\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2212.08073"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP46214.2022.9833677"},{"key":"e_1_3_3_2_10_2","unstructured":"Mislav Balunovic Dimitar Dimitrov Nikola Jovanovi\u0107 and Martin Vechev. 2022. Lamp: Extracting text from gradients with language model priors. Advances in Neural Information Processing Systems 35 (2022) 7641\u20137654."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICDMW58026.2022.00078"},{"key":"e_1_3_3_2_12_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_2_13_2","first-page":"2633","volume-title":"30th USENIX security symposium (USENIX Security 21)","author":"Carlini Nicholas","year":"2021","unstructured":"Nicholas Carlini, Florian Tramer, Eric Wallace, Matthew Jagielski, Ariel Herbert-Voss, Katherine Lee, Adam Roberts, Tom Brown, Dawn Song, Ulfar Erlingsson, et\u00a0al. 2021. Extracting training data from large language models. In 30th USENIX security symposium (USENIX Security 21). 2633\u20132650."},{"key":"e_1_3_3_2_14_2","unstructured":"Ting-Yun Chang Jesse Thomason and Robin Jia. 2023. Do Localization Methods Actually Localize Memorized Data in LLMs? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.09060 (2023)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Tianyu Chen Hangbo Bao Shaohan Huang Li Dong Binxing Jiao Daxin Jiang Haoyi Zhou Jianxin Li and Furu Wei. 2022. The-x: Privacy-preserving transformer inference with homomorphic encryption. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2206.00216 (2022).","DOI":"10.18653\/v1\/2022.findings-acl.277"},{"key":"e_1_3_3_2_16_2","unstructured":"Yu Chen Tingxin Li Huiming Liu and Yang Yu. 2023. Hide and Seek (HaS): A Lightweight Framework for Prompt Privacy Protection. arxiv:https:\/\/arXiv.org\/abs\/2309.03057\u00a0[cs.CR]"},{"key":"e_1_3_3_2_17_2","unstructured":"Steffi Chern Zhen Fan and Andy Liu. 2024. Combating Adversarial Attacks with Multi-Agent Debate. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.05998 (2024)."},{"key":"e_1_3_3_2_18_2","unstructured":"Junjie Chu Zeyang Sha Michael Backes and Yang Zhang. 2024. Conversation Reconstruction Attack Against GPT Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.02987 (2024)."},{"key":"e_1_3_3_2_19_2","unstructured":"Badhan\u00a0Chandra Das M\u00a0Hadi Amini and Yanzhao Wu. 2024. Security and privacy challenges of large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.00888 (2024)."},{"key":"e_1_3_3_2_20_2","volume-title":"Neurips Safe Generative AI Workshop 2024","author":"Castro Leo de","year":"2024","unstructured":"Leo de Castro, Antigoni Polychroniadou, and Daniel Escudero. 2024. Privacy-Preserving Large Language Model Inference via GPU-Accelerated Fully Homomorphic Encryption. In Neurips Safe Generative AI Workshop 2024."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Jieren Deng Yijue Wang Ji Li Chao Shang Hang Liu Sanguthevar Rajasekaran and Caiwen Ding. 2021. Tag: Gradient attack on transformer-based language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2103.06819 (2021).","DOI":"10.18653\/v1\/2021.findings-emnlp.305"},{"key":"e_1_3_3_2_22_2","unstructured":"Sentry Developers. [n. d.]. PII and Data Scrubbing \u2014 develop.sentry.dev. https:\/\/develop.sentry.dev\/pii. [Accessed 24-04-2024]."},{"key":"e_1_3_3_2_23_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arxiv:https:\/\/arXiv.org\/abs\/1810.04805\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1810.04805"},{"key":"e_1_3_3_2_24_2","unstructured":"Peng Ding Jun Kuang Dan Ma Xuezhi Cao Yunsen Xian Jiajun Chen and Shujian Huang. 2023. A Wolf in Sheep\u2019s Clothing: Generalized Nested Jailbreak Prompts can Fool Large Language Models Easily. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.08268 (2023)."},{"key":"e_1_3_3_2_25_2","unstructured":"Xinshuai Dong Anh\u00a0Tuan Luu Min Lin Shuicheng Yan and Hanwang Zhang. 2021. How should pre-trained language models be fine-tuned towards adversarial robustness? Advances in Neural Information Processing Systems 34 (2021) 4356\u20134369."},{"key":"e_1_3_3_2_26_2","unstructured":"Wenyu Du Tongxu Luo Zihan Qiu Zeyu Huang Yikang Shen Reynold Cheng Yike Guo and Jie Fu. 2024. Stacking Your Transformers: A Closer Look at Model Growth for Efficient LLM Pre-Training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.15319 (2024)."},{"key":"e_1_3_3_2_27_2","volume-title":"The 61st Annual Meeting Of The Association For Computational Linguistics","author":"Duan Haonan","year":"2023","unstructured":"Haonan Duan, Adam Dziedzic, Mohammad Yaghini, Nicolas Papernot, and Franziska Boenisch. 2023. On the privacy risk of in-context learning. In The 61st Annual Meeting Of The Association For Computational Linguistics."},{"key":"e_1_3_3_2_28_2","unstructured":"Michael Duan Anshuman Suri Niloofar Mireshghallah Sewon Min Weijia Shi Luke Zettlemoyer Yulia Tsvetkov Yejin Choi David Evans and Hannaneh Hajishirzi. 2024. Do Membership Inference Attacks Work on Large Language Models? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.07841 (2024)."},{"key":"e_1_3_3_2_29_2","unstructured":"Travis Dyde. 2023. Documentation on the emergence current iterations and possible future of Artificial Intelligence with a focus on Large Language Models. (2023)."},{"key":"e_1_3_3_2_30_2","unstructured":"Kennedy Edemacu and Xintao Wu. 2024. Privacy Preserving Prompt Engineering: A Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.06001 (2024)."},{"key":"e_1_3_3_2_31_2","unstructured":"L. Ekenstam. 2023. Linus on x: \"if you think this is creepy... you should know what all major stores in the world knows about you as a shopper they use in-store online cell-tower and more to keep track of everything... everything. cambridge analytics is a joke in comparison\". https:\/\/x.com\/LinusEkenstam\/status\/1692602911518343502."},{"key":"e_1_3_3_2_32_2","unstructured":"Ronen Eldan and Mark Russinovich. 2023. Who\u2019s Harry Potter? Approximate Unlearning in LLMs. arxiv:https:\/\/arXiv.org\/abs\/2310.02238\u00a0[cs.CL]"},{"key":"e_1_3_3_2_33_2","unstructured":"Tao Fan Yan Kang Guoqiang Ma Weijing Chen Wenbin Wei Lixin Fan and Qiang Yang. 2023. Fate-llm: A industrial grade federated learning framework for large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.10049 (2023)."},{"key":"e_1_3_3_2_34_2","unstructured":"Richard Fang Rohan Bindu Akul Gupta and Daniel Kang. 2024. LLM Agents can Autonomously Exploit One-day Vulnerabilities. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.08144 (2024)."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3357713.3384290"},{"key":"e_1_3_3_2_36_2","unstructured":"Wenjie Fu Huandong Wang Chen Gao Guanghua Liu Yong Li and Tao Jiang. 2023. Practical membership inference attacks against fine-tuned large language models via self-prompt calibration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.06062 (2023)."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"crossref","unstructured":"Neil\u00a0Zhenqiang Gong and Bin Liu. 2018. Attribute inference attacks in online social networks. ACM Transactions on Privacy and Security (TOPS) 21 1 (2018) 1\u201330.","DOI":"10.1145\/3154793"},{"key":"e_1_3_3_2_38_2","unstructured":"Xiangming Gu Xiaosen Zheng Tianyu Pang Chao Du Qian Liu Ye Wang Jing Jiang and Min Lin. 2024. Agent Smith: A Single Image Can Jailbreak One Million Multimodal LLM Agents Exponentially Fast. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.08567 (2024)."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Suchin Gururangan Ana Marasovi\u0107 Swabha Swayamdipta Kyle Lo Iz Beltagy Doug Downey and Noah\u00a0A Smith. 2020. Don\u2019t stop pretraining: Adapt language models to domains and tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2004.10964 (2020).","DOI":"10.18653\/v1\/2020.acl-main.740"},{"key":"e_1_3_3_2_40_2","unstructured":"David Haber. 2024. Introducing Lakera Guard \u2013 Bringing Enterprise-Grade Security to LLMs with One Line of Code | Lakera \u2013 Protecting AI teams that disrupt the world. \u2014 lakera.ai. https:\/\/www.lakera.ai\/blog\/lakera-guard-overview. [Accessed 15-05-2024]."},{"key":"e_1_3_3_2_41_2","unstructured":"Meng Hao Hongwei Li Hanxiao Chen Pengzhi Xing Guowen Xu and Tianwei Zhang. 2022. Iron: Private inference on transformers. Advances in neural information processing systems 35 (2022) 15718\u201315731."},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Florian Hartmann Duc-Hieu Tran Peter Kairouz Victor C\u0103rbune et\u00a0al. 2024. Can LLMs get help from other LLMs without revealing private information? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.01041 (2024).","DOI":"10.18653\/v1\/2024.privatenlp-1.12"},{"key":"e_1_3_3_2_43_2","unstructured":"Wenyue Hua Xianjun Yang Zelong Li Cheng Wei and Yongfeng Zhang. 2024. TrustAgent: Towards Safe and Trustworthy LLM-based Agents through Agent Constitution. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.01586 (2024)."},{"key":"e_1_3_3_2_44_2","unstructured":"Daphne Ippolito Florian Tram\u00e8r Milad Nasr Chiyuan Zhang Matthew Jagielski Katherine Lee Christopher\u00a0A Choquette-Choo and Nicholas Carlini. 2022. Preventing verbatim memorization in language models gives a false sense of privacy. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.17546 (2022)."},{"key":"e_1_3_3_2_45_2","unstructured":"Abhyuday Jagannatha Bhanu Pratap\u00a0Singh Rawat and Hong Yu. 2021. Membership inference attack susceptibility of clinical language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.08305 (2021)."},{"key":"e_1_3_3_2_46_2","unstructured":"Matthew Jagielski Om Thakkar Florian Tramer Daphne Ippolito Katherine Lee Nicholas Carlini Eric Wallace Shuang Song Abhradeep Thakurta Nicolas Papernot et\u00a0al. 2022. Measuring forgetting of memorized training examples. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.00099 (2022)."},{"key":"e_1_3_3_2_47_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Jiang Tanqiu","year":"2025","unstructured":"Tanqiu Jiang, Zian Wang, Jiacheng Liang, Changjiang Li, Yuhui Wang, and Ting Wang. 2025. RobustKV: Defending Large Language Models against Jailbreak Attacks via KV Eviction. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=L5godAOC2z"},{"key":"e_1_3_3_2_48_2","unstructured":"Nikhil Kandpal Krishna Pillutla Alina Oprea Peter Kairouz Christopher\u00a0A Choquette-Choo and Zheng Xu. 2023. User inference attacks on large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.09266 (2023)."},{"key":"e_1_3_3_2_49_2","first-page":"10697","volume-title":"International Conference on Machine Learning","author":"Kandpal Nikhil","year":"2022","unstructured":"Nikhil Kandpal, Eric Wallace, and Colin Raffel. 2022. Deduplicating training data mitigates privacy risks in language models. In International Conference on Machine Learning. PMLR, 10697\u201310707."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Antonia Karamolegkou Jiaang Li Li Zhou and Anders S\u00f8gaard. 2023. Copyright violations and large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.13771 (2023).","DOI":"10.18653\/v1\/2023.emnlp-main.458"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.265"},{"key":"e_1_3_3_2_52_2","unstructured":"Yigitcan Kaya Sanghyun Hong and Tudor Dumitras. 2020. On the Effectiveness of Regularization Against Membership Inference Attacks. arxiv:https:\/\/arXiv.org\/abs\/2006.05336\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2006.05336"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"crossref","unstructured":"Sunder\u00a0Ali Khowaja Parus Khuwaja and Kapal Dev. 2023. Chatgpt needs spade (sustainability privacy digital divide and ethics) evaluation: A review. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.03123 (2023).","DOI":"10.36227\/techrxiv.22619932.v2"},{"key":"e_1_3_3_2_54_2","unstructured":"Siwon Kim Sangdoo Yun Hwaran Lee Martin Gubri Sungroh Yoon and Seong\u00a0Joon Oh. 2024. Propile: Probing privacy leakage in large language models. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"Jacob\u00a0Leon Kr\u00f6ger Leon Gellrich Sebastian Pape Saba\u00a0Rebecca Brause and Stefan Ullrich. 2022. Personal information inference from voice recordings: User awareness and privacy concerns. Proc. Priv. Enhancing Technol. 2022 1 (2022) 6\u201327.","DOI":"10.2478\/popets-2022-0002"},{"key":"e_1_3_3_2_56_2","unstructured":"Weirui Kuang Bingchen Qian Zitao Li Daoyuan Chen Dawei Gao Xuchen Pan Yuexiang Xie Yaliang Li Bolin Ding and Jingren Zhou. 2023. Federatedscope-llm: A comprehensive package for fine-tuning large language models in federated learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.00363 (2023)."},{"key":"e_1_3_3_2_57_2","unstructured":"Harrison Lee Samrat Phatale Hassan Mansoor Kellie Lu Thomas Mesnard Colton Bishop Victor Carbune and Abhinav Rastogi. 2023. Rlaif: Scaling reinforcement learning from human feedback with ai feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.00267 (2023)."},{"key":"e_1_3_3_2_58_2","unstructured":"Katherine Lee Daphne Ippolito Andrew Nystrom Chiyuan Zhang Douglas Eck Chris Callison-Burch and Nicholas Carlini. 2021. Deduplicating training data makes language models better. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2107.06499 (2021)."},{"key":"e_1_3_3_2_59_2","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et\u00a0al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_3_2_60_2","unstructured":"Haoran Li Dadi Guo Wei Fan Mingshi Xu Jie Huang Fanpu Meng and Yangqiu Song. 2023. Multi-step jailbreaking privacy attacks on chatgpt. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.05197 (2023)."},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"crossref","unstructured":"Kunhong Li Ruwei Huang and Bo Yang. 2025. Privacy-Preserving Text Classification on Deep Neural Network. Neural Processing Letters 57 2 (2025) 29.","DOI":"10.1007\/s11063-025-11738-w"},{"key":"e_1_3_3_2_62_2","unstructured":"Linyang Li Demin Song and Xipeng Qiu. 2022. Text adversarial purification as defense against adversarial attacks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.14207 (2022)."},{"key":"e_1_3_3_2_63_2","unstructured":"Tianshi Li Sauvik Das Hao-Ping Lee Dakuo Wang Bingsheng Yao and Zhiping Zhang. 2024. Human-Centered Privacy Research in the Age of Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.01994 (2024)."},{"key":"e_1_3_3_2_64_2","unstructured":"Xuechen Li Florian Tramer Percy Liang and Tatsunori Hashimoto. 2021. Large language models can be strong differentially private learners. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.05679 (2021)."},{"key":"e_1_3_3_2_65_2","unstructured":"Yuanchun Li Hao Wen Weijun Wang Xiangyu Li Yizhen Yuan Guohong Liu Jiacheng Liu Wenxing Xu Xiang Wang Yi Sun et\u00a0al. 2024. Personal llm agents: Insights and survey about the capability efficiency and security. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.05459 (2024)."},{"key":"e_1_3_3_2_66_2","unstructured":"Guo Lin Wenyue Hua and Yongfeng Zhang. 2024. PromptCrypt: Prompt Encryption for Secure Communication with Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.05868 (2024)."},{"key":"e_1_3_3_2_67_2","unstructured":"Tong Liu Yingjie Zhang Zhe Zhao Yinpeng Dong Guozhu Meng and Kai Chen. 2024. Making Them Ask and Answer: Jailbreaking Large Language Models in Few Queries via Disguise and Reconstruction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.18104 (2024)."},{"key":"e_1_3_3_2_68_2","unstructured":"Yi Liu Gelei Deng Zhengzi Xu Yuekang Li Yaowen Zheng Ying Zhang Lida Zhao Tianwei Zhang and Yang Liu. 2023. Jailbreaking chatgpt via prompt engineering: An empirical study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.13860 (2023)."},{"key":"e_1_3_3_2_69_2","unstructured":"Zechun Liu Changsheng Zhao Forrest Iandola Chen Lai Yuandong Tian Igor Fedorov Yunyang Xiong Ernie Chang Yangyang Shi Raghuraman Krishnamoorthi Liangzhen Lai and Vikas Chandra. 2024. MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases. arxiv:https:\/\/arXiv.org\/abs\/2402.14905\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2402.14905"},{"key":"e_1_3_3_2_70_2","unstructured":"Scott\u00a0M Lundberg and Su-In Lee. 2017. A unified approach to interpreting model predictions. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_71_2","unstructured":"Aatish Mandelecha. 2024. How to Secure Sensitive Data in LLM Prompts? \u2014 strac.io. https:\/\/www.strac.io\/blog\/secure-sensitive-data-in-llm-prompts. [Accessed 14-05-2024]."},{"key":"e_1_3_3_2_72_2","unstructured":"E McGowan. 2024. Is ChatGPT\u2019s use of people\u2019s data even legal? \u2014 blog.avast.com. https:\/\/blog.avast.com\/chatgpt-data-use-legal. [Accessed 04-04-2024]."},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.119"},{"key":"e_1_3_3_2_74_2","unstructured":"Sumeet\u00a0Ramesh Motwani Mikhail Baranchuk Martin Strohmeier Vijay Bolina Philip\u00a0HS Torr Lewis Hammond and Christian\u00a0Schroeder de Witt. 2024. Secret Collusion Among Generative AI Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.07510 (2024)."},{"key":"e_1_3_3_2_75_2","unstructured":"Silen Naihin David Atkinson Marc Green Merwane Hamadi Craig Swift Douglas Schonholtz Adam\u00a0Tauman Kalai and David Bau. 2023. Testing language model agents safely in the wild. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.10538 (2023)."},{"key":"e_1_3_3_2_76_2","unstructured":"Reiichiro Nakano Jacob Hilton Suchir Balaji Jeff Wu Long Ouyang Christina Kim Christopher Hesse Shantanu Jain Vineet Kosaraju William Saunders et\u00a0al. 2021. Webgpt: Browser-assisted question-answering with human feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2112.09332 (2021)."},{"key":"e_1_3_3_2_77_2","unstructured":"Milad Nasr Nicholas Carlini Jonathan Hayase Matthew Jagielski A\u00a0Feder Cooper Daphne Ippolito Christopher\u00a0A Choquette-Choo Eric Wallace Florian Tram\u00e8r and Katherine Lee. 2023. Scalable extraction of training data from (production) language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.17035 (2023)."},{"key":"e_1_3_3_2_78_2","unstructured":"Seth Neel and Peter Chang. 2023. Privacy issues in large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.06717 (2023)."},{"key":"e_1_3_3_2_79_2","unstructured":"Helen Nissenbaum. 2004. Privacy as contextual integrity. Wash. L. Rev. 79 (2004) 119."},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"crossref","unstructured":"Chris Olah Arvind Satyanarayan Ian Johnson Shan Carter Ludwig Schubert Katherine Ye and Alexander Mordvintsev. 2018. The building blocks of interpretability. Distill 3 3 (2018) e10.","DOI":"10.23915\/distill.00010"},{"key":"e_1_3_3_2_81_2","unstructured":"Matthew\u00a0J Page Joanne\u00a0E McKenzie Patrick\u00a0M Bossuyt Isabelle Boutron Tammy\u00a0C Hoffmann Cynthia\u00a0D Mulrow Larissa Shamseer Jennifer\u00a0M Tetzlaff Elie\u00a0A Akl Sue\u00a0E Brennan et\u00a0al. 2021. The PRISMA 2020 statement: an updated guideline for reporting systematic reviews. bmj 372 (2021)."},{"key":"e_1_3_3_2_82_2","unstructured":"Xianghe Pang Shuo Tang Rui Ye Yuxin Xiong Bolun Zhang Yanfeng Wang and Siheng Chen. 2024. Self-Alignment of Large Language Models via Monopolylogue-based Social Scene Simulation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.05699 (2024)."},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"crossref","unstructured":"Ildik\u00f3 Pil\u00e1n Pierre Lison Lilja \u00d8vrelid Anthi Papadopoulou David S\u00e1nchez and Montserrat Batet. 2022. The text anonymization benchmark (tab): A dedicated corpus and evaluation framework for text anonymization. Computational Linguistics 48 4 (2022) 1053\u20131101.","DOI":"10.1162\/coli_a_00458"},{"key":"e_1_3_3_2_84_2","unstructured":"Aman Priyanshu Supriti Vijay Ayush Kumar Rakshit Naidu and Fatemehsadat Mireshghallah. 2023. Are chatbots ready for privacy-sensitive applications? an investigation into input regurgitation and prompt-induced sanitization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.15008 (2023)."},{"key":"e_1_3_3_2_85_2","doi-asserted-by":"publisher","DOI":"10.1109\/LT60077.2024.10469434"},{"key":"e_1_3_3_2_86_2","unstructured":"Abhinav Rao Sachin Vashistha Atharva Naik Somak Aditya and Monojit Choudhury. 2023. Tricking llms into disobedience: Formalizing analyzing and detecting jailbreaks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.14965 (2023)."},{"key":"e_1_3_3_2_87_2","unstructured":"Protection Regulation. 2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council. Regulation (eu) 679 (2016) 2016."},{"key":"e_1_3_3_2_88_2","unstructured":"Protection Regulation. 2024. AI Act Regulation (EU) 2024\/1689 of the European Parliament and of the Council. Regulation (eu) 1689 (2024)."},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/2906388.2906392"},{"key":"e_1_3_3_2_90_2","unstructured":"Donghwan Rho Taeseong Kim Minje Park Jung\u00a0Woo Kim Hyunsik Chae Ernest\u00a0K Ryu and Jung\u00a0Hee Cheon. 2024. Encryption-friendly LLM architecture. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.02486 (2024)."},{"key":"e_1_3_3_2_91_2","unstructured":"Toran\u00a0Bruce Richards. 2023. Auto-gpt: Autonomous artificial intelligence software agent. https:\/\/github.com\/Significant-Gravitas\/AutoGPT. [Accessed 08-04-2024]."},{"key":"e_1_3_3_2_92_2","unstructured":"Jae\u00a0Hun Ro Srinadh Bhojanapalli Zheng Xu Yanxiang Zhang and Ananda\u00a0Theertha Suresh. 2024. Efficient Language Model Architectures for Differentially Private Federated Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.08100 (2024)."},{"key":"e_1_3_3_2_93_2","unstructured":"Alexander Robey Eric Wong Hamed Hassani and George\u00a0J Pappas. 2023. Smoothllm: Defending large language models against jailbreaking attacks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.03684 (2023)."},{"key":"e_1_3_3_2_94_2","first-page":"396","volume-title":"Proceedings of the AAAI Symposium Series","volume":"2","author":"Romero Oscar\u00a0J","year":"2023","unstructured":"Oscar\u00a0J Romero, John Zimmerman, Aaron Steinfeld, and Anthony Tomasic. 2023. Synergistic integration of large language models and cognitive architectures for robust ai: An exploratory analysis. In Proceedings of the AAAI Symposium Series , Vol.\u00a02. 396\u2013405."},{"key":"e_1_3_3_2_95_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643651.3659893"},{"key":"e_1_3_3_2_96_2","unstructured":"Yangjun Ruan Honghua Dong Andrew Wang Silviu Pitis Yongchao Zhou Jimmy Ba Yann Dubois Chris\u00a0J Maddison and Tatsunori Hashimoto. 2023. Identifying the risks of lm agents with an lm-emulated sandbox. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.15817 (2023)."},{"key":"e_1_3_3_2_97_2","unstructured":"Zhang Ruoyan Zheng Zhongxiang and Bao Wankang. 2025. Practical Secure Inference Algorithm for Fine-tuned Large Language Model Based on Fully Homomorphic Encryption. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.01672 (2025)."},{"key":"e_1_3_3_2_98_2","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2019. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1910.01108 (2019)."},{"key":"e_1_3_3_2_99_2","unstructured":"Weiyan Shi Aiqi Cui Evan Li Ruoxi Jia and Zhou Yu. 2021. Selective differential privacy for language modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2108.12944 (2021)."},{"key":"e_1_3_3_2_100_2","doi-asserted-by":"crossref","unstructured":"Taylor Shin Yasaman Razeghi Robert\u00a0L Logan\u00a0IV Eric Wallace and Sameer Singh. 2020. Autoprompt: Eliciting knowledge from language models with automatically generated prompts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.15980 (2020).","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"key":"e_1_3_3_2_101_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.41"},{"key":"e_1_3_3_2_102_2","doi-asserted-by":"crossref","unstructured":"Tanmay Singh Harshvardhan Aditya Vijay\u00a0K Madisetti and Arshdeep Bahga. 2024. Whispered Tuning: Data Privacy Preservation in Fine-Tuning LLMs through Differential Privacy. Journal of Software Engineering and Applications 17 1 (2024) 1\u201322.","DOI":"10.4236\/jsea.2024.171001"},{"key":"e_1_3_3_2_103_2","unstructured":"Victoria Smith Ali\u00a0Shahin Shamsabadi Carolyn Ashurst and Adrian Weller. 2023. Identifying and mitigating privacy risks stemming from language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.01424 (2023)."},{"key":"e_1_3_3_2_104_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Staab Robin","year":"2024","unstructured":"Robin Staab, Mark Vero, Mislav Balunovic, and Martin Vechev. 2024. Beyond Memorization: Violating Privacy via Inference with Large Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=kmn0BhQk7p"},{"key":"e_1_3_3_2_105_2","unstructured":"Robin Staab Mark Vero Mislav Balunovi\u0107 and Martin Vechev. 2024. Large Language Models are Advanced Anonymizers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.13846 (2024)."},{"key":"e_1_3_3_2_106_2","unstructured":"Lukas Struppek Minh\u00a0Hieu Le Dominik Hintersdorf and Kristian Kersting. 2024. Exploring the Adversarial Capabilities of Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.09132 (2024)."},{"key":"e_1_3_3_2_107_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.trustnlp-1.18"},{"key":"e_1_3_3_2_108_2","unstructured":"Zhiqing Sun Yikang Shen Qinhong Zhou Hongxin Zhang and et\u00a0al Chen. 2024. Principle-driven self-alignment of language models from scratch with minimal human supervision. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_109_2","unstructured":"Jasper Tan Blake Mason Hamid Javadi and Richard\u00a0G. Baraniuk. 2022. Parameters or Privacy: A Provable Tradeoff Between Overparameterization and Membership Inference. arxiv:https:\/\/arXiv.org\/abs\/2202.01243\u00a0[stat.ML] https:\/\/arxiv.org\/abs\/2202.01243"},{"key":"e_1_3_3_2_110_2","unstructured":"Xinyu Tang Richard Shin Huseyin\u00a0A Inan Andre Manoel Fatemehsadat Mireshghallah Zinan Lin Sivakanth Gopi Janardhan Kulkarni and Robert Sim. 2023. Privacy-preserving in-context learning with differentially private few-shot generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.11765 (2023)."},{"key":"e_1_3_3_2_111_2","unstructured":"Yu Tian Xiao Yang Jingyuan Zhang Yinpeng Dong and Hang Su. 2023. Evil geniuses: Delving into the safety of llm-based agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.11855 (2023)."},{"key":"e_1_3_3_2_112_2","unstructured":"Minh\u00a0N Vu Truc Nguyen Tre\u2019R Jeter and My\u00a0T Thai. 2024. Analysis of Privacy Leakage in Federated Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.04784 (2024)."},{"key":"e_1_3_3_2_113_2","unstructured":"Jeffrey\u00a0G Wang Jason Wang Marvin Li and Seth Neel. 2024. Pandora\u2019s White-Box: Increased Training Data Leakage in Open LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.17012 (2024)."},{"key":"e_1_3_3_2_114_2","unstructured":"Alexander Wei Nika Haghtalab and Jacob Steinhardt. 2024. Jailbroken: How does llm safety training fail? Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_115_2","unstructured":"Jules White Quchen Fu Sam Hays Michael Sandborn Carlos Olea Henry Gilbert Ashraf Elnashar Jesse Spencer-Smith and Douglas\u00a0C Schmidt. 2023. A prompt pattern catalog to enhance prompt engineering with chatgpt. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.11382 (2023)."},{"key":"e_1_3_3_2_116_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Wu Tong","year":"2023","unstructured":"Tong Wu, Ashwinee Panda, Jiachen\u00a0T Wang, and Prateek Mittal. 2023. Privacy-preserving in-context learning for large language models. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_2_117_2","unstructured":"Xiaodong Wu Ran Duan and Jianbing Ni. 2023. Unveiling security privacy and ethical concerns of chatgpt. Journal of Information and Intelligence (2023)."},{"key":"e_1_3_3_2_118_2","unstructured":"Zhiheng Xi Wenxiang Chen Xin Guo Wei He Yiwen Ding Boyang Hong Ming Zhang Junzhe Wang Senjie Jin Enyu Zhou et\u00a0al. 2023. The rise and potential of large language model based agents: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.07864 (2023)."},{"key":"e_1_3_3_2_119_2","unstructured":"Daliang Xu Wangsong Yin Xin Jin Ying Zhang Shiyun Wei Mengwei Xu and Xuanzhe Liu. 2023. Llmcad: Fast and scalable on-device large language model inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.04255 (2023)."},{"key":"e_1_3_3_2_120_2","unstructured":"Fuzhao Xue Yao Fu Wangchunshu Zhou Zangwei Zheng and Yang You. 2024. To repeat or not to repeat: Insights from scaling llm under token-crisis. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_121_2","doi-asserted-by":"crossref","unstructured":"Biwei Yan Kun Li Minghui Xu Yueyan Dong Yue Zhang Zhaochun Ren and Xiuzheng Cheng. 2024. On Protecting the Data Privacy of Large Language Models (LLMs): A Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.05156 (2024).","DOI":"10.1109\/ICMC60390.2024.00008"},{"key":"e_1_3_3_2_122_2","doi-asserted-by":"crossref","unstructured":"Haomiao Yang Kunlan Xiang Mengyu Ge Hongwei Li Rongxing Lu and Shui Yu. 2024. A comprehensive overview of backdoor attacks in large language models within communication networks. IEEE Network (2024).","DOI":"10.1109\/MNET.2024.3367788"},{"key":"e_1_3_3_2_123_2","doi-asserted-by":"crossref","unstructured":"Linyao Yang Hongyang Chen Zhao Li Xiao Ding and Xindong Wu. 2024. Give us the facts: Enhancing large language models with knowledge graphs for fact-aware language modeling. IEEE Transactions on Knowledge and Data Engineering (2024).","DOI":"10.1109\/TKDE.2024.3360454"},{"key":"e_1_3_3_2_124_2","unstructured":"Zhou Yang Zhipeng Zhao Chenyu Wang Jieke Shi Dongsun Kim DongGyun Han and David Lo. 2023. What do code models memorize? an empirical study on large language models of code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.09932 (2023)."},{"key":"e_1_3_3_2_125_2","doi-asserted-by":"crossref","unstructured":"Yifan Yao Jinhao Duan Kaidi Xu Yuanfang Cai Zhibo Sun and Yue Zhang. 2024. A survey on large language model (llm) security and privacy: The good the bad and the ugly. High-Confidence Computing (2024) 100211.","DOI":"10.1016\/j.hcc.2024.100211"},{"key":"e_1_3_3_2_126_2","unstructured":"Yixiang Yao Fei Wang Srivatsan Ravi and Muhao Chen. 2024. Privacy-Preserving Language Model Inference with Instance Obfuscation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.08227 (2024)."},{"key":"e_1_3_3_2_127_2","unstructured":"Yuanshun Yao Xiaojun Xu and Yang Liu. 2023. Large language model unlearning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.10683 (2023)."},{"key":"e_1_3_3_2_128_2","doi-asserted-by":"crossref","unstructured":"Rui Ye Wenhao Wang Jingyi Chai Dihan Li Zexi Li Yinda Xu Yaxin Du Yanfeng Wang and Siheng Chen. 2024. OpenFedLLM: Training Large Language Models on Decentralized Private Data via Federated Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.06954 (2024).","DOI":"10.1145\/3637528.3671582"},{"key":"e_1_3_3_2_129_2","unstructured":"Rongjie Yi Liwei Guo Shiyun Wei Ao Zhou Shangguang Wang and Mengwei Xu. 2025. EdgeMoE: Empowering Sparse Large Language Models on Mobile Devices. IEEE Transactions on Mobile Computing (2025)."},{"key":"e_1_3_3_2_130_2","unstructured":"Keun\u00a0Soo Yim. 2023. Privacy-friendly Personalization of LLM Responses Using Hashed Entity Injection. Technical Disclosure Commons (2023)."},{"key":"e_1_3_3_2_131_2","unstructured":"Jin\u00a0Yong Yoo and Yanjun Qi. 2021. Towards improving adversarial training of NLP models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.00544 (2021)."},{"key":"e_1_3_3_2_132_2","doi-asserted-by":"crossref","unstructured":"Tongxin Yuan Zhiwei He Lingzhong Dong Yiming Wang Ruijie Zhao Tian Xia Lizhen Xu Binglin Zhou Fangqi Li Zhuosheng Zhang et\u00a0al. 2024. R-Judge: Benchmarking Safety Risk Awareness for LLM Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.10019 (2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.79"},{"key":"e_1_3_3_2_133_2","doi-asserted-by":"crossref","unstructured":"Ahtsham Zafar Venkatesh\u00a0Balavadhani Parthasarathy Chan\u00a0Le Van Saad Shahid Arsalan Shahid et\u00a0al. 2023. Building trust in conversational ai: A comprehensive review and solution architecture for explainable privacy-aware systems using llms and knowledge graph. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.13534 (2023).","DOI":"10.36227\/techrxiv.24009351.v1"},{"key":"e_1_3_3_2_134_2","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3417880"},{"key":"e_1_3_3_2_135_2","unstructured":"Yifan Zeng Yiran Wu Xiao Zhang Huazheng Wang and Qingyun Wu. 2024. AutoDefense: Multi-Agent LLM Defense against Jailbreak Attacks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.04783 (2024)."},{"key":"e_1_3_3_2_136_2","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA240958"},{"key":"e_1_3_3_2_137_2","unstructured":"Jinghan Zhang Junteng Liu Junxian He et\u00a0al. 2023. Composing parameter-efficient modules with arithmetic operation. Advances in Neural Information Processing Systems 36 (2023) 12589\u201312610."},{"key":"e_1_3_3_2_138_2","unstructured":"Kaiyan Zhang Jianyu Wang Ermo Hua Biqing Qi Ning Ding and Bowen Zhou. 2024. CoGenesis: A Framework Collaborating Large and Small Language Models for Secure Context-Aware Instruction Following. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.03129 (2024)."},{"key":"e_1_3_3_2_139_2","doi-asserted-by":"publisher","unstructured":"Lexin Zhang Changxiang Li Qi Hu Jingjing Lang Sirui Huang Linyue Hu Jingwen Leng Qiuhan Chen and Chunli Lv. 2023. Enhancing Privacy in Large Language Model with Homomorphic Encryption and Sparse Attention. Applied Sciences 13 24 (2023). 10.3390\/app132413146","DOI":"10.3390\/app132413146"},{"key":"e_1_3_3_2_140_2","unstructured":"Ruisi Zhang Seira Hidano and Farinaz Koushanfar. 2022. Text revealer: Private text reconstruction via model inversion attacks against transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.10505 (2022)."},{"key":"e_1_3_3_2_141_2","doi-asserted-by":"crossref","unstructured":"Xinyu Zhang Huiyu Xu Zhongjie Ba Zhibo Wang Yuan Hong Jian Liu Zhan Qin and Kui Ren. 2024. PrivacyAsst: Safeguarding User Privacy in Tool-Using Large Language Model Agents. IEEE Transactions on Dependable and Secure Computing (2024).","DOI":"10.1109\/TDSC.2024.3372777"},{"key":"e_1_3_3_2_142_2","unstructured":"Yao Zhang Zijian Ma Yunpu Ma Zhen Han Yu Wu and Volker Tresp. 2024. WebPilot: A Versatile and Autonomous Multi-Agent System for Web Task Execution with Strategic Exploration. arxiv:https:\/\/arXiv.org\/abs\/2408.15978\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2408.15978"},{"key":"e_1_3_3_2_143_2","unstructured":"Zhiping Zhang Michelle Jia Bingsheng Yao Sauvik Das Ada Lerner Dakuo Wang Tianshi Li et\u00a0al. 2023. \" It\u2019s a Fair Game\u201d or Is It? Examining How Users Navigate Disclosure Risks and Benefits When Using LLM-Based Conversational Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.11653 (2023)."},{"key":"e_1_3_3_2_144_2","doi-asserted-by":"crossref","unstructured":"Zhexin Zhang Jiaxin Wen and Minlie Huang. 2023. Ethicist: Targeted training data extraction through loss smoothed soft prompting and calibrated confidence estimation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.04401 (2023).","DOI":"10.18653\/v1\/2023.acl-long.709"},{"key":"e_1_3_3_2_145_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.481"},{"key":"e_1_3_3_2_146_2","doi-asserted-by":"crossref","unstructured":"Haiyan Zhao Hanjie Chen Fan Yang Ninghao Liu Huiqi Deng Hengyi Cai Shuaiqiang Wang Dawei Yin and Mengnan Du. 2024. Explainability for large language models: A survey. ACM Transactions on Intelligent Systems and Technology 15 2 (2024) 1\u201338.","DOI":"10.1145\/3639372"},{"key":"e_1_3_3_2_147_2","unstructured":"Xuandong Zhao Lei Li and Yu-Xiang Wang. 2022. Provably confidential language modelling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.01863 (2022)."},{"key":"e_1_3_3_2_148_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.337"},{"key":"e_1_3_3_2_149_2","unstructured":"Ligeng Zhu Zhijian Liu and Song Han. 2019. Deep leakage from gradients. Advances in neural information processing systems 32 (2019)."}],"event":{"name":"ASIA CCS '25: 20th ACM Asia Conference on Computer and Communications Security","location":"Hanoi Vietnam","acronym":"ASIA CCS '25","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 20th ACM Asia Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708821.3733888","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T07:25:31Z","timestamp":1755069931000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708821.3733888"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,24]]},"references-count":148,"alternative-id":["10.1145\/3708821.3733888","10.1145\/3708821"],"URL":"https:\/\/doi.org\/10.1145\/3708821.3733888","relation":{},"subject":[],"published":{"date-parts":[[2025,8,24]]},"assertion":[{"value":"2025-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}