{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,29]],"date-time":"2026-07-29T12:34:40Z","timestamp":1785328480758,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":93,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3736573","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"6227-6236","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["A Survey on Model Extraction Attacks and Defenses for Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8174-0581","authenticated-orcid":false,"given":"Kaixiang","family":"Zhao","sequence":"first","affiliation":[{"name":"University of Notre Dame, South Bend, IN, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3797-4055","authenticated-orcid":false,"given":"Lincan","family":"Li","sequence":"additional","affiliation":[{"name":"Florida State University, Tallahassee, FL, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6684-6752","authenticated-orcid":false,"given":"Kaize","family":"Ding","sequence":"additional","affiliation":[{"name":"Northwestern University, Evanston, IL, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-9309","authenticated-orcid":false,"given":"Neil Zhenqiang","family":"Gong","sequence":"additional","affiliation":[{"name":"Duke University, Durham, NC, 
USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3401-4921","authenticated-orcid":false,"given":"Yue","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7504-6159","authenticated-orcid":false,"given":"Yushun","family":"Dong","sequence":"additional","affiliation":[{"name":"Florida State University, Tallahassee, FL, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Attacks, Defenses, and Countermeasures. arXiv preprint arXiv:2505.01177","author":"Aguilera-Mart\u00ednez Francisco","year":"2025","unstructured":"Francisco Aguilera-Mart\u00ednez and Fernando Berzal. 2025. LLM Security: Vulnerabilities, Attacks, Defenses, and Countermeasures. arXiv preprint arXiv:2505.01177 (2025)."},{"key":"e_1_3_2_1_3_1","volume-title":"Large language models: A survey of their development, capabilities, and applications. Knowledge and Information Systems","author":"Annepaka Yadagiri","year":"2024","unstructured":"Yadagiri Annepaka and Partha Pakray. 2024. Large language models: A survey of their development, capabilities, and applications. Knowledge and Information Systems (2024), 1-56."},{"key":"e_1_3_2_1_4_1","unstructured":"Anahita Baninajjar Kamran Hosseini Ahmed Rezine and Amir Aminifar. 2024. Verified relative safety margins for neural network twins. 
arXiv preprint arXiv:2409.16726(2024)."},{"key":"e_1_3_2_1_5_1","unstructured":"Lewis Birch William Hackett Stefan Trawicki Neeraj Suri and Peter Garraghan. 2023. Model leeching: An extraction attack targeting llms. arXiv preprint arXiv:2309.10544(2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"er","author":"Carlini Nicholas","year":"2024","unstructured":"Nicholas Carlini, Daniel Paleka, Krishnamurthy Dvijotham, Thomas Steinke, Jonathan Hayase, A. Feder Cooper, Katherine Lee, Matthew Jagielski, Milad Nasr, Arthur Conmy, Itay Yona, Eric Wallace, David Rolnick, and Florian Tram\u00e8r. 2024. Stealing Part of a Production Language Model. arXiv preprint arXiv:2403.06634(2024). https:\/\/arxiv.org\/abs\/2403.06634"},{"key":"e_1_3_2_1_7_1","first-page":"2633","volume-title":"30th USENIX Security Symposium (USENIX Security 21)","author":"Carlini Nicholas","year":"2021","unstructured":"Nicholas Carlini, Florian Tram\u00e8r, Eric Wallace, Matthew Jagielski, Ariel Herbert-Voss, Katherine Lee, Adam Roberts, Tom Brown, Dawn Song, \u00dalfar Erlingsson, Alina Oprea, and Colin Raffel. 2021. Extracting Training Data from Large Language Models. In 30th USENIX Security Symposium (USENIX Security 21). USENIX Association, 2633-2650."},{"key":"e_1_3_2_1_8_1","unstructured":"Chen Chen Xuanli He Lingjuan Lyu and Fangzhao Wu. 2021. Killing one bird with two stones: model extraction and attribute inference attacks against bert-based apis. arXiv preprint arXiv:2105.10909(2021)."},{"key":"e_1_3_2_1_9_1","volume-title":"ATOM: A Framework of Detecting Query-Based Model Extraction Attacks for Graph Neural Networks. arXiv preprint arXiv:2503.16693(2025).","author":"Cheng Zhan","year":"2025","unstructured":"Zhan Cheng, Bolin Shen, Tianming Sha, Yuan Gao, Shibo Li, and Yushun Dong. 2025. ATOM: A Framework of Detecting Query-Based Model Extraction Attacks for Graph Neural Networks. 
arXiv preprint arXiv:2503.16693(2025)."},{"key":"e_1_3_2_1_10_1","unstructured":"Jing Cui Yishi Xu Zhewei Huang Shuchang Zhou Jianbin Jiao and Junge Zhang. 2024. Recent advances in attack and defense approaches of large language models. arXiv preprint arXiv:2409.03274(2024)."},{"key":"e_1_3_2_1_11_1","unstructured":"Chenxi Dai Lin Lu and Pan Zhou. 2025. Stealing Training Data from Large Language Models in Decentralized Training through Activation Inversion Attack. arXiv preprint arXiv:2502.16086(2025)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3712001"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.4236\/jis.2024.154026"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Yi Dong Ronghui Mu Yanghao Zhang Siqi Sun Tianle Zhang Changshun Wu Gaojie Jin Yi Qi Jinwei Hu Jie Meng et al. 2024. Safeguarding large language models: A survey. arXiv preprint arXiv:2406.02622(2024).","DOI":"10.1007\/s10462-025-11389-2"},{"key":"e_1_3_2_1_15_1","volume-title":"International Conference on Ubiquitous Security. Springer, 76-95","author":"Esmradi Aysan","year":"2023","unstructured":"Aysan Esmradi, Daniel Wankit Yip, and Chun Fai Chan. 2023. A comprehensive survey of attack techniques, implementation, and mitigation strategies in large language models. In International Conference on Ubiquitous Security. Springer, 76-95."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Luyang Fang Xiaowei Yu Jiazhang Cai Yongkai Chen Shushan Wu Zhengliang Liu Zhenyuan Yang Haoran Lu Xilin Gong Yufang Liu et al. 2025. Knowledge Distillation and Dataset Distillation of Large Language Models: Emerging Trends Challenges and Future Directions. arXiv preprint arXiv:2504.14772(2025).","DOI":"10.1007\/s10462-025-11423-3"},{"key":"e_1_3_2_1_17_1","unstructured":"Shanglun Feng and Florian Tram\u00e8r. 2024. Privacy backdoors: stealing data with corrupted pretrained models. 
arXiv preprint arXiv:2404.00473(2024)."},{"key":"e_1_3_2_1_18_1","first-page":"9","article-title":"Extracting Training Data: Risks and solutions in the context of LLM security","volume":"12","author":"Gerasimenko Denis V","year":"2024","unstructured":"Denis V Gerasimenko and Dmitry Namiot. 2024. Extracting Training Data: Risks and solutions in the context of LLM security. International Journal of Open Information Technologies, Vol. 12, 11 (2024), 9-19.","journal-title":"International Journal of Open Information Technologies"},{"key":"e_1_3_2_1_19_1","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948(2025)."},{"key":"e_1_3_2_1_20_1","unstructured":"Danny Halawi Alexander Wei Eric Wallace Tony T Wang Nika Haghtalab and Jacob Steinhardt. 2024. Covert malicious finetuning: Challenges in safeguarding llm adaptation. arXiv preprint arXiv:2406.20053(2024)."},{"key":"e_1_3_2_1_21_1","unstructured":"Xuanli He Lingjuan Lyu Qiongkai Xu and Lichao Sun. 2021. Model extraction and adversarial transferability your BERT is vulnerable! arXiv preprint arXiv:2103.10013(2021)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0392"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Cheng-Yu Hsieh Chun-Liang Li Chih-Kuan Yeh Hootan Nakhost Yasuhisa Fujii Alexander Ratner Ranjay Krishna Chen-Yu Lee and Tomas Pfister. 2023. Distilling step-by-step! outperforming larger language models with less training data and smaller model sizes. arXiv preprint arXiv:2305.02301(2023).","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Jie Huang Hanyin Shao and Kevin Chen-Chuan Chang. 2022. Are large pre-trained language models leaking your personal information? 
arXiv preprint arXiv:2205.12628(2022).","DOI":"10.18653\/v1\/2022.findings-emnlp.148"},{"key":"e_1_3_2_1_25_1","unstructured":"Zhen Huang Haoyang Zou Xuefeng Li Yixiu Liu Yuxiang Zheng Ethan Chern Shijie Xia Yiwei Qin Weizhe Yuan and Pengfei Liu. 2024. O1 Replication Journey-Part 2: Surpassing O1-preview through Simple Distillation Big Progress or Bitter Lesson? arXiv preprint arXiv:2411.16489(2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658644.3670370"},{"key":"e_1_3_2_1_27_1","volume-title":"Chiron: Privacy-preserving machine learning as a service. arXiv preprint arXiv:1803.05961(2018).","author":"Hunt Tyler","year":"2018","unstructured":"Tyler Hunt, Congzheng Song, Reza Shokri, Vitaly Shmatikov, and Emmett Witchel. 2018. Chiron: Privacy-preserving machine learning as a service. arXiv preprint arXiv:1803.05961(2018)."},{"key":"e_1_3_2_1_28_1","unstructured":"Yoichi Ishibashi and Hidetoshi Shimodaira. 2023. Knowledge sanitization of large language models. arXiv preprint arXiv:2309.11852(2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Ziyou Jiang Mingyang Li Guowei Yang Junjie Wang Yuekai Huang Zhiyuan Chang and Qing Wang. 2025. Mimicking the Familiar: Dynamic Command Generation for Information Theft Attacks in LLM Tool-Learning System. arXiv preprint arXiv:2502.11358(2025).","DOI":"10.18653\/v1\/2025.acl-long.672"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3274694.3274740"},{"key":"e_1_3_2_1_31_1","volume-title":"Nsml: Meet the mlaas platform with a real-world case study. arXiv preprint arXiv:1810","author":"Kim Hanjoo","year":"2018","unstructured":"Hanjoo Kim, Minkyu Kim, Dongjoo Seo, Jinwoong Kim, Heungseok Park, Soeun Park, Hyunwoo Jo, KyungHyun Kim, Youngil Yang, Youngkwan Kim, et al., 2018. Nsml: Meet the mlaas platform with a real-world case study. 
arXiv preprint arXiv:1810.09957(2018)."},{"key":"e_1_3_2_1_32_1","volume-title":"Protection of LLM Environment Using Prompt Security. In 2024 15th International Conference on Information and Communication Technology Convergence (ICTC). IEEE, 1715-1719","author":"Kim Minjae","year":"2024","unstructured":"Minjae Kim, Taehyeong Kwon, Kibeom Shim, and Beonghoon Kim. 2024. Protection of LLM Environment Using Prompt Security. In 2024 15th International Conference on Information and Communication Technology Convergence (ICTC). IEEE, 1715-1719."},{"key":"e_1_3_2_1_33_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Byl5NREFDr","author":"Krishna Kalpesh","year":"2020","unstructured":"Kalpesh Krishna, Gaurav Singh Tomar, Ankur P Parikh, Nicolas Papernot, and Mohit Iyyer. 2020. Thieves on Sesame Street! Model Extraction of BERT-based APIs. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Byl5NREFDr"},{"key":"e_1_3_2_1_34_1","unstructured":"Sunbowen Lee Junting Zhou Chang Ao Kaige Li Xinrun Du Sirui He Haihong Wu Tianci Liu Jiaheng Liu Hamid Alinejad-Rokny Min Yang Yitao Liang Zhoufutu Wen and Shiwen Ni. 2025. Quantification of Large Language Model Distillation. arxiv:2501.12619"},{"key":"e_1_3_2_1_35_1","unstructured":"Chenyang Li Zhao Song Weixin Wang and Chiwun Yang. 2023. A theoretical insight into attack and defense of gradient leakage in transformer. arXiv preprint arXiv:2311.13624(2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Llm-pbe: Assessing data privacy in large language models. arXiv preprint arXiv:2408.12787(2024).","author":"Li Qinbin","year":"2024","unstructured":"Qinbin Li, Junyuan Hong, Chulin Xie, Jeffrey Tan, Rachel Xin, Junyi Hou, Xavier Yin, Zhun Wang, Dan Hendrycks, Zhangyang Wang, et al., 2024a. Llm-pbe: Assessing data privacy in large language models. 
arXiv preprint arXiv:2408.12787(2024)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680786"},{"key":"e_1_3_2_1_38_1","unstructured":"Qinfeng Li Yangfan Xie Tianyu Du Zhiqiang Shen Zhenghan Qin Hao Peng Xinkui Zhao Xianwei Zhu Jianwei Yin and Xuhong Zhang. 2024c. CoreGuard: Safeguarding Foundational Capabilities of LLMs Against Model Stealing in Edge Deployment. arXiv preprint arXiv:2410.13903(2024)."},{"key":"e_1_3_2_1_39_1","unstructured":"Zi Liang Haibo Hu Qingqing Ye Yaxin Xiao and Haoyang Li. 2024. Why Are My Prompts Leaked? Unraveling Prompt Extraction Threats in Customized Large Language Models. arXiv preprint arXiv:2408.02416(2024)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3691626","article-title":"A survey of text watermarking in the era of large language models","volume":"57","author":"Liu Aiwei","year":"2024","unstructured":"Aiwei Liu, Leyi Pan, Yijian Lu, Jingjing Li, Xuming Hu, Xi Zhang, Lijie Wen, Irwin King, Hui Xiong, and Philip Yu. 2024a. A survey of text watermarking in the era of large language models. Comput. Surveys, Vol. 57, 2 (2024), 1-36.","journal-title":"Comput. Surveys"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Feng Liu Jiaqi Jiang Yating Lu Zhanyi Huang and Jiuming Jiang. 2025. The ethical security of large language models: A systematic review. Frontiers of Engineering Management(2025) 1-13.","DOI":"10.1007\/s42524-025-4082-6"},{"key":"e_1_3_2_1_42_1","first-page":"98297","article-title":"Ddk: Distilling domain knowledge for efficient large language models","volume":"37","author":"Liu Jiaheng","year":"2024","unstructured":"Jiaheng Liu, Chenchen Zhang, Jinyang Guo, Yuanxing Zhang, Haoran Que, Ken Deng, Jie Liu, Ge Zhang, Yanan Wu, Congnan Liu, et al., 2024b. Ddk: Distilling domain knowledge for efficient large language models. Advances in Neural Information Processing Systems, Vol. 
37 (2024), 98297-98319.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3548606.3560586"},{"key":"e_1_3_2_1_44_1","volume-title":"Scale: A Comprehensive Survey of Large Model Safety. arXiv preprint arXiv:2502.05206(2025).","author":"Ma Xingjun","year":"2025","unstructured":"Xingjun Ma, Yifeng Gao, Yixu Wang, Ruofan Wang, Xin Wang, Ye Sun, Yifan Ding, Hengyuan Xu, Yunhao Chen, Yunhan Zhao, et al., 2025. Safety at Scale: A Comprehensive Survey of Large Model Safety. arXiv preprint arXiv:2502.05206(2025)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-54827-7_14"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Eleena Mathew. 2024. Enhancing Security in Large Language Models: A Comprehensive Review of Prompt Injection Attacks and Defenses. Authorea Preprints(2024).","DOI":"10.36227\/techrxiv.172954263.32914470\/v1"},{"key":"e_1_3_2_1_47_1","first-page":"61065","article-title":"Tree of attacks: Jailbreaking black-box llms automatically","volume":"37","author":"Mehrotra Anay","year":"2024","unstructured":"Anay Mehrotra, Manolis Zampetakis, Paul Kassianik, Blaine Nelson, Hyrum Anderson, Yaron Singer, and Amin Karbasi. 2024. Tree of attacks: Jailbreaking black-box llms automatically. Advances in Neural Information Processing Systems, Vol. 37 (2024), 61065-61105.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_48_1","volume-title":"LLM-FIN: Large Language Models Fingerprinting Attack on Edge Devices. In 2024 25th International Symposium on Quality Electronic Design (ISQED). IEEE, 1-6.","author":"Nazari Najmeh","year":"2024","unstructured":"Najmeh Nazari, Furi Xiang, Chongzhou Fang, Hosein Mohammadi Makrani, Aditya Puri, Kartik Patwari, Hossein Sayadi, Setareh Rafatirad, Chen-Nee Chuah, and Houman Homayoun. 2024. LLM-FIN: Large Language Models Fingerprinting Attack on Edge Devices. 
In 2024 25th International Symposium on Quality Electronic Design (ISQED). IEEE, 1-6."},{"key":"e_1_3_2_1_49_1","volume-title":"ModelShield: Adaptive and Robust Watermark against Model Extraction Attack","author":"Pang Kaiyi","year":"2025","unstructured":"Kaiyi Pang, Tao Qi, Chuhan Wu, Minhao Bai, Minghu Jiang, and Yongfeng Huang. 2025. ModelShield: Adaptive and Robust Watermark against Model Extraction Attack. IEEE Transactions on Information Forensics and Security(2025)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Rahil Parikh Christophe Dupuy and Rahul Gupta. 2022. Canary extraction in natural language understanding models. arXiv preprint arXiv:2203.13920(2022).","DOI":"10.18653\/v1\/2022.acl-short.61"},{"key":"e_1_3_2_1_51_1","unstructured":"Vaidehi Patil Peter Hase and Mohit Bansal. 2023. Can sensitive information be deleted from llms? objectives for defending against extraction attacks. arXiv preprint arXiv:2309.17410(2023)."},{"key":"e_1_3_2_1_52_1","unstructured":"Tianyu Peng and Jiajun Zhang. 2024. Enhancing Knowledge Distillation of Large Language Models through Efficient Multi-Modal Distribution Alignment. arXiv preprint arXiv:2409.12545(2024)."},{"key":"e_1_3_2_1_53_1","unstructured":"F\u00e1bio Perez and Ian Ribeiro. 2022. Ignore previous prompt: Attack techniques for language models. arXiv preprint arXiv:2211.09527(2022)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3428757.3429152"},{"key":"e_1_3_2_1_55_1","volume-title":"Replication Journey: A Strategic Progress Report-Part 1. arXiv preprint arXiv:2410.18982(2024).","author":"Qin Yiwei","year":"2024","unstructured":"Yiwei Qin, Xuefeng Li, Haoyang Zou, Yixiu Liu, Shijie Xia, Zhen Huang, Yixin Ye, Weizhe Yuan, Hector Liu, Yuanzhi Li, et al., 2024. O1 Replication Journey: A Strategic Progress Report-Part 1. 
arXiv preprint arXiv:2410.18982(2024)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46214.2022.9833743"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCWC62904.2025.10903912"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338498.3358646"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2015.152"},{"key":"e_1_3_2_1_60_1","volume-title":"AI investment forecast to approach $200 billion globally by","author":"Sachs Goldman","year":"2025","unstructured":"Goldman Sachs. 2023. AI investment forecast to approach $200 billion globally by 2025. Artificial intelligence outlooks-01 AUG(2023)."},{"key":"e_1_3_2_1_61_1","unstructured":"Zeyang Sha and Yang Zhang. 2024. Prompt Stealing Attacks Against Large Language Models. arXiv preprint arXiv:2402.12959(2024)."},{"key":"e_1_3_2_1_62_1","first-page":"5823","volume-title":"33rd USENIX Security Symposium (USENIX Security 24)","author":"Shen Xinyue","year":"2024","unstructured":"Xinyue Shen, Yiting Qu, Michael Backes, and Yang Zhang. 2024. Prompt Stealing Attacks Against {Text-to-Image} Generation Models. In 33rd USENIX Security Symposium (USENIX Security 24). 5823-5840."},{"key":"e_1_3_2_1_63_1","volume-title":"Tolga Aktas, and Vijay Aski.","author":"Shirgaonkar Anup","year":"2024","unstructured":"Anup Shirgaonkar, Nikhil Pandey, Nazmiye Ceren Abay, Tolga Aktas, and Vijay Aski. 2024. Knowledge Distillation Using Frontier Open-source LLMs: Generalizability and the Role of Synthetic Data. arXiv preprint arXiv:2410.18588(2024)."},{"key":"e_1_3_2_1_64_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth Katie Millican et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805(2023)."},{"key":"e_1_3_2_1_65_1","unstructured":"Stephen Burabari Tete. 2024. 
Threat modelling and risk analysis for large language model (llm)-powered applications. arXiv preprint arXiv:2406.11007(2024)."},{"key":"e_1_3_2_1_66_1","volume-title":"25th USENIX security symposium (USENIX Security 16). 601-618.","author":"Tram\u00e8r Florian","unstructured":"Florian Tram\u00e8r, Fan Zhang, Ari Juels, Michael K Reiter, and Thomas Ristenpart. 2016. Stealing machine learning models via prediction {APIs}. In 25th USENIX security symposium (USENIX Security 16). 601-618."},{"key":"e_1_3_2_1_67_1","volume-title":"Somnath Basu Roy Chowdhury, and Snigdha Chaturvedi","author":"Vijjini Anvesh Rao","year":"2024","unstructured":"Anvesh Rao Vijjini, Somnath Basu Roy Chowdhury, and Snigdha Chaturvedi. 2024. Exploring safety-utility trade-offs in personalized language models. arXiv preprint arXiv:2406.11107(2024)."},{"key":"e_1_3_2_1_68_1","unstructured":"Jeffrey G Wang Jason Wang Marvin Li and Seth Neel. 2024a. Pandora's White-Box: Precise Training Data Detection and Extraction in Large Language Models. arXiv preprint arXiv:2402.17012(2024)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.441"},{"key":"e_1_3_2_1_70_1","unstructured":"Shang Wang Tianqing Zhu Bo Liu Ming Ding Xu Guo Dayong Ye Wanlei Zhou and Philip S Yu. 2024b. Unique security and privacy threats of large language model: A comprehensive survey. arXiv preprint arXiv:2406.07973(2024)."},{"key":"e_1_3_2_1_71_1","volume-title":"The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=IQxBDLmVpT","author":"Wang Xinyi","year":"2025","unstructured":"Xinyi Wang, Antonis Antoniades, Yanai Elazar, Alfonso Amayuelas, Alon Albalak, Kexun Zhang, and William Yang Wang. 2025 a. Generalization v.s. Memorization: Tracing Language Models' Capabilities Back to Pretraining Data. In The Thirteenth International Conference on Learning Representations. 
https:\/\/openreview.net\/forum?id=IQxBDLmVpT"},{"key":"e_1_3_2_1_72_1","unstructured":"Yu Wang Cailing Cai Zhihua Xiao and Peifung E Lam. 2025 b. LLM Access Shield: Domain-Specific LLM Framework for Privacy Policy Compliance. arXiv preprint arXiv:2505.17145(2025)."},{"key":"e_1_3_2_1_73_1","volume-title":"Self-guard: Empower the llm to safeguard itself. arXiv preprint arXiv:2310.15851(2023).","author":"Wang Zezhong","year":"2023","unstructured":"Zezhong Wang, Fangkai Yang, Lu Wang, Pu Zhao, Hongru Wang, Liang Chen, Qingwei Lin, and Kam-Fai Wong. 2023. Self-guard: Empower the llm to safeguard itself. arXiv preprint arXiv:2310.15851(2023)."},{"key":"e_1_3_2_1_74_1","first-page":"80079","article-title":"Jailbroken: How does llm safety training fail","volume":"36","author":"Wei Alexander","year":"2023","unstructured":"Alexander Wei, Nika Haghtalab, and Jacob Steinhardt. 2023. Jailbroken: How does llm safety training fail? Advances in Neural Information Processing Systems, Vol. 36 (2023), 80079-80110.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_75_1","unstructured":"Fangzhou Wu Ning Zhang Somesh Jha Patrick McDaniel and Chaowei Xiao. 2024. A new era in llm security: Exploring security concerns in real-world llm-based systems. arXiv preprint arXiv:2402.18649(2024)."},{"key":"e_1_3_2_1_76_1","volume-title":"Pang Wei Koh, Chaowei Xiao, and Muhao Chen.","author":"Xu Jiashu","year":"2024","unstructured":"Jiashu Xu, Fei Wang, Mingyu Derek Ma, Pang Wei Koh, Chaowei Xiao, and Muhao Chen. 2024. Instructional fingerprinting of large language models. arXiv preprint arXiv:2401.12255(2024)."},{"key":"e_1_3_2_1_77_1","unstructured":"Qiongkai Xu Xuanli He Lingjuan Lyu Lizhen Qu and Gholamreza Haffari. 2021. Student surpasses teacher: Imitation attack for black-box NLP APIs. arXiv preprint arXiv:2108.13873(2021)."},{"key":"e_1_3_2_1_78_1","unstructured":"Wenrui Xu and Keshab K Parhi. 2025. A Survey of Attacks on Large Language Models. 
arXiv preprint arXiv:2505.12567(2025)."},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"crossref","unstructured":"Biwei Yan Kun Li Minghui Xu Yueyan Dong Yue Zhang Zhaochun Ren and Xiuzhen Cheng. 2024. On protecting the data privacy of large language models (llms): A survey. arXiv preprint arXiv:2403.05156(2024).","DOI":"10.1109\/ICMC60390.2024.00008"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3650212.3680304"},{"key":"e_1_3_2_1_81_1","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics. 913-932","author":"Yang Wenkai","year":"2025","unstructured":"Wenkai Yang, Yankai Lin, Jie Zhou, and Ji-Rong Wen. 2025. Distilling Rule-based Knowledge into Large Language Models. In Proceedings of the 31st International Conference on Computational Linguistics. 913-932."},{"key":"e_1_3_2_1_82_1","volume-title":"PRSA: PRompt Stealing Attacks against large language models. arXiv preprint arXiv:2402.19200(2024).","author":"Yang Yong","year":"2024","unstructured":"Yong Yang, Changjiang Li, Yi Jiang, Xi Chen, Haoyu Wang, Xuhong Zhang, Zonghui Wang, and Shouling Ji. 2024b. PRSA: PRompt Stealing Attacks against large language models. arXiv preprint arXiv:2402.19200(2024)."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639074"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"crossref","unstructured":"Yifan Yao Jinhao Duan Kaidi Xu Yuanfang Cai Zhibo Sun and Yue Zhang. 2024. A survey on large language model (llm) security and privacy: The good the bad and the ugly. High-Confidence Computing(2024) 100211.","DOI":"10.1016\/j.hcc.2024.100211"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/3131365.3131372"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3662006.3662060"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"crossref","unstructured":"Collin Zhang John X Morris and Vitaly Shmatikov. 2024c. Extracting prompts by inverting llm outputs. 
arXiv preprint arXiv:2405.15012(2024).","DOI":"10.18653\/v1\/2024.emnlp-main.819"},{"key":"e_1_3_2_1_88_1","unstructured":"Ruisi Zhang Seira Hidano and Farinaz Koushanfar. 2022. Text revealer: Private text reconstruction via model inversion attacks against transformers. arXiv preprint arXiv:2209.10505(2022)."},{"key":"e_1_3_2_1_89_1","volume-title":"33rd USENIX Security Symposium (USENIX Security 24)","author":"Zhang Ruisi","year":"2024","unstructured":"Ruisi Zhang, Shehzeen Samarah Hussain, Paarth Neekhara, and Farinaz Koushanfar. 2024a. {REMARK-LLM}: A robust and efficient watermarking framework for generative large language models. In 33rd USENIX Security Symposium (USENIX Security 24). 1813-1830."},{"key":"e_1_3_2_1_90_1","unstructured":"Yuehan Zhang Peizhuo Lv Yinpeng Liu Yongqiang Ma Wei Lu Xiaofeng Wang Xiaozhong Liu and Jiawei Liu. 2024b. PersonaMark: Personalized LLM watermarking for model protection and user attribution. arXiv preprint arXiv:2409.09739(2024)."},{"key":"e_1_3_2_1_91_1","volume-title":"Ethicist: Targeted training data extraction through loss smoothed soft prompting and calibrated confidence estimation. arXiv preprint arXiv:2307.04401","author":"Zhang Zhexin","year":"2023","unstructured":"Zhexin Zhang, Jiaxin Wen, and Minlie Huang. 2023. Ethicist: Targeted training data extraction through loss smoothed soft prompting and calibrated confidence estimation. arXiv preprint arXiv:2307.04401 (2023)."},{"key":"e_1_3_2_1_92_1","volume-title":"Yue Zhao, and Yushun Dong.","author":"Zhao Kaixiang","year":"2025","unstructured":"Kaixiang Zhao, Lincan Li, Kaize Ding, Neil Zhenqiang Gong, Yue Zhao, and Yushun Dong. 2025. A Survey of Model Extraction Attacks and Defenses in Distributed Computing Environments. 
arXiv preprint arXiv:2502.16065 (2025)."},{"key":"e_1_3_2_1_93_1","volume-title":"The Thirteenth International Conference on Learning Representations.","author":"Zhao Zhengyue","unstructured":"Zhengyue Zhao, Xiaogeng Liu, Somesh Jha, Patrick McDaniel, Bo Li, and Chaowei Xiao. [n.d.]. Can Watermarks be Used to Detect LLM IP Infringement For Free?. In The Thirteenth International Conference on Learning Representations."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3736573","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:18:13Z","timestamp":1777573093000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3736573"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":93,"alternative-id":["10.1145\/3711896.3736573","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3736573","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}