{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T13:20:42Z","timestamp":1775913642303,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,19]],"date-time":"2023-11-19T00:00:00Z","timestamp":1700352000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,19]]},"DOI":"10.1145\/3689217.3690621","type":"proceedings-article","created":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T18:22:01Z","timestamp":1732040521000},"page":"57-68","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":78,"title":["PromptRobust: Towards Evaluating the Robustness of Large Language Models on Adversarial Prompts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6220-1476","authenticated-orcid":false,"given":"Kaijie","family":"Zhu","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4833-0880","authenticated-orcid":false,"given":"Jindong","family":"Wang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9720-4711","authenticated-orcid":false,"given":"Jiaheng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7402-4323","authenticated-orcid":false,"given":"Zichen","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3424-835X","authenticated-orcid":false,"given":"Hao","family":"Chen","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6365-259X","authenticated-orcid":false,"given":"Yidong","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5928-148X","authenticated-orcid":false,"given":"Linyi","family":"Yang","sequence":"additional","affiliation":[{"name":"Westlake University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1459-0155","authenticated-orcid":false,"given":"Wei","family":"Ye","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5214-2268","authenticated-orcid":false,"given":"Yue","family":"Zhang","sequence":"additional","affiliation":[{"name":"Westlake University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-9309","authenticated-orcid":false,"given":"Neil","family":"Gong","sequence":"additional","affiliation":[{"name":"Duke University, Durham, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8608-8482","authenticated-orcid":false,"given":"Xing","family":"Xie","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, Beijing, 
China"}]}],"member":"320","published-online":{"date-parts":[[2024,11,19]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_2_1_2_1","volume-title":"Lintang Sutawika, Hailey Schoelkopf, Quentin Anthony, Shivanshu Purohit, and Edward Raf.","author":"Biderman Stella","year":"2023","unstructured":"Stella Biderman, USVSN Sai Prashanth, Lintang Sutawika, Hailey Schoelkopf, Quentin Anthony, Shivanshu Purohit, and Edward Raf. 2023. Emergent and predictable memorization in large language models. arXiv preprint arXiv:2304.11158 (2023)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2204.06745"},{"key":"e_1_3_2_1_4_1","unstructured":"Google Brain. 2023. A NewOpen Source Flan 20B with UL2. https:\/\/www.yitay.net\/ blog\/flan-ul2--20b"},{"key":"e_1_3_2_1_5_1","volume-title":"Adversarial patch. arXiv preprint arXiv:1712.09665","author":"Brown Tom B","year":"2017","unstructured":"Tom B Brown, Dandelion Man\u00e9, Aurko Roy, Mart\u00edn Abadi, and Justin Gilmer. 2017. Adversarial patch. arXiv preprint arXiv:1712.09665 (2017)."},{"key":"e_1_3_2_1_6_1","volume-title":"Quantifying Memorization Across Neural Language Models. In The Eleventh International Conference on Learning Representations.","author":"Carlini Nicholas","year":"2023","unstructured":"Nicholas Carlini, Daphne Ippolito, Matthew Jagielski, Katherine Lee, Florian Tramer, and Chiyuan Zhang. 2023. Quantifying Memorization Across Neural Language Models. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 14th International Conference on Spoken Language Translation. International Workshop on Spoken Language Translation","author":"Cettolo Mauro","year":"2017","unstructured":"Mauro Cettolo, Marcello Federico, Luisa Bentivogli, Jan Niehues, Sebastian St\u00fcker, Katsuhito Sudoh, Koichiro Yoshino, and Christian Federmann. 2017. Overview of the IWSLT 2017 Evaluation Campaign. In Proceedings of the 14th International Conference on Spoken Language Translation. International Workshop on Spoken Language Translation, Tokyo, Japan, 2--14. https:\/\/aclanthology.org\/2017.iwslt- 1.1"},{"key":"e_1_3_2_1_8_1","volume-title":"Xing","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03--30-vicuna\/"},{"key":"e_1_3_2_1_9_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems 30","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","unstructured":"Hyung Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Eric Li XuezhiWang Mostafa Dehghani Siddhartha Brahma AlbertWebson Shixiang Shane Gu Zhuyun Dai Mirac Suzgun Xinyun Chen Aakanksha Chowdhery Sharan Narang Gaurav Mishra Adams Yu Vincent Zhao Yanping Huang Andrew Dai Hongkun Yu Slav Petrov Ed H. Chi Jeff Dean Jacob Devlin Adam Roberts Denny Zhou Quoc V. Le and JasonWei. 2022. Scaling Instruction- Finetuned Language Models. 
https:\/\/doi.org\/10.48550\/ARXIV.2210.11416","DOI":"10.48550\/ARXIV.2210.11416"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICALT58122.2023.00100"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/363958.363994"},{"key":"e_1_3_2_1_13_1","volume-title":"Hello Dolly: Democratizing the magic of ChatGPT with open models. https:\/\/www.databricks.com\/blog\/2023\/03\/24\/hello-dolly-democratizing-magic-chatgpt-open-models.html","year":"2023","unstructured":"Databricks. 2023. Hello Dolly: Democratizing the magic of ChatGPT with open models. https:\/\/www.databricks.com\/blog\/2023\/03\/24\/hello-dolly-democratizing-magic-chatgpt-open-models.html"},{"key":"e_1_3_2_1_14_1","volume-title":"Toxicity in chatgpt: Analyzing persona-assigned language models. arXiv preprint arXiv:2304.05335","author":"Deshpande Ameet","year":"2023","unstructured":"Ameet Deshpande, Vishvak Murahari, Tanmay Rajpurohit, Ashwin Kalyan, and Karthik Narasimhan. 2023. Toxicity in chatgpt: Analyzing persona-assigned language models. arXiv preprint arXiv:2304.05335 (2023)."},{"key":"e_1_3_2_1_15_1","unstructured":"Nolan Dey Gurpreet Gosal Zhiming Chen Hemant Khachane William Marshall Ribhu Pathria Marvin Tom and Joel Hestness. 2023. Cerebras-GPT: Open Compute-Optimal Language Models Trained on the Cerebras Wafer-Scale Cluster. arXiv:2304.03208 [cs.LG]"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the Third International Workshop on Paraphrasing (IWP2005)","author":"William","unstructured":"William B. Dolan and Chris Brockett. 2005. Automatically Constructing a Corpus of Sentential Paraphrases. In Proceedings of the Third International Workshop on Paraphrasing (IWP2005). https:\/\/aclanthology.org\/I05-5002"},{"key":"e_1_3_2_1_17_1","volume-title":"Analysis of large-language model versus human performance for genetics questions. European Journal of Human Genetics","author":"Duong Dat","year":"2023","unstructured":"Dat Duong and Benjamin D Solomon. 2023. Analysis of large-language model versus human performance for genetics questions. European Journal of Human Genetics (2023), 1--3."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)","author":"Eisele Andreas","year":"2010","unstructured":"Andreas Eisele and Yu Chen. 2010. MultiUN: A Multilingual Corpus from United Nation Documents. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10). European Language Resources Association (ELRA), Valletta, Malta. http:\/\/www.lrec-conf.org\/proceedings\/lrec2010\/pdf\/686_Paper.pdf"},{"key":"e_1_3_2_1_19_1","volume-title":"Data augmentation for low-resource neural machine translation. arXiv preprint arXiv:1705.00440","author":"Fadaee Marzieh","year":"2017","unstructured":"Marzieh Fadaee, Arianna Bisazza, and Christof Monz. 2017. Data augmentation for low-resource neural machine translation. arXiv preprint arXiv:1705.00440 (2017)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2018.00016"},{"key":"e_1_3_2_1_21_1","volume-title":"Realtoxicityprompts: Evaluating neural toxic degeneration in language models. arXiv preprint arXiv:2009.11462","author":"Gehman Samuel","year":"2020","unstructured":"Samuel Gehman, Suchin Gururangan, Maarten Sap, Yejin Choi, and Noah A Smith. 2020. Realtoxicityprompts: Evaluating neural toxic degeneration in language models. 
arXiv preprint arXiv:2009.11462 (2020)."},{"key":"e_1_3_2_1_22_1","volume-title":"Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572","author":"Goodfellow Ian J","year":"2014","unstructured":"Ian J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2014. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572 (2014)."},{"key":"e_1_3_2_1_23_1","volume-title":"More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models. arXiv preprint arXiv:2302.12173","author":"Greshake Kai","year":"2023","unstructured":"Kai Greshake, Sahar Abdelnabi, Shailesh Mishra, Christoph Endres, Thorsten Holz, and Mario Fritz. 2023. More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models. arXiv preprint arXiv:2302.12173 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"Exploring the Responses of Large Language Models to Beginner Programmers' Help Requests. arXiv preprint arXiv:2306.05715","author":"Hellas Arto","year":"2023","unstructured":"Arto Hellas, Juho Leinonen, Sami Sarsa, Charles Koutcheme, Lilja Kujanp\u00e4\u00e4, and Juha Sorva. 2023. Exploring the Responses of Large Language Models to Beginner Programmers' Help Requests. arXiv preprint arXiv:2306.05715 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"Measuring Massive Multitask Language Understanding. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=d7KBjmI3GmQ","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=d7KBjmI3GmQ"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Jason Holmes Zhengliang Liu Lian Zhang Yuzhen Ding Terence T Sio Lisa A McGee Jonathan B Ashman Xiang Li Tianming Liu Jiajian Shen et al. 2023. Evaluating large language models on a highly-specialized topic radiation oncology physics. arXiv preprint arXiv:2304.01938 (2023).","DOI":"10.3389\/fonc.2023.1219326"},{"key":"e_1_3_2_1_27_1","volume-title":"Adaptive mixtures of local experts. Neural computation 3, 1","author":"Jacobs Robert A","year":"1991","unstructured":"Robert A Jacobs, Michael I Jordan, Steven J Nowlan, and Geoffrey E Hinton. 1991. Adaptive mixtures of local experts. Neural computation 3, 1 (1991), 79--87."},{"key":"e_1_3_2_1_28_1","volume-title":"Joey Tianyi Zhou, and Peter Szolovits","author":"Jin Di","year":"2019","unstructured":"Di Jin, Zhijing Jin, Joey Tianyi Zhou, and Peter Szolovits. 2019. Is BERT Really Robust? Natural Language Attack on Text Classification and Entailment. arXiv preprint arXiv:1907.11932 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"Techniques for automatically correcting words in text. ACM computing surveys (CSUR) 24, 4","author":"Kukich Karen","year":"1992","unstructured":"Karen Kukich. 1992. Techniques for automatically correcting words in text. ACM computing surveys (CSUR) 24, 4 (1992), 377--439."},{"key":"e_1_3_2_1_30_1","unstructured":"Alexey Kurakin Ian Goodfellow Samy Bengio et al. 2016. 
Adversarial examples in the physical world."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/3031843.3031909"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2019.23138"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.500"},{"key":"e_1_3_2_1_34_1","volume-title":"Aligning Large Multi-Modal Model with Robust Instruction Tuning. arXiv preprint arXiv:2306.14565","author":"Liu Fuxiao","year":"2023","unstructured":"Fuxiao Liu, Kevin Lin, Linjie Li, Jianfeng Wang, Yaser Yacoob, and Lijuan Wang. 2023. Aligning Large Multi-Modal Model with Robust Instruction Tuning. arXiv preprint arXiv:2306.14565 (2023)."},{"key":"e_1_3_2_1_35_1","volume-title":"Evaluating the logical reasoning ability of chatgpt and gpt-4. arXiv preprint arXiv:2304.03439","author":"Liu Hanmeng","year":"2023","unstructured":"Hanmeng Liu, Ruoxi Ning, Zhiyang Teng, Jian Liu, Qiji Zhou, and Yue Zhang. 2023. Evaluating the logical reasoning ability of chatgpt and gpt-4. arXiv preprint arXiv:2304.03439 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Prompt Injection attack against LLM-integrated Applications. arXiv preprint arXiv:2306.05499","author":"Liu Yi","year":"2023","unstructured":"Yi Liu, Gelei Deng, Yuekang Li, Kailong Wang, Tianwei Zhang, Yepang Liu, Haoyu Wang, Yan Zheng, and Yang Liu. 2023. Prompt Injection attack against LLM-integrated Applications. arXiv preprint arXiv:2306.05499 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Data contamination: From memorization to exploitation. arXiv preprint arXiv:2203.08242","author":"Magar Inbal","year":"2022","unstructured":"Inbal Magar and Roy Schwartz. 2022. Data contamination: From memorization to exploitation. arXiv preprint arXiv:2203.08242 (2022)."},{"key":"e_1_3_2_1_38_1","first-page":"89","article-title":"Long Short Term Memory Networks for Anomaly Detection in Time Series","volume":"2015","author":"Malhotra Pankaj","year":"2015","unstructured":"Pankaj Malhotra, Lovekesh Vig, Gautam Shroff, Puneet Agarwal, et al. 2015. Long Short Term Memory Networks for Anomaly Detection in Time Series. In Esann, Vol. 2015. 89.","journal-title":"Esann"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Seyed-Mohsen Moosavi-Dezfooli Alhussein Fawzi Omar Fawzi and Pascal Frossard. 2017. Universal adversarial perturbations. In CVPR. 1765--1773.","DOI":"10.1109\/CVPR.2017.17"},{"key":"e_1_3_2_1_40_1","volume-title":"Stress Test Evaluation for Natural Language Inference","author":"Naik Aakanksha","unstructured":"Aakanksha Naik, Abhilasha Ravichander, Norman Sadeh, Carolyn Rose, and Graham Neubig. 2018. Stress Test Evaluation for Natural Language Inference. In ACL. Association for Computational Linguistics, Santa Fe, New Mexico, USA, 2340--2353. https:\/\/aclanthology.org\/C18-1198"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.441"},{"key":"e_1_3_2_1_42_1","unstructured":"OpenAI. 2023. https:\/\/chat.openai.com."},{"key":"e_1_3_2_1_43_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_44_1","volume-title":"International Conference on Machine Learning. PMLR, 4970--4979","author":"Pang Tianyu","year":"2019","unstructured":"Tianyu Pang, Kun Xu, Chao Du, Ning Chen, and Jun Zhu. 2019. Improving adversarial robustness via promoting ensemble diversity. In International Conference on Machine Learning. 
PMLR, 4970--4979."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","unstructured":"Kishore Papineni Salim Roukos Todd Ward and Wei-Jing Zhu. 2002. Bleu: a Method for Automatic Evaluation of Machine Translation. In ACL. 311--318. https:\/\/doi.org\/10.3115\/1073083.1073135","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_46_1","volume-title":"Ignore previous prompt: Attack techniques for language models. arXiv preprint arXiv:2211.09527","author":"Perez F\u00e1bio","year":"2022","unstructured":"F\u00e1bio Perez and Ian Ribeiro. 2022. Ignore previous prompt: Attack techniques for language models. arXiv preprint arXiv:2211.09527 (2022)."},{"key":"e_1_3_2_1_47_1","volume-title":"Toolllm: Facilitating large language models to master 16000 real-world apis. arXiv preprint arXiv:2307.16789","author":"Qin Yujia","year":"2023","unstructured":"Yujia Qin, Shihao Liang, Yining Ye, Kunlun Zhu, Lan Yan, Yaxi Lu, Yankai Lin, Xin Cong, Xiangru Tang, Bill Qian, et al. 2023. Toolllm: Facilitating large language models to master 16000 real-world apis. arXiv preprint arXiv:2307.16789 (2023)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3109859.3109896"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"e_1_3_2_1_50_1","volume-title":"A Survey of Hallucination in Large Foundation Models. arXiv preprint arXiv:2309.05922","author":"Rawte Vipula","year":"2023","unstructured":"Vipula Rawte, Amit Sheth, and Amitava Das. 2023. A Survey of Hallucination in Large Foundation Models. arXiv preprint arXiv:2309.05922 (2023)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1079"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"e_1_3_2_1_53_1","unstructured":"David Saxton Edward Grefenstette Felix Hill and Pushmeet Kohli. 2019. Analysing Mathematical Reasoning Abilities of Neural Models. In ICLR. https:\/\/openreview.net\/forum?id=H1gR5iR5FX"},{"key":"e_1_3_2_1_54_1","volume-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538 (2017)."},{"key":"e_1_3_2_1_55_1","volume-title":"Hugginggpt: Solving ai tasks with chatgpt and its friends in huggingface. arXiv preprint arXiv:2303.17580","author":"Shen Yongliang","year":"2023","unstructured":"Yongliang Shen, Kaitao Song, Xu Tan, Dongsheng Li, Weiming Lu, and Yueting Zhuang. 2023. Hugginggpt: Solving ai tasks with chatgpt and its friends in huggingface. arXiv preprint arXiv:2303.17580 (2023)."},{"key":"e_1_3_2_1_56_1","volume-title":"Societal biases in language generation: Progress and challenges. arXiv preprint arXiv:2105.04054","author":"Sheng Emily","year":"2021","unstructured":"Emily Sheng, Kai-Wei Chang, Premkumar Natarajan, and Nanyun Peng. 2021. Societal biases in language generation: Progress and challenges. arXiv preprint arXiv:2105.04054 (2021)."},{"key":"e_1_3_2_1_57_1","volume-title":"Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank","author":"Socher Richard","unstructured":"Richard Socher, Alex Perelygin, Jean Wu, Jason Chuang, Christopher D. Manning, Andrew Ng, and Christopher Potts. 2013. 
Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank. In EMNLP. Association for Computational Linguistics, Seattle, Washington, USA, 1631--1642. https:\/\/www.aclweb.org\/anthology\/D13-1170"},{"key":"e_1_3_2_1_58_1","volume-title":"Safety Assessment of Chinese Large Language Models. arXiv preprint arXiv:2304.10436","author":"Sun Hao","year":"2023","unstructured":"Hao Sun, Zhexin Zhang, Jiawen Deng, Jiale Cheng, and Minlie Huang. 2023. Safety Assessment of Chinese Large Language Models. arXiv preprint arXiv:2304.10436 (2023)."},{"key":"e_1_3_2_1_59_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_60_1","volume-title":"Universal adversarial triggers for attacking and analyzing NLP. arXiv preprint arXiv:1908.07125","author":"Wallace Eric","year":"2019","unstructured":"Eric Wallace, Shi Feng, Nikhil Kandpal, Matt Gardner, and Sameer Singh. 2019. Universal adversarial triggers for attacking and analyzing NLP. arXiv preprint arXiv:1908.07125 (2019)."},{"key":"e_1_3_2_1_61_1","volume-title":"Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc","author":"Pruksachatkun Yada","year":"2019","unstructured":"Alex Wang, Yada Pruksachatkun, Nikita Nangia, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel Bowman. 2019. SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf"},{"key":"e_1_3_2_1_62_1","volume-title":"Bowman","author":"Singh Amanpreet","year":"2019","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R. Bowman. 2019. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In the Proceedings of ICLR."},{"key":"e_1_3_2_1_63_1","unstructured":"Boxin Wang Weixin Chen Hengzhi Pei Chulin Xie Mintong Kang Chenhui Zhang Chejian Xu Zidi Xiong Ritik Dutta Rylan Schaeffer et al. 2023. DecodingTrust: A Comprehensive Assessment of Trustworthiness in GPT Models. arXiv preprint arXiv:2306.11698 (2023)."},{"key":"e_1_3_2_1_64_1","volume-title":"Ahmed Hassan Awadallah, and Bo Li","author":"Wang Boxin","year":"2021","unstructured":"Boxin Wang, Chejian Xu, Shuohang Wang, Zhe Gan, Yu Cheng, Jianfeng Gao, Ahmed Hassan Awadallah, and Bo Li. 2021. Adversarial glue: A multi-task benchmark for robustness evaluation of language models. arXiv preprint arXiv:2111.02840 (2021)."},{"key":"e_1_3_2_1_65_1","volume-title":"International conference on learning representations (ICLR) workshop on Trustworthy and Reliable Large-Scale Machine Learning Models.","author":"Wang Jindong","year":"2023","unstructured":"Jindong Wang, Xixu Hu, Wenxin Hou, Hao Chen, Runkai Zheng, Yidong Wang, Linyi Yang, Haojun Huang, Wei Ye, Xiubo Geng, et al. 2023. On the robustness of chatgpt: An adversarial and out-of-distribution perspective. 
In International conference on learning representations (ICLR) workshop on Trustworthy and Reliable Large-Scale Machine Learning Models."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"crossref","unstructured":"Zhiguo Wang Wael Hamza and Radu Florian. 2017. Bilateral Multi-Perspective Matching for Natural Language Sentences. arXiv:1702.03814 [cs.AI]","DOI":"10.24963\/ijcai.2017\/579"},{"key":"e_1_3_2_1_67_1","volume-title":"Neural Network Acceptability Judgments. arXiv preprint arXiv:1805.12471","author":"Warstadt Alex","year":"2018","unstructured":"Alex Warstadt, Amanpreet Singh, and Samuel R Bowman. 2018. Neural Network Acceptability Judgments. arXiv preprint arXiv:1805.12471 (2018)."},{"key":"e_1_3_2_1_68_1","volume-title":"Fei Xia, Ed H. Chi, Quoc V Le, and Denny Zhou.","author":"Schuurmans Dale","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, brian ichter, Fei Xia, Ed H. Chi, Quoc V Le, and Denny Zhou. 2022. Chain of Thought Prompting Elicits Reasoning in Large Language Models. In Advances in Neural Information Processing Systems, Alice H. Oh, Alekh Agarwal, Danielle Belgrave, and Kyunghyun Cho (Eds.)."},{"key":"e_1_3_2_1_69_1","volume-title":"NAACL HLT (New Orleans, Louisiana)","author":"Williams Adina","unstructured":"Adina Williams, Nikita Nangia, and Samuel Bowman. 2018. A Broad-Coverage Challenge Corpus for Sentence Understanding through Inference. In NAACL HLT (New Orleans, Louisiana). Association for Computational Linguistics, 1112--1122. http:\/\/aclweb.org\/anthology\/N18-1101"},{"key":"e_1_3_2_1_70_1","volume-title":"International Conference on Machine Learning. PMLR, 23965--23998","author":"Wortsman Mitchell","year":"2022","unstructured":"Mitchell Wortsman, Gabriel Ilharco, Samir Ya Gadre, Rebecca Roelofs, Raphael Gontijo-Lopes, Ari S Morcos, Hongseok Namkoong, Ali Farhadi, Yair Carmon, Simon Kornblith, et al. 2022. Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference time. In International Conference on Machine Learning. PMLR, 23965--23998."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.806"},{"key":"e_1_3_2_1_73_1","unstructured":"Shunyu Yao Dian Yu Jeffrey Zhao Izhak Shafran Thomas L. Griffiths Yuan Cao and Karthik Narasimhan. 2023. Tree of Thoughts: Deliberate Problem Solving with Large Language Models. arXiv:2305.10601"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.540"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510191"},{"key":"e_1_3_2_1_76_1","volume-title":"On Robustness of Prompt-based Semantic Parsing with Large Pre-trained Language Model: An Empirical Study on Codex. arXiv preprint arXiv:2301.12868","author":"Zhuo Terry Yue","year":"2023","unstructured":"Terry Yue Zhuo, Zhuang Li, Yujin Huang, Yuan-Fang Li, Weiqing Wang, Gholamreza Haffari, and Fatemeh Shiri. 2023. On Robustness of Prompt-based Semantic Parsing with Large Pre-trained Language Model: An Empirical Study on Codex. 
arXiv preprint arXiv:2301.12868 (2023)."}],"event":{"name":"CCS '24: ACM SIGSAC Conference on Computer and Communications Security","location":"Salt Lake City UT USA","acronym":"CCS '24","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 1st ACM Workshop on Large AI Systems and Models with Privacy and Safety Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689217.3690621","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3689217.3690621","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:13:15Z","timestamp":1755972795000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689217.3690621"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,19]]},"references-count":76,"alternative-id":["10.1145\/3689217.3690621","10.1145\/3689217"],"URL":"https:\/\/doi.org\/10.1145\/3689217.3690621","relation":{},"subject":[],"published":{"date-parts":[[2023,11,19]]},"assertion":[{"value":"2024-11-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
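
The record above follows the Crossref work-message layout ("status", "message-type", and a "message" object holding the bibliographic fields). As a minimal sketch, assuming only the public Crossref REST endpoint https://api.crossref.org/works/<DOI> (the endpoint itself is not part of the record) and the Python standard library, a record like this can be fetched and a few of its fields read as follows:

import json
import urllib.request

# DOI taken from the record above; the endpoint is the public Crossref REST API.
DOI = "10.1145/3689217.3690621"
URL = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(URL) as resp:
    record = json.load(resp)              # top level: "status", "message-type", "message"

msg = record["message"]
print(msg["title"][0])                    # "PromptRobust: Towards Evaluating the Robustness ..."
print(msg["DOI"], msg["type"])            # DOI and Crossref work type ("proceedings-article")
print(msg.get("is-referenced-by-count"))  # citation count as of indexing

# "author" is a list of objects with "given", "family", and "affiliation" fields.
for author in msg.get("author", []):
    print(author.get("given", ""), author.get("family", ""))

# "reference" holds the work's own bibliography entries (76 in this record).
print(len(msg.get("reference", [])), "references listed")

The same fields ("title", "author", "reference", "is-referenced-by-count") appear verbatim in the JSON above, so the snippet can also be pointed at a saved copy of the record by replacing the HTTP fetch with json.load on a local file.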