{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T03:41:55Z","timestamp":1774669315763,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,19]],"date-time":"2023-11-19T00:00:00Z","timestamp":1700352000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2145493"],"award-info":[{"award-number":["CNS-2145493"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,19]]},"DOI":"10.1145\/3689217.3690618","type":"proceedings-article","created":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T18:22:01Z","timestamp":1732040521000},"page":"77-87","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["\"Prompter Says\": A Linguistic Approach to Understanding and Detecting Jailbreak Attacks Against Large-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2883-9090","authenticated-orcid":false,"given":"Dylan","family":"Lee","sequence":"first","affiliation":[{"name":"Department of Computer Science, UC Irvine, Irvine, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1358-3010","authenticated-orcid":false,"given":"Shaoyuan","family":"Xie","sequence":"additional","affiliation":[{"name":"Department of Computer Science, UC Irvine, Irvine, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6362-7289","authenticated-orcid":false,"given":"Shagoto","family":"Rahman","sequence":"additional","affiliation":[{"name":"Department of Computer Science, UC Irvine, Irvine, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8388-7917","authenticated-orcid":false,"given":"Kenneth","family":"Pat","sequence":"additional","affiliation":[{"name":"Department of Computer Science, UC Irvine, Irvine, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8120-8765","authenticated-orcid":false,"given":"David","family":"Lee","sequence":"additional","affiliation":[{"name":"Department of Computer Science, UC Irvine, Irvine, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0316-9285","authenticated-orcid":false,"given":"Qi Alfred","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, UC Irvine, Irvine, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,11,19]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Josh Achiam et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682430"},{"key":"e_1_3_2_1_3_1","volume-title":"Jordan","author":"Blei David M.","year":"2003","unstructured":"David M. Blei, Andrew Y. Ng, and Michael I. Jordan. 2003. Latent dirichlet allocation. J. Mach. Learn. Res., 3, (Mar. 2003), 993--1022."},{"key":"e_1_3_2_1_4_1","unstructured":"Patrick Chao Alexander Robey Edgar Dobriban Hamed Hassani George J Pappas and Eric Wong. 2023. Jailbreaking black box large language models in twenty queries. arXiv preprint arXiv:2310.08419."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605760.3623764"},{"key":"e_1_3_2_1_6_1","article-title":"Palm: scaling language modeling with pathways","volume":"24","author":"Aakanksha Chowdhery","year":"2023","unstructured":"Aakanksha Chowdhery et al. 2023. Palm: scaling language modeling with pathways. Journal of Machine Learning Research, 24, 240, 1--113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_7_1","first-page":"11","article-title":"A formula for predicting readability","volume":"27","author":"Dale Edgar","year":"1948","unstructured":"Edgar Dale and Jeanne S. Chall. 1948. A formula for predicting readability. Educational Research Bulletin, 27, 1, 11--28. Retrieved July 19, 2024 from http:\/\/www.jstor.org\/stable\/1473169.","journal-title":"Educational Research Bulletin"},{"key":"e_1_3_2_1_8_1","volume-title":"Sinno Jialin Pan, and Lidong Bing","author":"Deng Yue","year":"2024","unstructured":"Yue Deng, Wenxuan Zhang, Sinno Jialin Pan, and Lidong Bing. 2024. Multilingual jailbreak challenges in large language models. (2024). https:\/\/arxiv.org\/abs\/2310.06474 arXiv: 2310.06474 [cs.CL]."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.11192"},{"key":"e_1_3_2_1_10_1","unstructured":"Yunfan Gao et al. 2024. Retrieval-augmented generation for large language models: a survey. (2024). https:\/\/arxiv.org\/abs\/2312.10997 arXiv: 2312.10997 [cs.CL]."},{"key":"e_1_3_2_1_11_1","volume-title":"Adasyn: adaptive synthetic sampling approach for imbalanced learning. In 2008 IEEE international joint conference on neural networks","author":"He Haibo","unstructured":"Haibo He, Yang Bai, Edwardo A Garcia, and Shutao Li. 2008. Adasyn: adaptive synthetic sampling approach for imbalanced learning. In 2008 IEEE international joint conference on neural networks (IEEE world congress on computational intelligence). Ieee, 1322--1328."},{"key":"e_1_3_2_1_12_1","unstructured":"Jiabao Ji Bairu Hou Alexander Robey George J. Pappas Hamed Hassani Yang Zhang Eric Wong and Shiyu Chang. 2024. Defending large language models against jailbreak attacks via semantic smoothing. (2024). https:\/\/arxiv.org\/abs\/2402.16192 arXiv: 2402.16192 [cs.CL]."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6311"},{"key":"e_1_3_2_1_14_1","unstructured":"Zhihua Jin Shiyi Liu Haotian Li Xun Zhao and Huamin Qu. 2024. Jailbreakhunter: a visual analytics approach for jailbreak prompts discovery from large-scale human-llm conversational datasets. arXiv preprint arXiv:2407.03045."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Marek Kubis Pawe Sk\u00f3rzewski Marcin Sowa'ski and Tomasz Zitkiewicz. 2023. Back transcription as a method for evaluating robustness of natural language understanding models to speech recognition errors. (2023). https:\/\/arxiv.org\/abs\/2310.16609 arXiv: 2310.16609 [cs.CL].","DOI":"10.18653\/v1\/2023.emnlp-main.724"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00655"},{"key":"e_1_3_2_1_17_1","unstructured":"Jie Li Yi Liu Chongyang Liu Ling Shi Xiaoning Ren Yaowen Zheng Yang Liu and Yinxing Xue. 2024. A cross-language investigation into jailbreak attacks in large language models. (2024). https:\/\/arxiv.org\/abs\/2401.16765 arXiv: 2401.16765 [cs.CR]."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks. http:\/\/is.muni.cz\/publication\/884893\/en. ELRA","author":"Petr Sojka Radim","year":"2010","unstructured":"Radim ehek and Petr Sojka. 2010. Software Framework for Topic Modelling with Large Corpora. English. In Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks. http:\/\/is.muni.cz\/publication\/884893\/en. ELRA, Valletta, Malta, (May 2010), 45--50."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Xinyue Shen Zeyuan Chen Michael Backes Yun Shen and Yang Zhang. 2023. \"do anything now\": characterizing and evaluating in-the-wild jailbreak prompts on large language models. arXiv preprint arXiv:2308.03825.","DOI":"10.1145\/3658644.3670388"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-6504"},{"key":"e_1_3_2_1_21_1","unstructured":"Hugo Touvron et al. 2023. Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288."},{"key":"e_1_3_2_1_22_1","unstructured":"Xunguang Wang et al. 2024. Selfdefend: llms can defend themselves against jailbreaking in a practical manner. arXiv preprint arXiv:2406.05498."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Yihan Wang Zhouxing Shi Andrew Bai and Cho-Jui Hsieh. 2024. Defending llms against jailbreaking attacks via backtranslation. (2024). https:\/\/arxiv.org\/abs\/2402.16459 arXiv: 2402.16459 [cs.CL].","DOI":"10.18653\/v1\/2024.findings-acl.948"},{"key":"e_1_3_2_1_24_1","volume-title":"Jailbroken: How Does LLM Safety Training Fail? Advances in Neural Information Processing Systems, 36.","author":"Wei Alexander","year":"2024","unstructured":"Alexander Wei, Nika Haghtalab, and Jacob Steinhardt. 2024. Jailbroken: How Does LLM Safety Training Fail? Advances in Neural Information Processing Systems, 36."},{"key":"e_1_3_2_1_25_1","unstructured":"Zhiyuan Yu Xiaogeng Liu Shunning Liang Zach Cameron Chaowei Xiao and Ning Zhang. 2024. Dont listen to me: understanding and exploring jailbreak prompts of large language models. arXiv preprint arXiv:2403.17336."}],"event":{"name":"CCS '24: ACM SIGSAC Conference on Computer and Communications Security","location":"Salt Lake City UT USA","acronym":"CCS '24","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 1st ACM Workshop on Large AI Systems and Models with Privacy and Safety Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689217.3690618","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3689217.3690618","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:12:35Z","timestamp":1755972755000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689217.3690618"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,19]]},"references-count":25,"alternative-id":["10.1145\/3689217.3690618","10.1145\/3689217"],"URL":"https:\/\/doi.org\/10.1145\/3689217.3690618","relation":{},"subject":[],"published":{"date-parts":[[2023,11,19]]},"assertion":[{"value":"2024-11-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}