{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T15:25:57Z","timestamp":1781018757703,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T00:00:00Z","timestamp":1774224000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Research Foundation of Korea(NRF) funded by the Ministry of Education","award":["25411243"],"award-info":[{"award-number":["25411243"]}]},{"name":"Korea Internet & Security Agency (KISA) grant funded by the Korean government (Ministry of Science and ICT, MSIT)","award":["Information Security Workforce Development (Information Security Specialized University Program)"],"award-info":[{"award-number":["Information Security Workforce Development (Information Security Specialized University Program)"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,23]]},"DOI":"10.1145\/3748522.3779852","type":"proceedings-article","created":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:17:49Z","timestamp":1781014669000},"page":"1515-1524","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["LOKI: Alias-based Semantic Jailbreak Strategy for Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5013-8445","authenticated-orcid":false,"given":"Heeyeong","family":"Suh","sequence":"first","affiliation":[{"name":"Kangwon National University, Chuncheon, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6415-8096","authenticated-orcid":false,"given":"Nayeon","family":"Ryu","sequence":"additional","affiliation":[{"name":"Kangwon national university, Chuncheon, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5910-3024","authenticated-orcid":false,"given":"Seokjae","family":"Gwon","sequence":"additional","affiliation":[{"name":"Kangwon National University, Chuncheon, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8071-852X","authenticated-orcid":false,"given":"Jeongho","family":"Lee","sequence":"additional","affiliation":[{"name":"Kangwon National University, Chuncheon, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8277-8486","authenticated-orcid":false,"given":"Seyoung","family":"Lee","sequence":"additional","affiliation":[{"name":"Kangwon National University, Chuncheon, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Yuntao Bai et al. 2022. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862."},{"key":"e_1_3_2_1_2_1","unstructured":"Tom Brown et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 1877\u20131901."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2687"},{"key":"e_1_3_2_1_4_1","unstructured":"Nicholas Carlini et al. 2021. Extracting training data from large language models. In 30th USENIX security symposium (USENIX Security 21) 2633\u20132650."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SaTML64287.2025.00010"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Liying Cheng Xingxuan Li and Lidong Bing. 2023. Is gpt-4 a good data analyst? arXiv preprint arXiv:2305.15038.","DOI":"10.18653\/v1\/2023.findings-emnlp.637"},{"key":"e_1_3_2_1_7_1","unstructured":"Deep Ganguli et al. 2022. Red teaming language models to reduce harms: methods scaling behaviors and lessons learned. arXiv preprint arXiv:2209.07858."},{"key":"e_1_3_2_1_8_1","unstructured":"Hila Gonen and Yoav Goldberg. 2019. Lipstick on a pig: debiasing methods cover up systematic gender biases in word embeddings but do not remove them. arXiv preprint arXiv:1903.03862."},{"key":"e_1_3_2_1_9_1","unstructured":"Ian J Goodfellow Jonathon Shlens and Christian Szegedy. 2014. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572."},{"key":"e_1_3_2_1_10_1","unstructured":"Yangsibo Huang Samyak Gupta Mengzhou Xia Kai Li and Danqi Chen. 2023. Catastrophic jailbreak of open-source llms via exploiting generation. arXiv preprint arXiv:2310.06987."},{"key":"e_1_3_2_1_11_1","unstructured":"Saurav Kadavath et al. 2022. Language models (mostly) know what they know. arXiv preprint arXiv:2207.05221."},{"key":"e_1_3_2_1_12_1","unstructured":"Xiaogeng Liu Nan Xu Muhao Chen and Chaowei Xiao. 2023. Autodan: generating stealthy jailbreak prompts on aligned large language models. arXiv preprint arXiv:2310.04451."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Long Ouyang et al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 27730\u201327744.","DOI":"10.52202\/068431-2011"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Ethan Perez Saffron Huang Francis Song Trevor Cai Roman Ring John Aslanides Amelia Glaese Nat McAleese and Geoffrey Irving. 2022. Red teaming language models with language models. arXiv preprint arXiv:2202.03286.","DOI":"10.18653\/v1\/2022.emnlp-main.225"},{"key":"e_1_3_2_1_15_1","unstructured":"F\u00e1bio Perez and Ian Ribeiro. 2022. Ignore previous prompt: attack techniques for language models. arXiv preprint arXiv:2211.09527."},{"key":"e_1_3_2_1_16_1","unstructured":"Xiangyu Qi Yi Zeng Tinghao Xie Pin-Yu Chen Ruoxi Jia Prateek Mittal and Peter Henderson. 2023. Fine-tuning aligned language models compromises safety even when users do not intend to! URL https:\/\/arxiv.org\/abs\/2310.03693."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Marco Tulio Ribeiro Tongshuang Wu Carlos Guestrin and Sameer Singh. 2020. Beyond accuracy: behavioral testing of nlp models with checklist. arXiv preprint arXiv:2005.04118.","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"e_1_3_2_1_18_1","unstructured":"Paul R\u00f6ttger Bertram Vidgen Dong Nguyen Zeerak Waseem Helen Margetts and Janet B Pierrehumbert. 2020. Hatecheck: functional tests for hate speech detection models. arXiv preprint arXiv:2012.15606."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658644.3670388"},{"key":"e_1_3_2_1_20_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Eric Wallace Shi Feng Nikhil Kandpal Matt Gardner and Sameer Singh. 2019. Universal adversarial triggers for attacking and analyzing nlp. arXiv preprint arXiv:1908.07125.","DOI":"10.18653\/v1\/D19-1221"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Alex Wang Amanpreet Singh Julian Michael Felix Hill Omer Levy and Samuel R Bowman. 2018. Glue: a multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461.","DOI":"10.18653\/v1\/W18-5446"},{"key":"e_1_3_2_1_23_1","unstructured":"Jason Wei et al. 2022. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682."},{"key":"e_1_3_2_1_24_1","unstructured":"Andy Zou Zifan Wang Nicholas Carlini Milad Nasr J Zico Kolter and Matt Fredrikson. 2023. Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043."}],"event":{"name":"SAC '26: 41st ACM\/SIGAPP Symposium on Applied Computing","location":"Grand Hotel Palace Thessaloniki Greece","acronym":"SAC '26","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 41st ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3748522.3779852","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:28:09Z","timestamp":1781015289000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748522.3779852"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,23]]},"references-count":24,"alternative-id":["10.1145\/3748522.3779852","10.1145\/3748522"],"URL":"https:\/\/doi.org\/10.1145\/3748522.3779852","relation":{},"subject":[],"published":{"date-parts":[[2026,3,23]]},"assertion":[{"value":"2026-06-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}