{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T20:28:40Z","timestamp":1760300920914,"version":"3.40.3"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031709029"},{"type":"electronic","value":"9783031709036"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70903-6_8","type":"book-chapter","created":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T22:04:30Z","timestamp":1725487470000},"page":"146-165","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["IntentObfuscator: A Jailbreaking Method via\u00a0Confusing LLM with\u00a0Prompts"],"prefix":"10.1007","author":[{"given":"Shang","family":"Shang","sequence":"first","affiliation":[]},{"given":"Zhongjiang","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Yepeng","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Liya","family":"Su","sequence":"additional","affiliation":[]},{"given":"Zijing","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Xiaodan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhengwei","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,5]]},"reference":[{"key":"8_CR1","unstructured":"Abcarter: What is the size of the training set for gpt-3 (2023). https:\/\/community.openai.com\/t\/what-is-the-size-of-the-training-set-for-gpt-3\/360896\/1"},{"key":"8_CR2","unstructured":"AlexalBERT: Hypothetical response. https:\/\/www.jailbreakchat.com\/prompt\/b1fe938b-4541-41c8-96e7-b1c659ec4ef9"},{"key":"8_CR3","unstructured":"Bai, J., et\u00a0al.: Qwen Technical report. arXiv preprint arXiv:2309.16609 (2023)"},{"key":"8_CR4","unstructured":"Beckerich, M., Plein, L., Coronado, S.: RatGPT: turning online LLMs into proxies for malware attacks. arXiv preprint arXiv:2308.09183 (2023)"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Cao, Q., Kojima, T., Matsuo, Y., Iwasawa, Y.: Unnatural error correction: GPT-4 can almost perfectly handle unnatural scrambled text (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.550"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Chen, B., Wang, G., Guo, H., Wang, Y., Yan, Q.: Understanding multi-turn toxic behaviors in open-domain chatbots. In: Proceedings of the 26th International Symposium on Research in Attacks, Intrusions and Defenses, pp. 282\u2013296 (2023)","DOI":"10.1145\/3607199.3607237"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Y., Arunasalam, A., Celik, Z.B.: Can large language models provide security & privacy advice? Measuring the ability of LLMs to refute misconceptions. In: Proceedings of the 39th Annual Computer Security Applications Conference, pp. 366\u2013378 (2023)","DOI":"10.1145\/3627106.3627196"},{"key":"8_CR8","unstructured":"Chin, Z.Y., Jiang, C.M., Huang, C.C., Chen, P.Y., Chiu, W.C.: Prompting4debugging: red-teaming text-to-image diffusion models by finding problematic prompts. arXiv preprint arXiv:2309.06135 (2023)"},{"key":"8_CR9","unstructured":"Chu, J., Liu, Y., Yang, Z., Shen, X., Backes, M., Zhang, Y.: Comprehensive assessment of jailbreak attacks against LLMs. arXiv preprint arXiv:2402.05668 (2024)"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Deng, B., Wang, W., Feng, F., Deng, Y., Wang, Q., He, X.: Attack prompt generation for red teaming and defending large language models. arXiv preprint arXiv:2310.12505 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.143"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Deng, G., et al.: Masterkey: automated jailbreaking of large language model chatbots. In: Proceedings of ISOC NDSS (2024)","DOI":"10.14722\/ndss.2024.24188"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Ghafouri, V., Agarwal, V., Zhang, Y., Sastry, N., Such, J., Suarez-Tangil, G.: AI in the gray: exploring moderation policies in dialogic large language models vs. human answers in controversial topics. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, pp. 556\u2013565 (2023)","DOI":"10.1145\/3583780.3614777"},{"key":"8_CR13","unstructured":"Google, Inc: Google\u2019s secure AI framework (SAIF) (2023). https:\/\/safety.google\/cybersecurity-advancements\/saif\/t"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Gupta, M., Akiri, C., Aryal, K., Parker, E., Praharaj, L.: From ChatGPT to threatGPT: impact of generative AI in cybersecurity and privacy. IEEE Access (2023)","DOI":"10.1109\/ACCESS.2023.3300381"},{"key":"8_CR15","unstructured":"Hazell, J.: Large language models can be used to effectively scale spear phishing campaigns. arXiv preprint arXiv:2305.06972 (2023)"},{"key":"8_CR16","unstructured":"Jiang, S., Chen, X., Tang, R.: Prompt packer: deceiving LLMs through compositional instruction with hidden attacks (2023)"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Kang, D., Li, X., Stoica, I., Guestrin, C., Zaharia, M., Hashimoto, T.: Exploiting programmatic behavior of LLMs: dual-use through standard security attacks. arXiv preprint arXiv:2302.05733 (2023)","DOI":"10.1109\/SPW63631.2024.00018"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Li, H., Guo, D., Fan, W., Xu, M., Song, Y.: Multi-step jailbreaking privacy attacks on ChatGPT. arXiv preprint arXiv:2304.05197 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.272"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Jailbreaking ChatGPT via prompt engineering: an empirical study (2023)","DOI":"10.1145\/3663530.3665021"},{"key":"8_CR20","unstructured":"OpenAI, Inc: Content policy (2022). https:\/\/labs.openai.com\/policies\/content-policy"},{"key":"8_CR21","unstructured":"OpenAI, Inc: GPT-3.5 Turbo. https:\/\/platform.openai.com\/docs\/models\/gpt-3-5"},{"key":"8_CR22","unstructured":"OpenAI, Inc: GPT-4 and GPT-4 Turbo. https:\/\/platform.openai.com\/docs\/models\/gpt-4-and-gpt-4-turbo"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Qi, X., Huang, K., Panda, A., Henderson, P., Wang, M., Mittal, P.: Visual adversarial examples jailbreak aligned large language models (2023)","DOI":"10.1609\/aaai.v38i19.30150"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Reynolds, L., McDonell, K.: Prompt programming for large language models: beyond the few-shot paradigm. In: Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems, pp.\u00a01\u20137 (2021)","DOI":"10.1145\/3411763.3451760"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Shanahan, M., McDonell, K., Reynolds, L.: Role play with large language models. Nature, 1\u20136 (2023)","DOI":"10.1038\/s41586-023-06647-8"},{"key":"8_CR26","unstructured":"Shen, X., Chen, Z., Backes, M., Shen, Y., Zhang, Y.: \u201cdo anything now\u201d: Characterizing and evaluating in-the-wild jailbreak prompts on large language models. arXiv preprint arXiv:2308.03825 (2023)"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Si, W.M., et al.: Why so toxic? Measuring and triggering toxic behavior in open-domain chatbots. In: Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security, pp. 2659\u20132673 (2022)","DOI":"10.1145\/3548606.3560599"},{"key":"8_CR28","unstructured":"Wolf, Y., Wies, N., Levine, Y., Shashua, A.: Fundamental limitations of alignment in large language models. arXiv preprint arXiv:2304.11082 (2023)"},{"key":"8_CR29","unstructured":"Yang, A., et\u00a0al.: Baichuan 2: Open large-scale language models. arXiv preprint arXiv:2309.10305 (2023)"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Yao, D., Zhang, J., Harris, I.G., Carlsson, M.: FuzzLLM: a novel and universal fuzzing framework for proactively discovering jailbreak vulnerabilities in large language models. arXiv preprint arXiv:2309.05274 (2023)","DOI":"10.1109\/ICASSP48485.2024.10448041"},{"key":"8_CR31","unstructured":"Yu, J., Lin, X., Xing, X.: GPTfuzzer: red teaming large language models with auto-generated jailbreak prompts. arXiv preprint arXiv:2309.10253 (2023)"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Zamfirescu-Pereira, J., Wong, R.Y., Hartmann, B., Yang, Q.: Why Johnny can\u2019t prompt: how non-AI experts try (and fail) to design LLM prompts. In: Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems, pp. 1\u201321 (2023)","DOI":"10.1145\/3544548.3581388"},{"key":"8_CR33","unstructured":"Zhang, M., Pan, X., Yang, M.: Jade: a linguistics-based safety evaluation platform for LLM (2023)"},{"key":"8_CR34","unstructured":"Zou, A., Wang, Z., Kolter, J.Z., Fredrikson, M.: Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Security \u2013 ESORICS 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70903-6_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T22:06:22Z","timestamp":1725487582000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70903-6_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031709029","9783031709036"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70903-6_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"5 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ESORICS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Symposium on Research in Computer Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bydgoszcz","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"esorics2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/esorics2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}