{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,8]],"date-time":"2026-07-08T16:00:59Z","timestamp":1783526459126,"version":"3.55.0"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031734632","type":"print"},{"value":"9783031734649","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73464-9_11","type":"book-chapter","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T09:39:24Z","timestamp":1733218764000},"page":"174-189","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":28,"title":["Images are Achilles\u2019 Heel of\u00a0Alignment: Exploiting Visual Vulnerabilities for\u00a0Jailbreaking Multimodal Large Language Models"],"prefix":"10.1007","author":[{"given":"Yifan","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hangyu","family":"Guo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kun","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wayne Xin","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"key":"11_CR1","unstructured":"Anil, R., et\u00a0al.: Gemini: a family of highly capable multimodal models. CoRR abs\/2312.11805 (2023)"},{"key":"11_CR2","unstructured":"Askell, A., et al.: A general language assistant as a laboratory for alignment. CoRR abs\/2112.00861 (2021)"},{"key":"11_CR3","unstructured":"Carlini, N., et al.: Are aligned neural networks adversarially aligned? CoRR abs\/2306.15447 (2023)"},{"key":"11_CR4","unstructured":"Chao, P., Robey, A., Dobriban, E., Hassani, H., Pappas, G.J., Wong, E.: Jailbreaking black box large language models in twenty queries. CoRR abs\/2310.08419 (2023)"},{"key":"11_CR5","unstructured":"Chen, J., et al.: MiniGPT-v2: large language model as a unified interface for vision-language multi-task learning. CoRR abs\/2310.09478 (2023)"},{"key":"11_CR6","unstructured":"Chen, J., et al.: PixArt-$$\\alpha $$: fast training of diffusion transformer for photorealistic text-to-image synthesis. CoRR abs\/2310.00426 (2023)"},{"key":"11_CR7","unstructured":"Chiang, W.L., et al.: Vicuna: an open-source chatbot impressing GPT-4 with 90%* chatGPT quality (2023). https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"11_CR8","unstructured":"Dong, Y., et al.: How robust is Google\u2019s bard to adversarial image attacks? CoRR abs\/2309.11751 (2023)"},{"key":"11_CR9","unstructured":"Ganguli, D., et al.: Red teaming language models to reduce harms: methods, scaling behaviors, and lessons learned. CoRR abs\/2209.07858 (2022)"},{"key":"11_CR10","unstructured":"Gong, Y., et al.: FigStep: jailbreaking large vision-language models via typographic visual prompts. CoRR abs\/2311.05608 (2023)"},{"key":"11_CR11","unstructured":"Ji, J., et al.: Beavertails: towards improved safety alignment of LLM via a human-preference dataset. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, 10\u201316 December 2023, New Orleans, LA, USA (2023)"},{"key":"11_CR12","unstructured":"Ji, J., et al.: Beavertails: towards improved safety alignment of LLM via a human-preference dataset. CoRR abs\/2307.04657 (2023)"},{"key":"11_CR13","doi-asserted-by":"crossref","unstructured":"Li, M., Li, L., Yin, Y., Ahmed, M., Liu, Z., Liu, Q.: Red teaming visual language models. CoRR abs\/2401.12915 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.198"},{"key":"11_CR14","unstructured":"Liu, H., Li, C., Li, Y., Lee, Y.J.: Improved baselines with visual instruction tuning. CoRR abs\/2310.03744 (2023)"},{"key":"11_CR15","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. CoRR abs\/2304.08485 (2023)"},{"key":"11_CR16","unstructured":"Liu, X., Zhu, Y., Lan, Y., Yang, C., Qiao, Y.: Query-relevant images jailbreak large multi-modal models. CoRR abs\/2311.17600 (2023)"},{"key":"11_CR17","unstructured":"Niu, Z., Ren, H., Gao, X., Hua, G., Jin, R.: Jailbreaking attack against multimodal large language model. CoRR abs\/2402.02309 (2024)"},{"key":"11_CR18","unstructured":"OpenAI: GPT-4v(ision) system card (2023)"},{"key":"11_CR19","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. In: NeurIPS (2022)"},{"key":"11_CR20","unstructured":"Qi, X., Huang, K., Panda, A., Wang, M., Mittal, P.: Visual adversarial examples jailbreak large language models. CoRR abs\/2306.13213 (2023)"},{"key":"11_CR21","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8748\u20138763. PMLR (2021)"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"Schlarmann, C., Hein, M.: On the adversarial robustness of multi-modal foundation models. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023 - Workshops, Paris, France, 2\u20136 October 2023, pp. 3679\u20133687. IEEE (2023)","DOI":"10.1109\/ICCVW60793.2023.00395"},{"key":"11_CR23","unstructured":"Shayegani, E., Dong, Y., Abu-Ghazaleh, N.B.: Jailbreak in pieces: compositional adversarial attacks on multi-modal language models. CoRR abs\/2307.14539 (2023)"},{"key":"11_CR24","unstructured":"Shen, X., Chen, Z., Backes, M., Shen, Y., Zhang, Y.: \u201cDo anything now\u201d: characterizing and evaluating in-the-wild jailbreak prompts on large language models. CoRR abs\/2308.03825 (2023)"},{"key":"11_CR25","unstructured":"Subhash, V., Bialas, A., Pan, W., Doshi-Velez, F.: Why do universal adversarial attacks work on large language models?: geometry might be the answer. CoRR abs\/2309.00254 (2023)"},{"key":"11_CR26","unstructured":"Touvron, H., et al.: LLaMA 2: open foundation and fine-tuned chat models. CoRR abs\/2307.09288 (2023)"},{"key":"11_CR27","unstructured":"Tu, H., et al.: How many unicorns are in this image? A safety evaluation benchmark for vision LLMs. CoRR abs\/2311.16101 (2023)"},{"key":"11_CR28","unstructured":"Wang, J.G., Wang, J., Li, M., Neel, S.: Pandora\u2019s white-box: increased training data leakage in open LLMs. arXiv preprint arXiv:2402.17012 (2024)"},{"key":"11_CR29","unstructured":"Wei, A., Haghtalab, N., Steinhardt, J.: Jailbroken: how does LLM safety training fail? In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, 10\u201316 December 2023, New Orleans, LA, USA (2023)"},{"key":"11_CR30","unstructured":"Wei, Z., Wang, Y., Wang, Y.: Jailbreak and guard aligned language models with only few in-context demonstrations. CoRR abs\/2310.06387 (2023)"},{"key":"11_CR31","unstructured":"Wu, Y., Li, X., Liu, Y., Zhou, P., Sun, L.: Jailbreaking GPT-4V via self-adversarial attacks with system prompts. CoRR abs\/2311.09127 (2023)"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Yin, S., et al.: A survey on multimodal large language models. CoRR abs\/2306.13549 (2023)","DOI":"10.1093\/nsr\/nwae403"},{"key":"11_CR33","unstructured":"Zhao, W.X., et al.: A survey of large language models. CoRR abs\/2303.18223 (2023)"},{"key":"11_CR34","unstructured":"Zhao, Y., et al.: On evaluating adversarial robustness of large vision-language models. CoRR abs\/2305.16934 (2023)"},{"key":"11_CR35","unstructured":"Zhu, D., Chen, J., Shen, X., Li, X., Elhoseiny, M.: MiniGPT-4: enhancing vision-language understanding with advanced large language models. CoRR abs\/2304.10592 (2023)"},{"key":"11_CR36","unstructured":"Zou, A., Wang, Z., Kolter, J.Z., Fredrikson, M.: Universal and transferable adversarial attacks on aligned language models. CoRR abs\/2307.15043 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73464-9_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T10:05:54Z","timestamp":1733220354000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73464-9_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"ISBN":["9783031734632","9783031734649"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73464-9_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"4 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}