{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T17:32:52Z","timestamp":1772127172355,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62276067"],"award-info":[{"award-number":["62276067"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022ZD0160103"],"award-info":[{"award-number":["2022ZD0160103"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681092","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"6920-6928","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["White-box Multimodal Jailbreaks Against Large Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6122-1380","authenticated-orcid":false,"given":"Ruofan","family":"Wang","sequence":"first","affiliation":[{"name":"Shanghai Key Lab of Intell. Info. Processing, School of CS, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2099-4973","authenticated-orcid":false,"given":"Xingjun","family":"Ma","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Intell. Info. Processing, School of CS, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8492-4492","authenticated-orcid":false,"given":"Hanxu","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Mathematical Sciences, Shanghai Jiaotong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7747-5835","authenticated-orcid":false,"given":"Chuanjun","family":"Ji","sequence":"additional","affiliation":[{"name":"DataGrand Co., Ltd, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4973-7942","authenticated-orcid":false,"given":"Guangnan","family":"Ye","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Intell. Info. Processing, School of CS, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1907-8567","authenticated-orcid":false,"given":"Yu-Gang","family":"Jiang","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Intell. Info. Processing, School of CS, Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 23716--23736.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_3_1","volume-title":"arXiv preprint arXiv:2307.10490","author":"Bagdasaryan Eugene","year":"2023","unstructured":"Eugene Bagdasaryan, Tsung-Yin Hsieh, Ben Nassi, and Vitaly Shmatikov. 2023. (Ab) using Images and Sounds for Indirect Instruction Injection in Multi-Modal LLMs. arXiv preprint arXiv:2307.10490 (2023)."},{"key":"e_1_3_2_1_4_1","volume-title":"Image hijacks: Adversarial images can control generative models at runtime. arXiv preprint arXiv:2309.00236","author":"Bailey Luke","year":"2023","unstructured":"Luke Bailey, Euan Ong, Stuart Russell, and Scott Emmons. 2023. Image hijacks: Adversarial images can control generative models at runtime. arXiv preprint arXiv:2309.00236 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Daphne Ippolito, Florian Tramer, and Ludwig Schmidt.","author":"Carlini Nicholas","year":"2024","unstructured":"Nicholas Carlini, Milad Nasr, Christopher A Choquette-Choo, Matthew Jagielski, Irena Gao, Pang Wei W Koh, Daphne Ippolito, Florian Tramer, and Ludwig Schmidt. 2024. Are aligned neural networks adversarially aligned? Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al. 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023) (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi.","author":"Dai Wenliang","year":"2023","unstructured":"Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi. 2023. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arxiv: 2305.06500 [cs.CV]"},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_9_1","volume-title":"Hotflip: White-box adversarial examples for text classification. arXiv preprint arXiv:1712.06751","author":"Ebrahimi Javid","year":"2017","unstructured":"Javid Ebrahimi, Anyi Rao, Daniel Lowd, and Dejing Dou. 2017. Hotflip: White-box adversarial examples for text classification. arXiv preprint arXiv:1712.06751 (2017)."},{"key":"e_1_3_2_1_10_1","volume-title":"Realtoxicityprompts: Evaluating neural toxic degeneration in language models. arXiv preprint arXiv:2009.11462","author":"Gehman Samuel","year":"2020","unstructured":"Samuel Gehman, Suchin Gururangan, Maarten Sap, Yejin Choi, and Noah A Smith. 2020. Realtoxicityprompts: Evaluating neural toxic degeneration in language models. arXiv preprint arXiv:2009.11462 (2020)."},{"key":"e_1_3_2_1_11_1","volume-title":"Figstep: Jailbreaking large vision-language models via typographic visual prompts. arXiv preprint arXiv:2311.05608","author":"Gong Yichen","year":"2023","unstructured":"Yichen Gong, Delong Ran, Jinyuan Liu, Conglei Wang, Tianshuo Cong, Anyu Wang, Sisi Duan, and Xiaoyun Wang. 2023. Figstep: Jailbreaking large vision-language models via typographic visual prompts. arXiv preprint arXiv:2311.05608 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models. arXiv e-prints","author":"Greshake Kai","year":"2023","unstructured":"Kai Greshake, Sahar Abdelnabi, Shailesh Mishra, Christoph Endres, Thorsten Holz, and Mario Fritz. 2023. More than you've asked for: A Comprehensive Analysis of Novel Prompt Injection Threats to Application-Integrated Large Language Models. arXiv e-prints (2023), arXiv--2302."},{"key":"e_1_3_2_1_13_1","unstructured":"Laura Hanu and Unitary team. 2020. Detoxify. Github. https:\/\/github.com\/unitaryai\/detoxify."},{"key":"e_1_3_2_1_14_1","volume-title":"The curious case of neural text degeneration. arXiv preprint arXiv:1904.09751","author":"Holtzman Ari","year":"2019","unstructured":"Ari Holtzman, Jan Buys, Li Du, Maxwell Forbes, and Yejin Choi. 2019. The curious case of neural text degeneration. arXiv preprint arXiv:1904.09751 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_17_1","volume-title":"Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083","author":"Madry Aleksander","year":"2017","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2017. Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083 (2017)."},{"key":"e_1_3_2_1_18_1","volume-title":"Robust conversational agents against imperceptible toxicity triggers. arXiv preprint arXiv:2205.02392","author":"Mehrabi Ninareh","year":"2022","unstructured":"Ninareh Mehrabi, Ahmad Beirami, Fred Morstatter, and Aram Galstyan. 2022. Robust conversational agents against imperceptible toxicity triggers. arXiv preprint arXiv:2205.02392 (2022)."},{"key":"e_1_3_2_1_19_1","volume-title":"Jailbreaking attack against multimodal large language model. arXiv preprint arXiv:2402.02309","author":"Niu Zhenxing","year":"2024","unstructured":"Zhenxing Niu, Haodong Ren, Xinbo Gao, Gang Hua, and Rong Jin. 2024. Jailbreaking attack against multimodal large language model. arXiv preprint arXiv:2402.02309 (2024)."},{"key":"e_1_3_2_1_20_1","volume-title":"Visual Adversarial Examples Jailbreak Large Language Models. arXiv preprint arXiv:2306.13213","author":"Qi Xiangyu","year":"2023","unstructured":"Xiangyu Qi, Kaixuan Huang, Ashwinee Panda, Mengdi Wang, and Prateek Mittal. 2023. Visual Adversarial Examples Jailbreak Large Language Models. arXiv preprint arXiv:2306.13213 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00434"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00395"},{"key":"e_1_3_2_1_24_1","volume-title":"Plug and Pray: Exploiting off-the-shelf components of Multi-Modal Models. arXiv preprint arXiv:2307.14539","author":"Shayegani Erfan","year":"2023","unstructured":"Erfan Shayegani, Yue Dong, and Nael Abu-Ghazaleh. 2023. Plug and Pray: Exploiting off-the-shelf components of Multi-Modal Models. arXiv preprint arXiv:2307.14539 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"Yu Fu, Pedram Zaree, Yue Dong, and Nael Abu-Ghazaleh.","author":"Shayegani Erfan","year":"2023","unstructured":"Erfan Shayegani, Md Abdullah Al Mamun, Yu Fu, Pedram Zaree, Yue Dong, and Nael Abu-Ghazaleh. 2023. Survey of vulnerabilities in large language models revealed by adversarial attacks. arXiv preprint arXiv:2310.10844 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Universal adversarial triggers for attacking and analyzing NLP. arXiv preprint arXiv:1908.07125","author":"Wallace Eric","year":"2019","unstructured":"Eric Wallace, Shi Feng, Nikhil Kandpal, Matt Gardner, and Sameer Singh. 2019. Universal adversarial triggers for attacking and analyzing NLP. arXiv preprint arXiv:1908.07125 (2019)."},{"key":"e_1_3_2_1_28_1","volume-title":"Ngai-Man Man Cheung, and Min Lin","author":"Zhao Yunqing","year":"2024","unstructured":"Yunqing Zhao, Tianyu Pang, Chao Du, Xiao Yang, Chongxuan Li, Ngai-Man Man Cheung, and Min Lin. 2024. On evaluating adversarial robustness of large vision-language models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043","author":"Zou Andy","year":"2023","unstructured":"Andy Zou, Zifan Wang, J Zico Kolter, and Matt Fredrikson. 2023. Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681092","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681092","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:52Z","timestamp":1750294672000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681092"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":30,"alternative-id":["10.1145\/3664647.3681092","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681092","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}