{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:57:23Z","timestamp":1775667443657,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100007567","name":"City University of Hong Kong","doi-asserted-by":"publisher","award":["Grants 9678146 and 9678126"],"award-info":[{"award-number":["Grants 9678146 and 9678126"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100007567","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hong Kong RGC","award":["Grants CityU 11218521, 11218322, R6021-20F, R1012-21, RFS2122-1S04, C2004-21G, C1029-22G, and N_CityU139\/21"],"award-info":[{"award-number":["Grants CityU 11218521, 11218322, R6021-20F, R1012-21, RFS2122-1S04, C2004-21G, C1029-22G, and N_CityU139\/21"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681379","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"3578-3586","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Arondight: Red Teaming Large Vision Language Models with Auto-generated Multi-modal Jailbreak Prompts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0811-6150","authenticated-orcid":false,"given":"Yi","family":"Liu","sequence":"first","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8045-4226","authenticated-orcid":false,"given":"Chengjun","family":"Cai","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong (Dongguan), Dongguan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8317-5539","authenticated-orcid":false,"given":"Xiaoli","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3701-4946","authenticated-orcid":false,"given":"Xingliang","family":"Yuan","sequence":"additional","affiliation":[{"name":"The University of Melbourne, Melbourne, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0547-315X","authenticated-orcid":false,"given":"Cong","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, City University of Hong Kong, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Bing Chat. https:\/\/www.microsoft.com\/en-us\/edge\/features\/bing-chat."},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. Google Bard. https:\/\/bard.google.com\/."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. RNIE Bot. https:\/\/yiyan.baidu.com\/welcome."},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. Spark. https:\/\/xinghuo.xfyun.cn\/desk."},{"key":"e_1_3_2_1_5_1","unstructured":"[n. d.]. T5--3B. https:\/\/huggingface.co\/sentence-transformers\/paraphrasemultilingual-MiniLM-L12-v2."},{"key":"e_1_3_2_1_6_1","unstructured":"[n. d.]. Toxicity category rating. https:\/\/en.wikipedia.org\/wiki\/Toxicity_category_rating."},{"key":"e_1_3_2_1_7_1","volume-title":"Proc. of NeurIPS.","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model for few-shot learning. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_8_1","volume-title":"Proc. of USENIX Security.","author":"Arp Daniel","year":"2022","unstructured":"Daniel Arp, Erwin Quiring, Feargus Pendlebury, Alexander Warnecke, Fabio Pierazzi, ChristianWressnegger, Lorenzo Cavallaro, and Konrad Rieck. 2022. Dos and don'ts of machine learning in computer security. In Proc. of USENIX Security."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP46214.2022.9833572"},{"key":"e_1_3_2_1_10_1","volume-title":"Qwen-vl: A frontier large visionlanguage model with versatile abilities. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, ShijieWang, Sinan Tan, PengWang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A frontier large visionlanguage model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"Red-teaming large language models using chain of utterances for safety-alignment. arXiv preprint arXiv:2308.09662","author":"Bhardwaj Rishabh","year":"2023","unstructured":"Rishabh Bhardwaj and Soujanya Poria. 2023. Red-teaming large language models using chain of utterances for safety-alignment. arXiv preprint arXiv:2308.09662 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"Proc. of NeurIPS.","author":"Carlini Nicholas","year":"2023","unstructured":"Nicholas Carlini, Milad Nasr, Christopher A Choquette-Choo, Matthew Jagielski, Irena Gao, Anas Awadalla, PangWei Koh, Daphne Ippolito, Katherine Lee, Florian Tramer, et al. 2023. Are aligned neural networks adversarially aligned?. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3128572.3140444"},{"key":"e_1_3_2_1_14_1","volume-title":"Jailbreaking Black Box Large Language Models in Twenty Queries. arXiv preprint arXiv:2310.08419","author":"Chao Patrick","year":"2023","unstructured":"Patrick Chao, Alexander Robey, Edgar Dobriban, Hamed Hassani, George J Pappas, and Eric Wong. 2023. Jailbreaking Black Box Large Language Models in Twenty Queries. arXiv preprint arXiv:2310.08419 (2023)."},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. of NDSS.","author":"Deng Gelei","year":"2023","unstructured":"Gelei Deng, Yi Liu, Yuekang Li, Kailong Wang, Ying Zhang, Zefeng Li, Haoyu Wang, Tianwei Zhang, and Yang Liu. 2023. Jailbreaker: Automated jailbreak across multiple large language model chatbots. In Proc. of NDSS."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"e_1_3_2_1_17_1","unstructured":"Peng Gao Jiaming Han Renrui Zhang Ziyi Lin Shijie Geng Aojun Zhou Wei Zhang Pan Lu Conghui He Xiangyu Yue et al. 2023. Llama-adapter v2: Parameter-efficient visual instruction model. arXiv preprint arXiv:2304.15010 (2023)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.498"},{"key":"e_1_3_2_1_19_1","volume-title":"FigStep: Jailbreaking Large Visionlanguage Models via Typographic Visual Prompts. arXiv preprint arXiv:2311.05608","author":"Gong Yichen","year":"2023","unstructured":"Yichen Gong, Delong Ran, Jinyuan Liu, Conglei Wang, Tianshuo Cong, Anyu Wang, Sisi Duan, and Xiaoyun Wang. 2023. FigStep: Jailbreaking Large Visionlanguage Models via Typographic Visual Prompts. arXiv preprint arXiv:2311.05608 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP54263.2024.00061"},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. of ICLR.","author":"Hong Zhang-Wei","year":"2023","unstructured":"Zhang-Wei Hong, Idan Shenfeld, Tsun-Hsuan Wang, Yung-Sung Chuang, Aldo Pareja, James R Glass, Akash Srivastava, and Pulkit Agrawal. 2023. Curiositydriven Red-teaming for Large Language Models. In Proc. of ICLR."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583348"},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of NeurIPS.","author":"Ji Jiaming","year":"2023","unstructured":"Jiaming Ji, Mickel Liu, Juntao Dai, Xuehai Pan, Chi Zhang, Ce Bian, Boyuan Chen, Ruiyang Sun, Yizhou Wang, and Yaodong Yang. 2023. BeaverTails: Towards Improved Safety Alignment of LLM via a Human-Preference Dataset. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. of ICML.","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In Proc. of ICML."},{"key":"e_1_3_2_1_25_1","volume-title":"Red teaming visual language models. arXiv preprint arXiv:2401.12915","author":"Li Mukai","year":"2024","unstructured":"Mukai Li, Lei Li, Yuwei Yin, Masood Ahmed, Zhenguang Liu, and Qi Liu. 2024. Red teaming visual language models. arXiv preprint arXiv:2401.12915 (2024)."},{"key":"e_1_3_2_1_26_1","volume-title":"DeepInception: Hypnotize Large Language Model to Be Jailbreaker. arXiv preprint arXiv:2311.03191","author":"Li Xuan","year":"2023","unstructured":"Xuan Li, Zhanke Zhou, Jianing Zhu, Jiangchao Yao, Tongliang Liu, and Bo Han. 2023. DeepInception: Hypnotize Large Language Model to Be Jailbreaker. arXiv preprint arXiv:2311.03191 (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547969"},{"key":"e_1_3_2_1_28_1","volume-title":"Proc. of ICLR.","author":"Liu Xiaogeng","year":"2024","unstructured":"Xiaogeng Liu, Nan Xu, Muhao Chen, and Chaowei Xiao. 2024. AutoDAN: Generating Stealthy Jailbreak Prompts on Aligned Large Language Models. In Proc. of ICLR."},{"key":"e_1_3_2_1_29_1","volume-title":"Query-Relevant Images Jailbreak Large Multi-Modal Models. arXiv preprint arXiv:2311.17600","author":"Liu Xin","year":"2023","unstructured":"Xin Liu, Yichen Zhu, Yunshi Lan, Chao Yang, and Yu Qiao. 2023. Query-Relevant Images Jailbreak Large Multi-Modal Models. arXiv preprint arXiv:2311.17600 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of NDSS.","author":"Liu Yi","year":"2023","unstructured":"Yi Liu, Gelei Deng, Zhengzi Xu, Yuekang Li, Yaowen Zheng, Ying Zhang, Lida Zhao, Tianwei Zhang, and Yang Liu. 2023. Jailbreaking chatgpt via prompt engineering: An empirical study. Proc. of NDSS."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611994"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/EuroSP.2018.00035"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.225"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i19.30150"},{"key":"e_1_3_2_1_36_1","volume-title":"Latent jailbreak: A benchmark for evaluating text safety and output robustness of large language models. arXiv preprint arXiv:2307.08487","author":"Qiu Huachuan","year":"2023","unstructured":"Huachuan Qiu, Shuai Zhang, Anqi Li, Hongliang He, and Zhenzhong Lan. 2023. Latent jailbreak: A benchmark for evaluating text safety and output robustness of large language models. arXiv preprint arXiv:2307.08487 (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_38_1","volume-title":"Yu Fu, Pedram Zaree, Yue Dong, and Nael Abu-Ghazaleh.","author":"Shayegani Erfan","year":"2023","unstructured":"Erfan Shayegani, Md Abdullah Al Mamun, Yu Fu, Pedram Zaree, Yue Dong, and Nael Abu-Ghazaleh. 2023. Survey of Vulnerabilities in Large Language Models Revealed by Adversarial Attacks. arXiv preprint arXiv:2310.10844 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00639"},{"key":"e_1_3_2_1_40_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of NeurIPS.","author":"Tsimpoukelli Maria","year":"2021","unstructured":"Maria Tsimpoukelli, Jacob L Menick, Serkan Cabi, SM Eslami, Oriol Vinyals, and Felix Hill. 2021. Multimodal few-shot learning with frozen language models. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_42_1","volume-title":"How Many Unicorns Are in This Image? A Safety Evaluation Benchmark for Vision LLMs. arXiv preprint arXiv:2311.16101","author":"Tu Haoqin","year":"2023","unstructured":"Haoqin Tu, Chenhang Cui, ZijunWang, Yiyang Zhou, Bingchen Zhao, Junlin Han, Wangchunshu Zhou, Huaxiu Yao, and Cihang Xie. 2023. How Many Unicorns Are in This Image? A Safety Evaluation Benchmark for Vision LLMs. arXiv preprint arXiv:2311.16101 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Proc. of NeurIPS.","author":"Wei Alexander","year":"2023","unstructured":"Alexander Wei, Nika Haghtalab, and Jacob Steinhardt. 2023. Jailbroken: How Does LLM Safety Training Fail?. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Yuchen Yang Bo Hui Haolin Yuan Neil Gong and Yinzhi Cao. 2023. SneakyPrompt: Jailbreaking Text-to-image Generative Models. arXiv:2305.12082 [cs.LG]","DOI":"10.1109\/SP54263.2024.00123"},{"key":"e_1_3_2_1_45_1","volume-title":"Gptfuzzer: Red teaming large language models with auto-generated jailbreak prompts. arXiv preprint arXiv:2309.10253","author":"Yu Jiahao","year":"2023","unstructured":"Jiahao Yu, Xingwei Lin, and Xinyu Xing. 2023. Gptfuzzer: Red teaming large language models with auto-generated jailbreak prompts. arXiv preprint arXiv:2309.10253 (2023)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547801"},{"key":"e_1_3_2_1_47_1","volume-title":"Proc. of ICLR.","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. BERTScore: Evaluating Text Generation with BERT. In Proc. of ICLR."},{"key":"e_1_3_2_1_48_1","volume-title":"Proc. of NeurIPS.","author":"Zhao Yunqing","year":"2024","unstructured":"Yunqing Zhao, Tianyu Pang, Chao Du, Xiao Yang, Chongxuan Li, Ngai-Man Man Cheung, and Min Lin. 2024. On evaluating adversarial robustness of large visionlanguage models. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_49_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."},{"key":"e_1_3_2_1_50_1","volume-title":"Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043","author":"Zou Andy","year":"2023","unstructured":"Andy Zou, Zifan Wang, J Zico Kolter, and Matt Fredrikson. 2023. Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681379","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681379","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:44Z","timestamp":1750295864000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681379"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":49,"alternative-id":["10.1145\/3664647.3681379","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681379","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}