{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:40:37Z","timestamp":1765309237807,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","funder":[{"name":"Research Grants Council of Hong Kong","award":["15207122,15213323,15205325"],"award-info":[{"award-number":["15207122,15213323,15205325"]}]},{"name":"The Hong Kong Polytechnic University","award":["BDWP"],"award-info":[{"award-number":["BDWP"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755702","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:56:44Z","timestamp":1761375404000},"page":"4992-5000","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Harmless Multimodal Assistants with Blind Preference Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6932-4228","authenticated-orcid":false,"given":"Yongqi","family":"Li","sequence":"first","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6696-6399","authenticated-orcid":false,"given":"Lu","family":"Yang","sequence":"additional","affiliation":[{"name":"Wuhan University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8992-8336","authenticated-orcid":false,"given":"Jian","family":"Wang","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6018-1129","authenticated-orcid":false,"given":"Runyang","family":"You","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7360-8864","authenticated-orcid":false,"given":"Wenjie","family":"Li","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1476-0273","authenticated-orcid":false,"given":"Liqiang","family":"Nie","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen), Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","unstructured":"Jean-Baptiste Alayrac Jeff Donahue Pauline Luc Antoine Miech Iain Barr Yana Hasson Karel Lenc Arthur Mensch Katherine Millican Malcolm Reynolds et al. 2022. Flamingo: a visual language model for few-shot learning. Advances in neural information processing systems Vol. 35 (2022) 23716-23736."},{"key":"e_1_3_2_1_3_1","unstructured":"Amanda Askell Yuntao Bai Anna Chen Dawn Drain Deep Ganguli Tom Henighan Andy Jones Nicholas Joseph Ben Mann Nova DasSarma et al. 2021. A general language assistant as a laboratory for alignment. arXiv preprint arXiv:2112.00861 (2021)."},{"key":"e_1_3_2_1_4_1","volume-title":"Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)."},{"key":"e_1_3_2_1_5_1","unstructured":"Yuntao Bai Andy Jones Kamal Ndousse Amanda Askell Anna Chen Nova DasSarma Dawn Drain Stanislav Fort Deep Ganguli Tom Henighan et al. 2022. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"Image hijacks: Adversarial images can control generative models at runtime. arXiv preprint arXiv:2309.00236","author":"Bailey Luke","year":"2023","unstructured":"Luke Bailey, Euan Ong, Stuart Russell, and Scott Emmons. 2023. Image hijacks: Adversarial images can control generative models at runtime. arXiv preprint arXiv:2309.00236 (2023)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01350"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01124"},{"key":"e_1_3_2_1_9_1","volume-title":"Ultrafeedback: Boosting language models with high-quality feedback.","author":"Cui Ganqu","year":"2023","unstructured":"Ganqu Cui, Lifan Yuan, Ning Ding, Guanming Yao, Wei Zhu, Yuan Ni, Guotong Xie, Zhiyuan Liu, and Maosong Sun. 2023. Ultrafeedback: Boosting language models with high-quality feedback. (2023)."},{"key":"e_1_3_2_1_10_1","unstructured":"Xiaoyi Dong Pan Zhang Yuhang Zang Yuhang Cao Bin Wang Linke Ouyang Xilin Wei Songyang Zhang Haodong Duan Maosong Cao et al. 2024. Internlm-xcomposer2: Mastering free-form text-image composition and comprehension in vision-language large model. arXiv preprint arXiv:2401.16420 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Bidipta Sarkar, Rohan Taori, Yusuke Noda, Demetri Terzopoulos, Yejin Choi, et al.","author":"Durante Zane","year":"2024","unstructured":"Zane Durante, Qiuyuan Huang, Naoki Wake, Ran Gong, Jae Sung Park, Bidipta Sarkar, Rohan Taori, Yusuke Noda, Demetri Terzopoulos, Yejin Choi, et al., 2024. Agent ai: Surveying the horizons of multimodal interaction. arXiv preprint arXiv:2401.03568 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"Figstep: Jailbreaking large vision-language models via typographic visual prompts. arXiv preprint arXiv:2311.05608","author":"Gong Yichen","year":"2023","unstructured":"Yichen Gong, Delong Ran, Jinyuan Liu, Conglei Wang, Tianshuo Cong, Anyu Wang, Sisi Duan, and Xiaoyun Wang. 2023. Figstep: Jailbreaking large vision-language models via typographic visual prompts. arXiv preprint arXiv:2311.05608 (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"e_1_3_2_1_14_1","volume-title":"More than you've asked for: A comprehensive analysis of novel prompt injection threats to application-integrated large language models. arXiv preprint arXiv:2302.12173","author":"Greshake Kai","year":"2023","unstructured":"Kai Greshake, Sahar Abdelnabi, Shailesh Mishra, Christoph Endres, Thorsten Holz, and Mario Fritz. 2023. More than you've asked for: A comprehensive analysis of novel prompt injection threats to application-integrated large language models. arXiv preprint arXiv:2302.12173, Vol. 27 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.358"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730077"},{"key":"e_1_3_2_1_17_1","volume-title":"European Conference on Computer Vision. Springer, 174-189","author":"Li Yifan","year":"2024","unstructured":"Yifan Li, Hangyu Guo, Kun Zhou, Wayne Xin Zhao, and Ji-Rong Wen. 2024a. Images are achilles' heel of alignment: Exploiting visual vulnerabilities for jailbreaking multimodal large language models. In European Conference on Computer Vision. Springer, 174-189."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.639"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_2_1_20_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024c. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210003"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240549"},{"key":"e_1_3_2_1_23_1","volume-title":"European Conference on Computer Vision. Springer, 386-403","author":"Liu Xin","year":"2024","unstructured":"Xin Liu, Yichen Zhu, Jindong Gu, Yunshi Lan, Chao Yang, and Yu Qiao. 2024d. Mm-safetybench: A benchmark for safety evaluation of multimodal large language models. In European Conference on Computer Vision. Springer, 386-403."},{"key":"e_1_3_2_1_24_1","volume-title":"European conference on computer vision. Springer, 216-233","author":"Liu Yuan","year":"2024","unstructured":"Yuan Liu, Haodong Duan, Yuanhan Zhang, Bo Li, Songyang Zhang, Wangbo Zhao, Yike Yuan, Jiaqi Wang, Conghui He, Ziwei Liu, et al., 2024a. Mmbench: Is your multi-modal model an all-around player?. In European conference on computer vision. Springer, 216-233."},{"key":"e_1_3_2_1_25_1","unstructured":"Haoyu Lu Wen Liu Bo Zhang Bingxuan Wang Kai Dong Bo Liu Jingxiang Sun Tongzheng Ren Zhuoshu Li Hao Yang et al. 2024. Deepseek-vl: towards real-world vision-language understanding. arXiv preprint arXiv:2403.05525 (2024)."},{"key":"e_1_3_2_1_26_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems Vol. 35 (2022) 27730-27744."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i19.30150"},{"key":"e_1_3_2_1_28_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D Manning, Stefano Ermon, and Chelsea Finn. 2024. Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Assessment of multimodal large language models in alignment with human values. arXiv preprint arXiv:2403.17830","author":"Shi Zhelun","year":"2024","unstructured":"Zhelun Shi, Zhipin Wang, Hongxing Fan, Zaibin Zhang, Lijun Li, Yongting Zhang, Zhenfei Yin, Lu Sheng, Yu Qiao, and Jing Shao. 2024. Assessment of multimodal large language models in alignment with human values. arXiv preprint arXiv:2403.17830 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28338"},{"key":"e_1_3_2_1_31_1","volume-title":"Cross-modality safety alignment. arXiv preprint arXiv:2406.15279","author":"Wang Siyin","year":"2024","unstructured":"Siyin Wang, Xingsong Ye, Qinyuan Cheng, Junwen Duan, Shimin Li, Jinlan Fu, Xipeng Qiu, and Xuanjing Huang. 2024b. Cross-modality safety alignment. arXiv preprint arXiv:2406.15279 (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"Jailbreak and guard aligned language models with only few in-context demonstrations. arXiv preprint arXiv:2310.06387","author":"Wei Zeming","year":"2023","unstructured":"Zeming Wei, Yifei Wang, Ang Li, Yichuan Mo, and Yisen Wang. 2023. Jailbreak and guard aligned language models with only few in-context demonstrations. arXiv preprint arXiv:2310.06387 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Minicpm-v: A gpt-4v level mllm on your phone. arXiv preprint arXiv:2408.01800","author":"Yao Yuan","year":"2024","unstructured":"Yuan Yao, Tianyu Yu, Ao Zhang, Chongyi Wang, Junbo Cui, Hongji Zhu, Tianchi Cai, Haoyu Li, Weilin Zhao, Zhihui He, et al., 2024. Minicpm-v: A gpt-4v level mllm on your phone. arXiv preprint arXiv:2408.01800 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"mplug-owl3: Towards long image-sequence understanding in multi-modal large language models. arXiv preprint arXiv:2408.04840","author":"Ye Jiabo","year":"2024","unstructured":"Jiabo Ye, Haiyang Xu, Haowei Liu, Anwen Hu, Ming Yan, Qi Qian, Ji Zhang, Fei Huang, and Jingren Zhou. 2024. mplug-owl3: Towards long image-sequence understanding in multi-modal large language models. arXiv preprint arXiv:2408.04840 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01310"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Yongting Zhang Lu Chen Guodong Zheng Yifeng Gao Rui Zheng Jinlan Fu Zhenfei Yin Senjie Jin Yu Qiao Xuanjing Huang et al. 2024. SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Model. arXiv preprint arXiv:2406.12030 (2024).","DOI":"10.1109\/CVPR52734.2025.01850"},{"key":"e_1_3_2_1_37_1","first-page":"55006","article-title":"Lima: Less is more for alignment","volume":"36","author":"Zhou Chunting","year":"2023","unstructured":"Chunting Zhou, Pengfei Liu, Puxin Xu, Srinivasan Iyer, Jiao Sun, Yuning Mao, Xuezhe Ma, Avia Efrat, Ping Yu, Lili Yu, et al., 2023. Lima: Less is more for alignment. Advances in Neural Information Processing Systems, Vol. 36 (2023), 55006-55021.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"The Twelfth International Conference on Learning Representations.","author":"Zhu Deyao","key":"e_1_3_2_1_38_1","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. [n.d.]. MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_39_1","volume-title":"Safety fine-tuning at (almost) no cost: A baseline for vision large language models. arXiv preprint arXiv:2402.02207","author":"Zong Yongshuo","year":"2024","unstructured":"Yongshuo Zong, Ondrej Bohdal, Tingyang Yu, Yongxin Yang, and Timothy Hospedales. 2024. Safety fine-tuning at (almost) no cost: A baseline for vision large language models. arXiv preprint arXiv:2402.02207 (2024)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755702","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:36:09Z","timestamp":1765308969000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755702"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":39,"alternative-id":["10.1145\/3746027.3755702","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755702","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}