{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:10:24Z","timestamp":1755825024324,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006374","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2024A1515030209, 2024A1515011970"],"award-info":[{"award-number":["2024A1515030209, 2024A1515011970"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&D Program of China","award":["2023YFC3321600"],"award-info":[{"award-number":["2023YFC3321600"]}]},{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2023B1515130002"],"award-info":[{"award-number":["2023B1515130002"]}]},{"name":"Shenzhen Science and Technology Innovation Commission","award":["JCYJ20230807140507015, JCYJ20220531100804009"],"award-info":[{"award-number":["JCYJ20230807140507015, JCYJ20220531100804009"]}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376263, 62372443, 62271496"],"award-info":[{"award-number":["62376263, 62372443, 62271496"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733407","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:31:39Z","timestamp":1750876299000},"page":"1118-1127","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On the Adversarial Robustness of Visual-Language Chat Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8386-2003","authenticated-orcid":false,"given":"Tianrui","family":"Qin","sequence":"first","affiliation":[{"name":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China and OPPO Research Institute, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9900-9117","authenticated-orcid":false,"given":"Xuan","family":"Wang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1002-9272","authenticated-orcid":false,"given":"Juanjuan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6133-407X","authenticated-orcid":false,"given":"Kejiang","family":"Ye","sequence":"additional","affiliation":[{"name":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China and Shenzhen University of Advanced Technology, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9480-0356","authenticated-orcid":false,"given":"Cheng-zhong","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2063-2051","authenticated-orcid":false,"given":"Xitong","family":"Gao","sequence":"additional","affiliation":[{"name":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China and Shenzhen University of Advanced Technology, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et al. 2023. GPT-4 Technical Report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 23716--23736.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_3_1","volume-title":"Generating natural language adversarial examples. arXiv preprint arXiv:1804.07998","author":"Alzantot Moustafa","year":"2018","unstructured":"Moustafa Alzantot, Yash Sharma, Ahmed Elgohary, Bo-Jhang Ho, Mani Srivastava, and Kai-Wei Chang. 2018. Generating natural language adversarial examples. arXiv preprint arXiv:1804.07998 (2018)."},{"key":"e_1_3_2_1_4_1","volume-title":"Image Hijacks: Adversarial Images can Control Generative Models at Runtime. arXiv preprint arXiv:2309.00236","author":"Bailey Luke","year":"2023","unstructured":"Luke Bailey, Euan Ong, Stuart Russell, and Scott Emmons. 2023. Image Hijacks: Adversarial Images can Control Generative Models at Runtime. arXiv preprint arXiv:2309.00236 (2023)."},{"key":"e_1_3_2_1_5_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_7_1","volume-title":"Daphne Ippolito, Katherine Lee, Florian Tramer, et al.","author":"Carlini Nicholas","year":"2023","unstructured":"Nicholas Carlini, Milad Nasr, Christopher A Choquette-Choo, Matthew Jagielski, Irena Gao, Anas Awadalla, Pang Wei Koh, Daphne Ippolito, Katherine Lee, Florian Tramer, et al. 2023. Are aligned neural networks adversarially aligned? arXiv preprint arXiv:2306.15447 (2023)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3128572.3140444"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00421"},{"key":"e_1_3_2_1_10_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research, Vol. 24, 240 (2023), 1--113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_11_1","unstructured":"Jiaming Chu Yanzhuo Xiang Yuqi Li Chuanguang Yang Zhulin An and Yongjun Xu. 2025. Cross-Layer Graph Knowledge Distillation for Image Recognition. In ICASSP."},{"key":"e_1_3_2_1_12_1","unstructured":"Francesco Croce Maksym Andriushchenko Vikash Sehwag and et al. 2020. RobustBench: a standardized adversarial robustness benchmark. arXiv:2010.09670 (2020)."},{"key":"e_1_3_2_1_13_1","volume-title":"Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi.","author":"Dai Wenliang","year":"2023","unstructured":"Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi. 2023. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arxiv: 2305.06500 [cs.CV]"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_15_1","volume-title":"How Robust is Google's Bard to Adversarial Image Attacks? arXiv preprint arXiv:2309.11751","author":"Dong Yinpeng","year":"2023","unstructured":"Yinpeng Dong, Huanran Chen, Jiawei Chen, Zhengwei Fang, Xiao Yang, Yichi Zhang, Yu Tian, Hang Su, and Jun Zhu. 2023. How Robust is Google's Bard to Adversarial Image Attacks? arXiv preprint arXiv:2309.11751 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Class-wise Image Mixture Guided Self-Knowledge Distillation for Image Classification. In 2024 27th International Conference on Computer Supported Cooperative Work in Design (CSCWD). IEEE, 310--315","author":"Dong Zeyu","year":"2024","unstructured":"Zeyu Dong, Chuanguang Yang, Yuqi Li, Libo Huang, Zhulin An, and Yongjun Xu. 2024. Class-wise Image Mixture Guided Self-Knowledge Distillation for Image Classification. In 2024 27th International Conference on Computer Supported Cooperative Work in Design (CSCWD). IEEE, 310--315."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-020-09548-1"},{"key":"e_1_3_2_1_18_1","unstructured":"Deep Ganguli Liane Lovitt Jackson Kernion Amanda Askell Yuntao Bai Saurav Kadavath Ben Mann Ethan Perez Nicholas Schiefer Kamal Ndousse et al. 2022. Red teaming language models to reduce harms: Methods scaling behaviors and lessons learned. arXiv preprint arXiv:2209.07858 (2022)."},{"key":"e_1_3_2_1_19_1","volume-title":"Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572","author":"Goodfellow Ian J","year":"2014","unstructured":"Ian J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2014. Explaining and harnessing adversarial examples. arXiv preprint arXiv:1412.6572 (2014)."},{"volume-title":"https:\/\/bard.google.com\/","year":"2023","key":"e_1_3_2_1_20_1","unstructured":"Google. 2023. Bard. https:\/\/bard.google.com\/ (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_22_1","volume-title":"Automatically Auditing Large Language Models via Discrete Optimization. arXiv preprint arXiv:2303.04381","author":"Jones Erik","year":"2023","unstructured":"Erik Jones, Anca Dragan, Aditi Raghunathan, and Jacob Steinhardt. 2023. Automatically Auditing Large Language Models via Discrete Optimization. arXiv preprint arXiv:2303.04381 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"GPT2: Empirical slant delay model for radio space geodetic techniques. Geophysical research letters","author":"Lagler Klemens","year":"2013","unstructured":"Klemens Lagler, Michael Schindelegger, Johannes B\u00f6hm, Hana Kr\u00e1sn\u00e1, and Tobias Nilsson. 2013. GPT2: Empirical slant delay model for radio space geodetic techniques. Geophysical research letters, Vol. 40, 6 (2013), 1069--1073."},{"key":"e_1_3_2_1_24_1","volume-title":"Robust evaluation of diffusion-based adversarial purification. arXiv preprint arXiv:2303.09051","author":"Lee Minjong","year":"2023","unstructured":"Minjong Lee and Dongwoo Kim. 2023. Robust evaluation of diffusion-based adversarial purification. arXiv preprint arXiv:2303.09051 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)."},{"volume-title":"ICASSP 2025--2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Li Jiarui","key":"e_1_3_2_1_26_1","unstructured":"Jiarui Li, Qiu Zhen, Yilin Yang, Yuqi Li, Zeyu Dong, and Chuanguang Yang. 2025 b. Prototype-Driven Multi-Feature Generation for Visible-Infrared Person Re-identification. In ICASSP 2025--2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1--5."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00205"},{"key":"e_1_3_2_1_28_1","volume-title":"2025 a. FedKD-hybrid: Federated Hybrid Knowledge Distillation for Lithography Hotspot Detection. arXiv preprint arXiv:2501.04066","author":"Li Yuqi","year":"2025","unstructured":"Yuqi Li, Xingyou Lin, Kai Zhang, Chuanguang Yang, Zhongliang Guo, Jianping Gou, and Yanli Li. 2025 a. FedKD-hybrid: Federated Hybrid Knowledge Distillation for Lithography Hotspot Detection. arXiv preprint arXiv:2501.04066 (2025)."},{"key":"e_1_3_2_1_29_1","volume-title":"SGLP: A Similarity Guided Fast Layer Partition Pruning for Compressing Large Deep Models. arXiv preprint arXiv:2410.14720","author":"Li Yuqi","year":"2024","unstructured":"Yuqi Li, Yao Lu, Zeyu Dong, Chuanguang Yang, Yihao Chen, and Jianping Gou. 2024. SGLP: A Similarity Guided Fast Layer Partition Pruning for Compressing Large Deep Models. arXiv preprint arXiv:2410.14720 (2024)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings, Part V 13","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part V 13. Springer, 740--755."},{"key":"e_1_3_2_1_31_1","unstructured":"Xun Lin Yi Yu Song Xia Jue Jiang Haoran Wang Zitong Yu Yizhong Liu Ying Fu Shuai Wang Wenzhong Tang et al. 2024a. Safeguarding Medical Image Segmentation Datasets against Unauthorized Training via Contour-and Texture-Aware Perturbations. arXiv preprint arXiv:2403.14250 (2024)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680806"},{"key":"e_1_3_2_1_33_1","volume-title":"Visual instruction tuning. arXiv preprint arXiv:2304.08485","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023b. Visual instruction tuning. arXiv preprint arXiv:2304.08485 (2023)."},{"key":"e_1_3_2_1_34_1","volume-title":"Prompt Injection attack against LLM-integrated Applications. arXiv preprint arXiv:2306.05499","author":"Liu Yi","year":"2023","unstructured":"Yi Liu, Gelei Deng, Yuekang Li, Kailong Wang, Tianwei Zhang, Yepang Liu, Haoyu Wang, Yan Zheng, and Yang Liu. 2023a. Prompt Injection attack against LLM-integrated Applications. arXiv preprint arXiv:2306.05499 (2023)."},{"key":"e_1_3_2_1_35_1","volume-title":"A generic layer pruning method for signal modulation recognition deep learning models","author":"Lu Yao","year":"2024","unstructured":"Yao Lu, Yutao Zhu, Yuqi Li, Dongwei Xu, Yun Lin, Qi Xuan, and Xiaoniu Yang. 2024. A generic layer pruning method for signal modulation recognition deep learning models. IEEE TCCN (2024)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3381337"},{"key":"e_1_3_2_1_37_1","volume-title":"Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083","author":"Madry Aleksander","year":"2017","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2017. Towards deep learning models resistant to adversarial attacks. arXiv preprint arXiv:1706.06083 (2017)."},{"key":"e_1_3_2_1_38_1","volume-title":"Diffusion models for adversarial purification. arXiv preprint arXiv:2205.07460","author":"Nie Weili","year":"2022","unstructured":"Weili Nie, Brandon Guo, Yujia Huang, Chaowei Xiao, Arash Vahdat, and Anima Anandkumar. 2022. Diffusion models for adversarial purification. arXiv preprint arXiv:2205.07460 (2022)."},{"key":"e_1_3_2_1_39_1","volume-title":"Adversarial NLI: A new benchmark for natural language understanding. arXiv preprint arXiv:1910.14599","author":"Nie Yixin","year":"2019","unstructured":"Yixin Nie, Adina Williams, Emily Dinan, Mohit Bansal, Jason Weston, and Douwe Kiela. 2019. Adversarial NLI: A new benchmark for natural language understanding. arXiv preprint arXiv:1910.14599 (2019)."},{"key":"e_1_3_2_1_40_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems, Vol. 35 (2022), 27730--27744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_41_1","volume-title":"Red teaming language models with language models. arXiv preprint arXiv:2202.03286","author":"Perez Ethan","year":"2022","unstructured":"Ethan Perez, Saffron Huang, Francis Song, Trevor Cai, Roman Ring, John Aslanides, Amelia Glaese, Nat McAleese, and Geoffrey Irving. 2022. Red teaming language models with language models. arXiv preprint arXiv:2202.03286 (2022)."},{"key":"e_1_3_2_1_42_1","volume-title":"The Second Workshop on New Frontiers in Adversarial Machine Learning","volume":"1","author":"Qi Xiangyu","year":"2023","unstructured":"Xiangyu Qi, Kaixuan Huang, Ashwinee Panda, Mengdi Wang, and Prateek Mittal. 2023. Visual adversarial examples jailbreak aligned large language models. In The Second Workshop on New Frontiers in Adversarial Machine Learning, Vol. 1."},{"key":"e_1_3_2_1_43_1","volume-title":"Learning the unlearnable: Adversarial augmentations suppress unlearnable example attacks. arXiv preprint arXiv:2303.15127","author":"Qin Tianrui","year":"2023","unstructured":"Tianrui Qin, Xitong Gao, Juanjuan Zhao, Kejiang Ye, and Cheng-Zhong Xu. 2023. Learning the unlearnable: Adversarial augmentations suppress unlearnable example attacks. arXiv preprint arXiv:2303.15127 (2023)."},{"key":"e_1_3_2_1_44_1","volume-title":"APBench: A Unified Availability Poisoning Attack and Defenses Benchmark. Transactions on Machine Learning Research","author":"Qin Tianrui","year":"2024","unstructured":"Tianrui Qin, Xitong Gao, Juanjuan Zhao, Kejiang Ye, and Cheng-zhong Xu. 2024. APBench: A Unified Availability Poisoning Attack and Defenses Benchmark. Transactions on Machine Learning Research (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Flareon: Stealthy any2any backdoor injection via poisoned augmentation. arXiv preprint arXiv:2212.09979","author":"Qin Tianrui","year":"2022","unstructured":"Tianrui Qin, Xianghuan He, Xitong Gao, Yiren Zhao, Kejiang Ye, and Cheng-Zhong Xu. 2022. Flareon: Stealthy any2any backdoor injection via poisoned augmentation. arXiv preprint arXiv:2212.09979 (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00395"},{"key":"e_1_3_2_1_48_1","first-page":"20346","article-title":"Human-adversarial visual question answering","volume":"34","author":"Sheng Sasha","year":"2021","unstructured":"Sasha Sheng, Amanpreet Singh, Vedanuj Goswami, Jose Magana, Tristan Thrush, Wojciech Galuba, Devi Parikh, and Douwe Kiela. 2021. Human-adversarial visual question answering. Advances in Neural Information Processing Systems, Vol. 34 (2021), 20346--20359.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_49_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023a. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_50_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023b. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"International Conference on Machine Learning. PMLR, 21692--21702","author":"Tramer Florian","year":"2022","unstructured":"Florian Tramer. 2022. Detecting adversarial examples is (nearly) as hard as classifying them. In International Conference on Machine Learning. PMLR, 21692--21702."},{"key":"e_1_3_2_1_52_1","first-page":"200","article-title":"Multimodal few-shot learning with frozen language models","volume":"34","author":"Tsimpoukelli Maria","year":"2021","unstructured":"Maria Tsimpoukelli, Jacob L Menick, Serkan Cabi, SM Eslami, Oriol Vinyals, and Felix Hill. 2021. Multimodal few-shot learning with frozen language models. Advances in Neural Information Processing Systems, Vol. 34 (2021), 200--212.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00279"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446953"},{"key":"e_1_3_2_1_55_1","unstructured":"Jindong Wang Xixu Hu Wenxin Hou Hao Chen Runkai Zheng Yidong Wang Linyi Yang Haojun Huang Wei Ye Xiubo Geng et al. 2023. On the robustness of chatgpt: An adversarial and out-of-distribution perspective. arXiv preprint arXiv:2302.12095 (2023)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00888"},{"key":"e_1_3_2_1_57_1","volume-title":"Jailbroken: How does llm safety training fail? arXiv preprint arXiv:2307.02483","author":"Wei Alexander","year":"2023","unstructured":"Alexander Wei, Nika Haghtalab, and Jacob Steinhardt. 2023. Jailbroken: How does llm safety training fail? arXiv preprint arXiv:2307.02483 (2023)."},{"key":"e_1_3_2_1_58_1","volume-title":"Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le.","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent Y Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le. 2021. Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652 (2021)."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the Neural Information Processing Systems.","author":"Xia Song","year":"2024","unstructured":"Song Xia, Wenhan Yang, Yi Yu, Xun Lin, Henghui Ding, LINGYU DUAN, and Xudong Jiang. 2024a. Transferable Adversarial Attacks on SAM and Its Downstream Models. In Proceedings of the Neural Information Processing Systems."},{"key":"e_1_3_2_1_60_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Xia Song","year":"2024","unstructured":"Song Xia, Yi Yu, Xudong Jiang, and Henghui Ding. 2024b. Mitigating the Curse of Dimensionality for Certified Robustness via Dual Randomized Smoothing. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_61_1","volume-title":"Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems. arXiv preprint arXiv:2503.00383","author":"Xia Song","year":"2025","unstructured":"Song Xia, Yi Yu, Wenhan Yang, Meiwen Ding, Zhuo Chen, Lingyu Duan, Alex C Kot, and Xudong Jiang. 2025. Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems. arXiv preprint arXiv:2503.00383 (2025)."},{"key":"e_1_3_2_1_62_1","unstructured":"Qinghao Ye Haiyang Xu Guohai Xu Jiabo Ye Ming Yan Yiyang Zhou Junyang Wang Anwen Hu Pengcheng Shi Yaya Shi et al. 2023a. mPLUG-OWL: Modularization empowers large language models with multimodality. arXiv preprint arXiv:2304.14178 (2023)."},{"key":"e_1_3_2_1_63_1","volume-title":"mPLUG-OWL2: Revolutionizing multi-modal large language model with modality collaboration. arXiv preprint arXiv:2311.04257","author":"Ye Qinghao","year":"2023","unstructured":"Qinghao Ye, Haiyang Xu, Jiabo Ye, Ming Yan, Haowei Liu, Qi Qian, Ji Zhang, Fei Huang, and Jingren Zhou. 2023b. mPLUG-OWL2: Revolutionizing multi-modal large language model with modality collaboration. arXiv preprint arXiv:2311.04257 (2023)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00568"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3323698"},{"key":"e_1_3_2_1_66_1","volume-title":"Proc. of NeurIPS, Alice H. Oh, Alekh Agarwal, Danielle Belgrave, and Kyunghyun Cho (Eds.).","author":"Yu Yunrui","year":"2022","unstructured":"Yunrui Yu, Xitong Gao, and Cheng zhong Xu. 2022a. MORA: Improving Ensemble Robustness Evaluation with Model Reweighing Attack. In Proc. of NeurIPS, Alice H. Oh, Alekh Agarwal, Danielle Belgrave, and Kyunghyun Cho (Eds.)."},{"key":"e_1_3_2_1_67_1","volume-title":"Proceedings of the International Conference on Machine Learning. PMLR, 57678--57702","author":"Yu Yi","year":"2024","unstructured":"Yi Yu, Yufei Wang, Song Xia, Wenhan Yang, Shijian Lu, Yap-Peng Tan, and Alex Kot. 2024b. Purify Unlearnable Examples via Rate-Constrained Variational Autoencoders. In Proceedings of the International Conference on Machine Learning. PMLR, 57678--57702."},{"key":"e_1_3_2_1_68_1","unstructured":"Yi Yu Yufei Wang Wenhan Yang Lanqing Guo Shijian Lu Ling-Yu Duan Yap-Peng Tan and Alex C Kot. 2024c. Robust and Transferable Backdoor Attacks Against Deep Image Compression With Selective Frequency Prior. (2024)."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 12250--12259","author":"Yu Yi","key":"e_1_3_2_1_69_1","unstructured":"Yi Yu, Yufei Wang, Wenhan Yang, Shijian Lu, Yap-Peng Tan, and Alex C. Kot. 2023. Backdoor Attacks Against Deep Image Compression via Adaptive Frequency Trigger. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 12250--12259."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i9.33051"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00592"},{"key":"e_1_3_2_1_72_1","volume-title":"Enhancing spatiotemporal prediction through the integration of Mamba state space models and Diffusion Transformers. Knowledge-Based Systems","author":"Zeng Hansheng","year":"2025","unstructured":"Hansheng Zeng, Yuqi Li, Ruize Niu, Chuanguang Yang, and Shiping Wen. 2025. Enhancing spatiotemporal prediction through the integration of Mamba state space models and Diffusion Transformers. Knowledge-Based Systems (2025)."},{"key":"e_1_3_2_1_73_1","unstructured":"Yunqing Zhao Tianyu Pang Chao Du Xiao Yang Chongxuan Li Ngai-Man Cheung and Min Lin. 2023. On evaluating adversarial robustness of large vision-language models. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_74_1","volume-title":"Minigpt-5: Interleaved vision-and-language generation via generative vokens. arXiv preprint arXiv:2310.02239","author":"Zheng Kaizhi","year":"2023","unstructured":"Kaizhi Zheng, Xuehai He, and Xin Eric Wang. 2023. Minigpt-5: Interleaved vision-and-language generation via generative vokens. arXiv preprint arXiv:2310.02239 (2023)."},{"key":"e_1_3_2_1_75_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."},{"key":"e_1_3_2_1_76_1","volume-title":"Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043","author":"Zou Andy","year":"2023","unstructured":"Andy Zou, Zifan Wang, J Zico Kolter, and Matt Fredrikson. 2023. Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)."}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Chicago IL USA","acronym":"ICMR '25"},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733407","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:11:17Z","timestamp":1755749477000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733407"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":76,"alternative-id":["10.1145\/3731715.3733407","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733407","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}