{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:14:51Z","timestamp":1766441691606,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":95,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T00:00:00Z","timestamp":1763769600000},"content-version":"vor","delay-in-days":3,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2241303"],"award-info":[{"award-number":["CNS-2241303"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N000142412669"],"award-info":[{"award-number":["N000142412669"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2140001"],"award-info":[{"award-number":["2140001"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3719027.3744845","type":"proceedings-article","created":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T23:32:38Z","timestamp":1763854358000},"page":"2848-2862","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On the Feasibility of Poisoning Text-to-Image AI Models via Adversarial Mislabeling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5214-2704","authenticated-orcid":false,"given":"Stanley","family":"Wu","sequence":"first","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7524-9292","authenticated-orcid":false,"given":"Ronik","family":"Bhaskar","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5551-7847","authenticated-orcid":false,"given":"Anna Yoo Jeong","family":"Ha","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4324-7817","authenticated-orcid":false,"given":"Shawn","family":"Shan","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5918-2940","authenticated-orcid":false,"given":"Haitao","family":"Zheng","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8909-0494","authenticated-orcid":false,"given":"Ben Y.","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,22]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/EuroSP51992.2021.00021"},{"key":"e_1_3_2_2_2_1","volume-title":"Proc. of USENIX Security.","author":"Bagdasaryan Eugene","year":"2024","unstructured":"Eugene Bagdasaryan, Rishi Jha, Vitaly Shmatikov, and Tingwei Zhang. 2024. Adversarial illusions in multi-modal embeddings. In Proc. 
of USENIX Security."},{"key":"e_1_3_2_2_3_1","unstructured":"Bagheera. 2024. SimpleTuner. https:\/\/github.com\/bghira\/SimpleTuner."},{"key":"e_1_3_2_2_4_1","volume-title":"Proc. of PMLR.","author":"Bailey Luke","year":"2024","unstructured":"Luke Bailey, Euan Ong, Stuart Russell, and Scott Emmons. 2024. Image hijacks: adversarial images can control generative models at runtime. In Proc. of PMLR."},{"key":"e_1_3_2_2_5_1","unstructured":"James Betker et al. 2023. Improving image generation with better captions. https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf."},{"key":"e_1_3_2_2_6_1","unstructured":"Black Forest Labs. 2024. FLUX.1-dev Model Card. https:\/\/huggingface.co\/black-forest-labs\/FLUX.1-dev."},{"volume-title":"Proc. of NeurIPS.","author":"Nicholas","key":"e_1_3_2_2_7_1","unstructured":"Nicholas Carlini et al., 2024a. Are aligned neural networks adversarially aligned?. In Proc. of NeurIPS."},{"volume-title":"Proc. of IEEE S&P.","author":"Nicholas","key":"e_1_3_2_2_8_1","unstructured":"Nicholas Carlini et al., 2024b. Poisoning web-scale training datasets is practical. In Proc. of IEEE S&P."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3128572.3140444"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00356"},{"volume-title":"Proc. of ICLR.","author":"Huanran","key":"e_1_3_2_2_11_1","unstructured":"Huanran Chen et al., 2024a. Rethinking model ensemble in transfer-based adversarial attacks. In Proc. of ICLR."},{"volume-title":"Proc. of ICLR.","author":"Junsong","key":"e_1_3_2_2_12_1","unstructured":"Junsong Chen et al., 2024b. PixArt-\u03b1: fast training of diffusion transformer for photorealistic text-to-image synthesis. In Proc. of ICLR."},{"volume-title":"Proc. of ECCV.","author":"Junsong","key":"e_1_3_2_2_13_1","unstructured":"Junsong Chen et al., 2024c. PixArt-\u03a3: weak-to-strong training of diffusion transformer for 4k text-to-image generation. In Proc. of ECCV."},{"volume-title":"Proc. of IEEE ICRA.","author":"Long","key":"e_1_3_2_2_14_1","unstructured":"Long Chen et al., 2024d. Driving with LLMs: fusing object-level vector modality for explainable autonomous driving. In Proc. of IEEE ICRA."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3385003.3410925"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00393"},{"volume-title":"Proc. of ACSAC.","author":"Xiaoyi","key":"e_1_3_2_2_17_1","unstructured":"Xiaoyi Chen et al., 2021. BadNL: backdoor attacks against NLP models with semantic-preserving improvements. In Proc. of ACSAC."},{"key":"e_1_3_2_2_18_1","volume-title":"Targeted backdoor attacks on deep learning systems using data poisoning. arXiv preprint arXiv:1712.05526","author":"Chen Xinyun","year":"2017","unstructured":"Xinyun Chen, Chang Liu, Bo Li, Kimberly Lu, and Dawn Song. 2017. Targeted backdoor attacks on deep learning systems using data poisoning. arXiv preprint arXiv:1712.05526 (2017)."},{"volume-title":"Proc. of CVPR.","author":"Zhe","key":"e_1_3_2_2_19_1","unstructured":"Zhe Chen et al., 2024 e. InternVL: scaling up vision foundation models and aligning for generic visual-linguistic tasks. In Proc. of CVPR."},{"volume-title":"Proc. of CVPR.","author":"Mehdi","key":"e_1_3_2_2_20_1","unstructured":"Mehdi Cherti et al., 2023. Reproducible scaling laws for contrastive language-image learning. In Proc. 
of CVPR."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00391"},{"key":"e_1_3_2_2_22_1","unstructured":"Civitai. 2022. What the heck is Civitaic https:\/\/civitai.com\/content\/guides\/what-is-civitai."},{"key":"e_1_3_2_2_23_1","unstructured":"Google Deepmind. 2024. Imagen 3. https:\/\/deepmind.google\/technologies\/imagen-3\/."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658644.3690205"},{"volume-title":"Proc. of NeurIPS Workshop.","author":"Yinpeng","key":"e_1_3_2_2_25_1","unstructured":"Yinpeng Dong et al., 2023. How robust is Google's Bard to adversarial image attacks?. In Proc. of NeurIPS Workshop."},{"key":"e_1_3_2_2_26_1","volume-title":"Roy","author":"Dziugaite Gintare Karolina","year":"2016","unstructured":"Gintare Karolina Dziugaite, Zoubin Ghahramani, and Daniel M. Roy. 2016. A study of the effect of JPG compression on adversarial images. arXiv preprint arXiv:1608.00853 (2016)."},{"volume-title":"Proc. of ICML.","author":"Patrick","key":"e_1_3_2_2_27_1","unstructured":"Patrick Esser et al., 2024. Scaling rectified flow transformers for high-resolution image synthesis. In Proc. of ICML."},{"volume-title":"Proc. of ICLR.","author":"Kuofeng","key":"e_1_3_2_2_28_1","unstructured":"Kuofeng Gao et al., 2024b. Inducing high energy-latency of large vision-language models with verbose images. In Proc. of ICLR."},{"key":"e_1_3_2_2_29_1","volume-title":"Proc. of ICLR Workshop.","author":"Gao Kuofeng","year":"2024","unstructured":"Kuofeng Gao, Yang Bai, Jiawang Bai, Yong Yang, and Shu-Tao Xia. 2024a. Adversarial robustness for visual grounding of multimodal large language models. In Proc. of ICLR Workshop."},{"volume-title":"Proc. of ICLR.","author":"Peng","key":"e_1_3_2_2_30_1","unstructured":"Peng Gao et al., 2025. Lumina-T2X: scalable flow-based large diffusion transformer for flexible resolution generation. In Proc. of ICLR."},{"key":"e_1_3_2_2_31_1","unstructured":"Google. 2024. Vertex AI platform. https:\/\/cloud.google.com\/vertex-ai?hl=en."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2016.0020"},{"key":"e_1_3_2_2_33_1","volume-title":"BadNets: evaluating backdooring attacks on deep neural networks","author":"Gu Tianyu","year":"2019","unstructured":"Tianyu Gu, Kang Liu, Brendan Dolan-Gavitt, and Siddharth Garg. 2019. BadNets: evaluating backdooring attacks on deep neural networks. IEEE Access (2019)."},{"key":"e_1_3_2_2_34_1","volume-title":"Proc. of NeurIPS.","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In Proc. of NeurIPS."},{"volume-title":"Proc. of AAAI.","author":"Yihao","key":"e_1_3_2_2_35_1","unstructured":"Yihao Huang et al., 2024. Personalization as a shortcut for few-shot backdoor attack against text-to-image diffusion models. In Proc. of AAAI."},{"key":"e_1_3_2_2_36_1","unstructured":"jhc13. 2024. TagGUI. https:\/\/github.com\/jhc13\/taggui."},{"key":"e_1_3_2_2_37_1","unstructured":"John. 2023. We need CogVLM support - extremely good image and text analysis feels like a multi generational step forward. https:\/\/github.com\/ggerganov\/llama.cpp\/issues\/4387."},{"key":"e_1_3_2_2_38_1","volume-title":"Proc. of USENIX Security.","author":"Li Huiying","year":"2022","unstructured":"Huiying Li, Shawn Shan, Emily Wenger, Jiayun Zhang, Haitao Zheng, and Ben Y Zhao. 2022. Blacklight: scalable defense for neural networks against query-based black-box attacks. In Proc. 
of USENIX Security."},{"key":"e_1_3_2_2_39_1","unstructured":"Zhimin Li et al. 2024. Hunyuan-DiT: a powerful multi-resolution diffusion transformer with fine-grained chinese understanding. arXiv preprint arXiv:2405.08748 (2024)."},{"key":"e_1_3_2_2_40_1","volume-title":"Proc. of NeurIPS.","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_41_1","volume-title":"Proc. of ICLR.","author":"Liu Yanpei","year":"2017","unstructured":"Yanpei Liu, Xinyun Chen, Chang Liu, and Dawn Song. 2017. Delving into transferable adversarial examples and black-box attacks. Proc. of ICLR."},{"key":"e_1_3_2_2_42_1","unstructured":"LyPreto. 2023. Shinning the spotlight on CogVLM. https:\/\/www.reddit.com\/r\/LocalLLaMA\/comments\/18evtgp\/shinning_the_spotlight_on_cogvlm\/."},{"key":"e_1_3_2_2_43_1","unstructured":"Microsoft. 2024. Image captions. https:\/\/learn.microsoft.com\/en-us\/azure\/ai-services\/computer-vision\/concept-describe-images-40?tabs=image."},{"key":"e_1_3_2_2_44_1","volume-title":"Proc. of NeurIPS.","author":"Nguyen Thao","year":"2024","unstructured":"Thao Nguyen, Samir Yitzhak Gadre, Gabriel Ilharco, Sewoong Oh, and Ludwig Schmidt. 2024. Improving multimodal datasets with image captioning. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_45_1","unstructured":"Tuan Anh Nguyen and Anh Tran. 2020. Input-aware dynamic backdoor attack."},{"volume-title":"Proc. of ECCV.","author":"Ming","key":"e_1_3_2_2_46_1","unstructured":"Ming Nie et al., 2025. Reason2Drive: towards interpretable and chain-based reasoning for autonomous driving. In Proc. of ECCV."},{"volume-title":"Proc. of ICML.","author":"Weili","key":"e_1_3_2_2_47_1","unstructured":"Weili Nie et al., 2022. Diffusion models for adversarial purification. In Proc. of ICML."},{"key":"e_1_3_2_2_48_1","unstructured":"NovelAI. 2022. NovelAI changelog. https:\/\/novelai.net\/updates."},{"volume-title":"Proc. NeurIPS Workshop.","author":"Zhuoshi","key":"e_1_3_2_2_49_1","unstructured":"Zhuoshi Pan et al., 2023. From trojan horses to castle walls: unveiling bilateral backdoor effects in diffusion models. In Proc. NeurIPS Workshop."},{"volume-title":"Proc. of ICLR.","author":"Dustin","key":"e_1_3_2_2_50_1","unstructured":"Dustin Podell et al., 2024. SDXL: improving latent diffusion models for high-resolution image synthesis. In Proc. of ICLR."},{"key":"e_1_3_2_2_51_1","unstructured":"PseudoTerminal X. 2024. Photo Concept Bucket. https:\/\/huggingface.co\/datasets\/bghira\/photo-concept-bucket."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i19.30150"},{"volume-title":"Proc. of ICML.","author":"Alec","key":"e_1_3_2_2_53_1","unstructured":"Alec Radford et al., 2021. Learning transferable visual models from natural language supervision. In Proc. of ICML."},{"key":"e_1_3_2_2_54_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00408"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"volume-title":"Proc. 
of NeurIPS.","author":"Chitwan","key":"e_1_3_2_2_57_1","unstructured":"Chitwan Saharia et al., 2022. Photorealistic text-to-image diffusion models with deep language understanding. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_58_1","unstructured":"Scenario.gg. 2022. AI-generated game assets. https:\/\/www.scenario.gg\/."},{"key":"e_1_3_2_2_59_1","unstructured":"Christoph Schuhmann. 2022. LAION-Aesthetics. https:\/\/laion.ai\/blog\/laion-aesthetics\/."},{"volume-title":"Proc. of NeurIPS.","author":"Christoph","key":"e_1_3_2_2_60_1","unstructured":"Christoph Schuhmann et al., 2022. LAION-5B: an open large-scale dataset for training next generation image-text models. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_61_1","volume-title":"A picture is worth a thousand words: principled recaptioning improves image generation. arXiv preprint arXiv:2310.16656","author":"Segalis Eyal","year":"2023","unstructured":"Eyal Segalis, Dani Valevski, Danny Lumen, Yossi Matias, and Yaniv Leviathan. 2023. A picture is worth a thousand words: principled recaptioning improves image generation. arXiv preprint arXiv:2310.16656 (2023)."},{"key":"e_1_3_2_2_62_1","volume-title":"Proc. of USENIX Security.","author":"Shan Shawn","year":"2023","unstructured":"Shawn Shan, Jenna Cryan, Emily Wenger, Haitao Zheng, Rana Hanocka, and Ben Y Zhao. 2023. Glaze: protecting artists from style mimicry by text-to-image models. In Proc. of USENIX Security."},{"key":"e_1_3_2_2_63_1","volume-title":"Zhao","author":"Shan Shawn","year":"2024","unstructured":"Shawn Shan, Wenxin Ding, Josephine Passananti, Stanley Wu, Haitao Zheng, and Ben Y. Zhao. 2024. Nightshade: prompt-specific poisoning attacks on text-to-image generative models. In Proc. of IEEE S&P."},{"key":"e_1_3_2_2_64_1","volume-title":"Proc. of ICLR.","author":"Shayegani Erfan","year":"2024","unstructured":"Erfan Shayegani, Yue Dong, and Nael Abu-Ghazaleh. 2024. Jailbreak in pieces: compositional adversarial attacks on multi-modal language models. In Proc. of ICLR."},{"key":"e_1_3_2_2_65_1","unstructured":"Richard Shin and Dawn Song. 2017. JPEG-resistant adversarial images. https:\/\/machine-learning-and-security.github.io\/papers\/mlsec17_paper_54.pdf."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"crossref","unstructured":"Ilia Shumailov et al. 2024. AI models collapse when trained on recursively generated data. Nature (2024).","DOI":"10.1038\/s41586-024-07566-y"},{"key":"e_1_3_2_2_67_1","volume-title":"Proc. of ICLR.","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021b. Denoising diffusion implicit models. In Proc. of ICLR."},{"volume-title":"Proc. of ICLR.","author":"Yang","key":"e_1_3_2_2_68_1","unstructured":"Yang Song et al., 2021a. Score-based generative modeling through stochastic differential equations. In Proc. of ICLR."},{"key":"e_1_3_2_2_69_1","unstructured":"Stability AI. 2022. Stable Diffusion 2.0 release. https:\/\/stability.ai\/blog\/stable-diffusion-v2-release."},{"key":"e_1_3_2_2_70_1","unstructured":"Stability AI. 2024a. Stable Diffusion v1-5 Model Card. https:\/\/huggingface.co\/stable-diffusion-v1-5\/stable-diffusion-v1-5."},{"key":"e_1_3_2_2_71_1","unstructured":"Stability AI. 2024b. Stable Diffusion v2-1 Model Card. https:\/\/huggingface.co\/stabilityai\/stable-diffusion-2-1."},{"key":"e_1_3_2_2_72_1","volume-title":"Kolors: effective training of diffusion model for photorealistic text-to-image synthesis. arXiv preprint","author":"Team Kolors","year":"2024","unstructured":"Kolors Team. 2024. 
Kolors: effective training of diffusion model for photorealistic text-to-image synthesis. arXiv preprint (2024). https:\/\/github.com\/Kwai-Kolors\/Kolors\/blob\/master\/imgs\/Kolors_paper.pdf"},{"key":"e_1_3_2_2_73_1","volume-title":"Proc. of NeurIPS.","author":"Tram\u00e8r Florian","year":"2020","unstructured":"Florian Tram\u00e8r, Nicholas Carlini, Wieland Brendel, and Aleksander Madry. 2020. On adaptive attacks to adversarial example defenses. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_74_1","unstructured":"Tony Ho Tran. 2022. Image apps like Lensa AI are sweeping the internet and stealing from artists. https:\/\/www.thedailybeast.com\/how-lensa-ai-and-image-generators-steal-from-artists."},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25353"},{"volume-title":"Proc. of NeurIPS.","author":"Weihan","key":"e_1_3_2_2_76_1","unstructured":"Weihan Wang et al., 2024. CogVLM: visual expert for pretrained language models. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_77_1","volume-title":"Data poisoning attacks against online learning. arXiv preprint arXiv:1808.08994","author":"Wang Yizhen","year":"2018","unstructured":"Yizhen Wang and Kamalika Chaudhuri. 2018. Data poisoning attacks against online learning. arXiv preprint arXiv:1808.08994 (2018)."},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00614"},{"volume-title":"Proc. of ICLR.","author":"Henry Chen","key":"e_1_3_2_2_79_1","unstructured":"Chen Henry Wu et al., 2025. Dissecting adversarial robustness of multimodal LM agents. In Proc. of ICLR."},{"key":"e_1_3_2_2_80_1","volume-title":"Proc. of ICLR.","author":"Xiao Chang","year":"2020","unstructured":"Chang Xiao, Peilin Zhong, and Changxi Zheng. 2020. Enhancing adversarial defense by k-Winners-Take-All. In Proc. of ICLR."},{"volume-title":"Proc. of ICLR.","author":"Enze","key":"e_1_3_2_2_81_1","unstructured":"Enze Xie et al., 2025. SANA: efficient high-resolution image synthesis with linear diffusion transformers. In Proc. of ICLR."},{"key":"e_1_3_2_2_82_1","unstructured":"Chejian Xu et al. 2024. AdvWeb: controllable black-box attacks on vlm-powered web agents. arXiv preprint arXiv:2410.17401 (2024)."},{"volume-title":"Proc. of NeurIPS.","author":"Yuancheng","key":"e_1_3_2_2_83_1","unstructured":"Yuancheng Xu et al., 2025. Shadowcast: stealthy data poisoning attacks against vision-language models. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_84_1","unstructured":"Le Xue et al. 2024. xGen-MM (BLIP-3): a family of open large multimodal models. arXiv preprint arXiv:2408.08872 (2024)."},{"key":"e_1_3_2_2_85_1","volume-title":"two advanced backdoor attacks against deep learning models","author":"Xue Mingfu","year":"2020","unstructured":"Mingfu Xue, Can He, Jian Wang, and Weiqiang Liu. 2020. One-to-N & N-to-One: two advanced backdoor attacks against deep learning models. IEEE TDSC (2020)."},{"volume-title":"Proc. of NeurIPS.","author":"Chengyuan","key":"e_1_3_2_2_86_1","unstructured":"Chengyuan Yao et al., 2021. Automated discovery of adaptive attacks on adversarial defenses. In Proc. of NeurIPS."},{"volume-title":"Proc. of NeurIPS.","author":"Ziyi","key":"e_1_3_2_2_87_1","unstructured":"Ziyi Yin et al., 2024. VLATTACK: multimodal adversarial attacks on vision-language tasks via pre-trained models. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_88_1","volume-title":"Weinberger","author":"Yu Tao","year":"2019","unstructured":"Tao Yu, Shengyuan Hu, Chuan Guo, Wei-Lun Chao, and Kilian Q. Weinberger. 2019. 
A new defense against adversarial images: turning a weakness into a strength. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612108"},{"key":"e_1_3_2_2_90_1","unstructured":"Tianyuan Zhang et al. 2024. Visual adversarial attack on vision-language models for autonomous driving. arXiv preprint arXiv:2411.18275 (2024)."},{"key":"e_1_3_2_2_91_1","volume-title":"Proc. of L4DC. PMLR.","author":"Zhang Xuezhou","year":"2020","unstructured":"Xuezhou Zhang, Xiaojin Zhu, and Laurent Lessard. 2020. Online data poisoning attacks. In Proc. of L4DC. PMLR."},{"volume-title":"Proc. of NeurIPS.","author":"Yunqing","key":"e_1_3_2_2_92_1","unstructured":"Yunqing Zhao et al., 2024. On evaluating adversarial robustness of large vision-language models. In Proc. of NeurIPS."},{"key":"e_1_3_2_2_93_1","volume-title":"Revisiting the adversarial robustness of vision language models: a multimodal perspective. arXiv preprint arXiv:2404.19287","author":"Zhou Wanqi","year":"2024","unstructured":"Wanqi Zhou, Shuanghao Bai, Qibin Zhao, and Badong Chen. 2024. Revisiting the adversarial robustness of vision language models: a multimodal perspective. arXiv preprint arXiv:2404.19287 (2024)."},{"key":"e_1_3_2_2_94_1","volume-title":"Proc. of ICLR.","author":"Zhu Deyao","year":"2024","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2024. MiniGPT-4: enhancing vision-language understanding with advanced large language models. In Proc. of ICLR."},{"key":"e_1_3_2_2_95_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210080"}],"event":{"name":"CCS '25: ACM SIGSAC Conference on Computer and Communications Security","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"],"location":"Taipei Taiwan","acronym":"CCS '25"},"container-title":["Proceedings of the 2025 ACM SIGSAC Conference on Computer and Communications Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719027.3744845","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719027.3744845","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:09:23Z","timestamp":1766441363000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719027.3744845"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":95,"alternative-id":["10.1145\/3719027.3744845","10.1145\/3719027"],"URL":"https:\/\/doi.org\/10.1145\/3719027.3744845","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2025-11-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}