{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:04:45Z","timestamp":1750309485017,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U2341228"],"award-info":[{"award-number":["U2341228"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680902","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"10872-10881","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Natural Language Induced Adversarial Images"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8971-6475","authenticated-orcid":false,"given":"Xiaopei","family":"Zhu","sequence":"first","affiliation":[{"name":"Department of Computer Science &amp; Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1349-2333","authenticated-orcid":false,"given":"Peiyang","family":"Xu","sequence":"additional","affiliation":[{"name":"Department of Computer Science &amp; Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3783-9276","authenticated-orcid":false,"given":"Guanning","family":"Zeng","sequence":"additional","affiliation":[{"name":"Department of Computer Science &amp; Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1299-683X","authenticated-orcid":false,"given":"Yinpeng","family":"Dong","sequence":"additional","affiliation":[{"name":"Department of Computer Science &amp; Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4907-7354","authenticated-orcid":false,"given":"Xiaolin","family":"Hu","sequence":"additional","affiliation":[{"name":"Department of Computer Science &amp; Technology, Tsinghua University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Animal pictures of 10 different categories taken from google images. [EB\/OL]. https:\/\/www.kaggle.com\/datasets\/alessiocorrado99\/animals10 Accessed","author":"Alessio Corrado","year":"2023","unstructured":"Corrado Alessio. [n.,d.]. Animal pictures of 10 different categories taken from google images. [EB\/OL]. https:\/\/www.kaggle.com\/datasets\/alessiocorrado99\/animals10 Accessed Sep 5, 2023."},{"key":"e_1_3_2_1_2_1","first-page":"3","article-title":"Improving image generation with better captions","volume":"2","author":"Betker James","year":"2023","unstructured":"James Betker, Gabriel Goh, Li Jing, Tim Brooks, Jianfeng Wang, Linjie Li, Long Ouyang, Juntang Zhuang, Joyce Lee, Yufei Guo, et al. 2023. Improving image generation with better captions. Computer Science, Vol. 2 (2023), 3.","journal-title":"Computer Science"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1109\/SP.2017.49"},{"key":"e_1_3_2_1_4_1","first-page":"14929","article-title":"Adversarial attack on attackers: Post-process to mitigate black-box score-based query attacks","volume":"35","author":"Chen Sizhe","year":"2022","unstructured":"Sizhe Chen, Zhehao Huang, Qinghua Tao, Yingwen Wu, Cihang Xie, and Xiaolin Huang. 2022. Adversarial attack on attackers: Post-process to mitigate black-box score-based query attacks. Advances in Neural Information Processing Systems, Vol. 35 (2022), 14929--14943.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","volume-title":"Content-Based Unrestricted Adversarial Attack. Conference and Workshop on Neural Information Processing Systems","author":"Chen Zhaoyu","year":"2023","unstructured":"Zhaoyu Chen, Bo Li, Shuang Wu, Kaixun Jiang, Shouhong Ding, and Wenqiang Zhang. 2023. Content-Based Unrestricted Adversarial Attack. Conference and Workshop on Neural Information Processing Systems (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"https:\/\/civitai.com\/models\/136070\/controlnetxl-cnxl Accessed","author":"AI.","year":"2023","unstructured":"CivitAI. 2023. ControlNetXL. [EB\/OL]. https:\/\/civitai.com\/models\/136070\/controlnetxl-cnxl Accessed: November 16, 2023."},{"key":"e_1_3_2_1_7_1","volume-title":"https:\/\/civitai.com\/models\/112902\/dreamshaper-xl Accessed","author":"AI.","year":"2023","unstructured":"CivitAI. 2023. DreamShaper-XL. [EB\/OL]. https:\/\/civitai.com\/models\/112902\/dreamshaper-xl Accessed: October 25, 2023."},{"key":"e_1_3_2_1_8_1","volume-title":"https:\/\/civitai.com\/models\/125907\/realcartoon-xl Accessed","author":"AI.","year":"2023","unstructured":"CivitAI. 2023. RealCartoon-XL. [EB\/OL]. https:\/\/civitai.com\/models\/125907\/realcartoon-xl Accessed: October 26, 2023."},{"key":"e_1_3_2_1_9_1","volume-title":"https:\/\/creator.nightcafe.studio\/model\/mysterious-xl-v4 Accessed","author":"Creator NightCafe","year":"2023","unstructured":"NightCafe Creator. 2023. Mysterious-XL. [EB\/OL]. https:\/\/creator.nightcafe.studio\/model\/mysterious-xl-v4 Accessed: October 21, 2023."},{"key":"e_1_3_2_1_10_1","volume-title":"International Conference on Machine Learning","author":"Croce Francesco","year":"2020","unstructured":"Francesco Croce and Matthias Hein. 2020. Minimally Distorted Adversarial Examples with a Fast Adaptive Boundary Attack. International Conference on Machine Learning (2020), 2196--2205."},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Learning Representations","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2021. An image is worth 16x16 words: Transformers for image recognition at scale. International Conference on Learning Representations (2021)."},{"key":"e_1_3_2_1_12_1","volume-title":"International Conference on Machine Learning","author":"Engstrom Logan","year":"2019","unstructured":"Logan Engstrom, Brandon Tran, Dimitris Tsipras, Ludwig Schmidt, and Aleksander Madry. 2019. A rotation and a translation suffice: Fooling cnns with simple transformations. International Conference on Machine Learning (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1109\/CVPR.2018.00175"},{"key":"e_1_3_2_1_14_1","volume-title":"Improving Fast Minimum-Norm Attacks with Hyperparameter Optimization. European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning","author":"Floris Giuseppe","year":"2018","unstructured":"Giuseppe Floris, Raffaele Mura, Luca Scionis, Giorgio Piras, Maura Pintor, Ambra Demontis, Battista Biggio, et al. 2018. Improving Fast Minimum-Norm Attacks with Hyperparameter Optimization. European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (2018)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Learning Representations","author":"Goodfellow Ian J","year":"2015","unstructured":"Ian J Goodfellow, Jonathon Shlens, and Christian Szegedy. 2015. Explaining and harnessing adversarial examples. International Conference on Learning Representations (2015)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_18_1","DOI":"10.1007\/s10489-022-03838-0"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1109\/CVPRW.2018.00212"},{"key":"e_1_3_2_1_20_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_21_1","DOI":"10.1109\/ICCV48922.2021.00775"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Learning Representations","author":"Iandola Forrest N","year":"2017","unstructured":"Forrest N Iandola, Song Han, Matthew W Moskewicz, Khalid Ashraf, William J Dally, and Kurt Keutzer. 2017. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. International Conference on Learning Representations (2017)."},{"key":"e_1_3_2_1_24_1","volume-title":"Prior Convictions: Black-Box Adversarial Attacks with Bandits and Priors. International Conference on Learning Representations","author":"Ilyas Andrew","year":"2019","unstructured":"Andrew Ilyas, Logan Engstrom, and Aleksander Madry. 2019. Prior Convictions: Black-Box Adversarial Attacks with Bandits and Priors. International Conference on Learning Representations (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"e_1_3_2_1_26_1","first-page":"1106","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems, Vol. 25 (2012), 1106--1114.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_27_1","DOI":"10.1016\/bs.adcom.2015.05.003"},{"key":"e_1_3_2_1_28_1","volume-title":"Functional adversarial attacks. Advances in Neural Information Processing Systems","author":"Laidlaw Cassidy","year":"2019","unstructured":"Cassidy Laidlaw and Soheil Feizi. 2019. Functional adversarial attacks. Advances in Neural Information Processing Systems (2019), 10408--10418."},{"key":"e_1_3_2_1_29_1","first-page":"3487","article-title":"Dual manifold adversarial robustness: Defense against lp and non-lp adversarial attacks","volume":"33","author":"Lin Wei-An","year":"2020","unstructured":"Wei-An Lin, Chun Pong Lau, Alexander Levine, Rama Chellappa, and Soheil Feizi. 2020. Dual manifold adversarial robustness: Defense against lp and non-lp adversarial attacks. Advances in Neural Information Processing Systems, Vol. 33 (2020), 3487--3498.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Learning Representations","author":"Derek Liu Hsueh-Ti","year":"2019","unstructured":"Hsueh-Ti Derek Liu, Michael Tao, Chun-Liang Li, Derek Nowrouzezahrai, and Alec Jacobson. 2019. Beyond pixel norm-balls: Parametric adversaries using an analytically differentiable renderer. International Conference on Learning Representations (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1109\/CVPR52688.2022.01455"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_32_1","DOI":"10.1109\/INFOCOM48880.2022.9796974"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_33_1","DOI":"10.1109\/ICCV48922.2021.00986"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_1_35_1","volume-title":"No need to worry about adversarial examples in object detection in autonomous vehicles. arXiv preprint arXiv:1707.03501","author":"Lu Jiajun","year":"2017","unstructured":"Jiajun Lu, Hussein Sibai, Evan Fabry, and David Forsyth. 2017. No need to worry about adversarial examples in object detection in autonomous vehicles. arXiv preprint arXiv:1707.03501 (2017)."},{"key":"e_1_3_2_1_36_1","volume-title":"International Conference on Learning Representations","author":"Madry Aleksander","year":"2018","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2018. Towards deep learning models resistant to adversarial attacks. International Conference on Learning Representations (2018)."},{"volume-title":"https:\/\/www.midjourney.com\/ Accessed","year":"2023","unstructured":"midjourney group. 2022. Midjourney. [EB\/OL]. https:\/\/www.midjourney.com\/ Accessed: August 3, 2023.","key":"e_1_3_2_1_37_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.1109\/CVPR.2019.00930"},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Machine Learning.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1109\/CVPR42600.2020.01044"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1109\/CVPR42600.2020.00847"},{"key":"e_1_3_2_1_42_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, Vol. 1, 2 (2022), 3."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_44_1","volume-title":"Burcu Karagol Ayan, Tim Salimans, et al.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems (2022), 36479--36494."},{"key":"e_1_3_2_1_45_1","volume-title":"International Conference on Learning Representations","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. International Conference on Learning Representations (2015)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_46_1","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_47_1","volume-title":"Intriguing Properties of Neural Networks. International Conference on Learning Representations","author":"Szegedy Christian","year":"2014","unstructured":"Christian Szegedy, Wojciech Zaremba, Ilya Sutskever, Joan Bruna, Dumitru Erhan, Ian Goodfellow, and Rob Fergus. 2014. Intriguing Properties of Neural Networks. International Conference on Learning Representations (2014)."},{"key":"e_1_3_2_1_48_1","volume-title":"International Conference on Machine Learning.","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International Conference on Machine Learning."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_49_1","DOI":"10.1109\/CVPRW.2019.00012"},{"key":"e_1_3_2_1_50_1","volume-title":"British Machine Vision Conference","author":"Wang Chenan","year":"2023","unstructured":"Chenan Wang, Jinhao Duan, Chaowei Xiao, Edward Kim, Matthew Stamm, and Kaidi Xu. 2023. Semantic Adversarial Attacks via Diffusion Models. British Machine Vision Conference (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"A survey on physical adversarial attack in computer vision. arXiv preprint arXiv:2209.14262","author":"Wang Donghua","year":"2022","unstructured":"Donghua Wang, Wen Yao, Tingsong Jiang, Guijian Tang, and Xiaoqian Chen. 2022. A survey on physical adversarial attack in computer vision. arXiv preprint arXiv:2209.14262 (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_52_1","DOI":"10.1109\/TPAMI.2020.3032061"},{"key":"e_1_3_2_1_53_1","volume-title":"Demiguise Attack: Crafting Invisible Semantic Adversarial Perturbations with Perceptual Similarity. International Joint Conference on Artificial Inteligence","author":"Wang Yajie","year":"2021","unstructured":"Yajie Wang, Shangbo Wu, Wenyi Jiang, Shengang Hao, Yu-an Tan, and Quanxin Zhang. 2021. Demiguise Attack: Crafting Invisible Semantic Adversarial Perturbations with Perceptual Similarity. International Joint Conference on Artificial Inteligence (2021)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_54_1","DOI":"10.1109\/CVPR.2019.00284"},{"key":"e_1_3_2_1_55_1","volume-title":"Structured Adversarial Attack: Towards General Implementation and Better Interpretability. International Conference on Learning Representations","author":"Xu Kaidi","year":"2019","unstructured":"Kaidi Xu, Sijia Liu, Pu Zhao, Pin-Yu Chen, Huan Zhang, Quanfu Fan, Deniz Erdogmus, Yanzhi Wang, and Xue Lin. 2019. Structured Adversarial Attack: Towards General Implementation and Better Interpretability. International Conference on Learning Representations (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_56_1","DOI":"10.1109\/CVPR.2018.00520"},{"key":"e_1_3_2_1_57_1","volume-title":"Diffusion-Based Adversarial Sample Generation for Improved Stealthiness and Controllability. Conference and Workshop on Neural Information Processing Systems","author":"Xue Haotian","year":"2023","unstructured":"Haotian Xue, Alexandre Araujo, Bin Hu, and Yongxin Chen. 2023. Diffusion-Based Adversarial Sample Generation for Improved Stealthiness and Controllability. Conference and Workshop on Neural Information Processing Systems (2023)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_58_1","DOI":"10.1109\/CVPR.2019.00443"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_59_1","DOI":"10.1016\/j.neunet.2023.08.048"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_60_1","DOI":"10.1016\/j.cose.2024.103746"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_61_1","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_62_1","volume-title":"Generating Natural Adversarial Examples. In International Conference on Learning Representations.","author":"Zhao Zhengli","year":"2018","unstructured":"Zhengli Zhao, Dheeru Dua, and Sameer Singh. 2018. Generating Natural Adversarial Examples. In International Conference on Learning Representations."}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"MM '24","name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680902","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680902","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:33Z","timestamp":1750295853000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680902"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":62,"alternative-id":["10.1145\/3664647.3680902","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680902","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}