{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T08:47:16Z","timestamp":1770281236909,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681243","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"3587-3596","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Disrupting Diffusion: Token-Level Attention Erasure Attack against Diffusion-based Customization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8490-472X","authenticated-orcid":false,"given":"Yisu","family":"Liu","sequence":"first","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0327-3809","authenticated-orcid":false,"given":"Jinyang","family":"An","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5734-4072","authenticated-orcid":false,"given":"Wanqian","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8604-7226","authenticated-orcid":false,"given":"Dayan","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7387-0230","authenticated-orcid":false,"given":"Jingzi","family":"Gu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8432-1658","authenticated-orcid":false,"given":"Zheng","family":"Lin","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8618-4992","authenticated-orcid":false,"given":"Weiping","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME57554.2024.10688292"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_4"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00020"},{"key":"e_1_3_2_2_5_1","volume-title":"Attend-and-excite: Attention-based semantic guidance for text-to-image diffusion models. ACM Transactions on Graphics (TOG)","author":"Chefer Hila","year":"2023","unstructured":"Hila Chefer, Yuval Alaluf, Yael Vinker, Lior Wolf, and Daniel Cohen-Or. 2023. Attend-and-excite: Attention-based semantic guidance for text-to-image diffusion models. ACM Transactions on Graphics (TOG) (2023)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413630"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00957"},{"key":"e_1_3_2_2_10_1","unstructured":"Hugging Face. 2024. . https:\/\/huggingface.co\/"},{"key":"e_1_3_2_2_11_1","volume-title":"The Eleventh International Conference on Learning Representations, ICLR.","author":"Gal Rinon","year":"2023","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit Haim Bermano, Gal Chechik, and Daniel Cohen-Or. 2023. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. In The Eleventh International Conference on Learning Representations, ICLR."},{"key":"e_1_3_2_2_12_1","volume-title":"Deepfake video detection via predictive representation learning. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)","author":"Ge Shiming","year":"2022","unstructured":"Shiming Ge, Fanzhao Lin, Chenyu Li, Daichi Zhang, Weiping Wang, and Dan Zeng. 2022. Deepfake video detection via predictive representation learning. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) (2022)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547923"},{"key":"e_1_3_2_2_14_1","volume-title":"Prompt-to-Prompt Image Editing with Cross-Attention Control. In The Eleventh International Conference on Learning Representations, ICLR 2023","author":"Hertz Amir","year":"2023","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2023. Prompt-to-Prompt Image Editing with Cross-Attention Control. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1--5, 2023."},{"key":"e_1_3_2_2_15_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_16_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In The Tenth International Conference on Learning Representations, ICLR 2022","author":"Hu Edward J.","year":"2022","unstructured":"Edward J. Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25--29, 2022."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19982"},{"key":"e_1_3_2_2_18_1","volume-title":"Ahmad Almadhor, Zunera Jalil, Sajid Anwar, and Imad Rida.","author":"Iqbal Farkhund","year":"2023","unstructured":"Farkhund Iqbal, Ahmed Abbasi, Abdul Rehman Javed, Ahmad Almadhor, Zunera Jalil, Sajid Anwar, and Imad Rida. 2023. Data augmentation-based novel deep learning method for deepfaked images detection. ACM Transactions on Multimedia Computing, Communications and Applications (2023)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01051"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_2_2_22_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Li Dongxu","year":"2024","unstructured":"Dongxu Li, Junnan Li, and Steven Hoi. 2024. Blip-diffusion: Pre-trained subject representation for controllable text-to-image generation and editing. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_23_1","volume-title":"International Conference on Machine Learning, ICML 2023","author":"Liang Chumeng","year":"2023","unstructured":"Chumeng Liang, Xiaoyu Wu, Yang Hua, Jiaru Zhang, Yiming Xue, Tao Song, Zhengui Xue, Ruhui Ma, and Haibing Guan. 2023. Adversarial Example Does Good: Preventing Painting Imitation from Diffusion Models via Adversarial Examples. In International Conference on Machine Learning, ICML 2023, 23--29 July 2023, Honolulu, Hawaii, USA (Proceedings of Machine Learning Research)."},{"key":"e_1_3_2_2_24_1","volume-title":"FAMM: Facial muscle motions for detecting compressed deepfake videos over social networks","author":"Liao Xin","year":"2023","unstructured":"Xin Liao, Yumei Wang, Tianyi Wang, Juan Hu, and Xiaoshuai Wu. 2023. FAMM: Facial muscle motions for detecting compressed deepfake videos over social networks. IEEE Transactions on Circuits and Systems for Video Technology (2023)."},{"key":"e_1_3_2_2_25_1","volume-title":"Prediction Exposes Your Face: Black-box Model Inversion via Prediction Alignment. arXiv preprint arXiv:2407.08127","author":"Liu Yufan","year":"2024","unstructured":"Yufan Liu, Wanqian Zhang, Dayan Wu, Zheng Lin, Jingzi Gu, and Weiping Wang. 2024. Prediction Exposes Your Face: Black-box Model Inversion via Prediction Alignment. arXiv preprint arXiv:2407.08127 (2024)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"e_1_3_2_2_27_1","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings.","author":"Madry Aleksander","year":"2018","unstructured":"Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. 2018. Towards Deep Learning Models Resistant to Adversarial Attacks. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2012.2214050"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"e_1_3_2_2_30_1","volume-title":"International Conference on Machine Learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. PMLR, 8748--8763."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00223"},{"key":"e_1_3_2_2_32_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_2_33_1","volume-title":"Aaron Van den Oord, and Oriol Vinyals","author":"Razavi Ali","year":"2019","unstructured":"Ali Razavi, Aaron Van den Oord, and Oriol Vinyals. 2019. Generating diverse high-fidelity images with vq-vae-2. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_35_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention--MICCAI 2015: 18th international conference","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention--MICCAI 2015: 18th international conference, Munich, Germany, October 5--9, 2015, proceedings, part III 18. Springer, 234--241."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-66823-5_14"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_2_38_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems, Vol. 35 (2022), 36479--36494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_39_1","volume-title":"32nd USENIX Security Symposium (USENIX Security 23)","author":"Shan Shawn","year":"2023","unstructured":"Shawn Shan, Jenna Cryan, Emily Wenger, Haitao Zheng, Rana Hanocka, and Ben Y Zhao. 2023. Glaze: Protecting Artists from Style Mimicry by $$Text-to-Image$$ Models. In 32nd USENIX Security Symposium (USENIX Security 23). 2187--2204."},{"key":"e_1_3_2_2_40_1","unstructured":"StabilityAI. 2024. Stable Diffusion. Hugging Face Model Hub. https:\/\/huggingface.co\/stabilityai"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"e_1_3_2_2_42_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2116--2127","author":"Le Thanh Van","year":"2023","unstructured":"Thanh Van Le, Hao Phung, Thuan Hoang Nguyen, Quan Dao, Ngoc N Tran, and Anh Tran. 2023. Anti-DreamBooth: Protecting users from personalized text-to-image synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2116--2127."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01145"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00687"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475518"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3588574","article-title":"Deep convolutional pooling transformer for deepfake detection","volume":"19","author":"Wang Tianyi","year":"2023","unstructured":"Tianyi Wang, Harry Cheng, Kam Pui Chow, and Liqiang Nie. 2023. Deep convolutional pooling transformer for deepfake detection. ACM Transactions on Multimedia Computing, Communications and Applications, Vol. 19, 6 (2023), 1--20.","journal-title":"ACM Transactions on Multimedia Computing, Communications and Applications"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612471"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9533868"},{"key":"e_1_3_2_2_51_1","volume-title":"High Fidelity Face-Swapping With Style ConvTransformer and Latent Space Selection","author":"Yang Jiachen","year":"2023","unstructured":"Jiachen Yang, Chen Cheng, Shuai Xiao, Guipeng Lan, and Jiabao Wen. 2023. High Fidelity Face-Swapping With Style ConvTransformer and Latent Space Selection. IEEE Transactions on Multimedia (2023)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01588"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547913"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00222"},{"key":"e_1_3_2_2_55_1","first-page":"3590","article-title":"Opom: Customized invisible cloak towards face privacy protection","volume":"45","author":"Zhong Yaoyao","year":"2022","unstructured":"Yaoyao Zhong and Weihong Deng. 2022. Opom: Customized invisible cloak towards face privacy protection. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 45, 3 (2022), 3590--3603.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681243","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681243","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:42Z","timestamp":1750295862000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681243"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":55,"alternative-id":["10.1145\/3664647.3681243","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681243","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}