{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,3]],"date-time":"2026-05-03T05:07:46Z","timestamp":1777784866973,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["FL190100149"],"award-info":[{"award-number":["FL190100149"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755142","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:30:51Z","timestamp":1761377451000},"page":"353-361","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["MiraGe: Multimodal Discriminative Representation Learning for Generalizable AI-Generated Image Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2753-2270","authenticated-orcid":false,"given":"Kuo","family":"Shi","sequence":"first","affiliation":[{"name":"University of Technology Sydney, Ultimo, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0690-4732","authenticated-orcid":false,"given":"Jie","family":"Lu","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Ultimo, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6961-7455","authenticated-orcid":false,"given":"Shanshan","family":"Ye","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Ultimo, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3960-0583","authenticated-orcid":false,"given":"Guangquan","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Ultimo, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0602-6255","authenticated-orcid":false,"given":"Zhen","family":"Fang","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Ultimo, NSW, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"James Betker Gabriel Goh Li Jing Tim Brooks Jianfeng Wang Linjie Li Long Ouyang Juntang Zhuang Joyce Lee Yufei Guo et al. 2023. Improving image generation with better captions. Computer Science. https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf Vol. 2 3 (2023) 8."},{"key":"e_1_3_2_1_2_1","unstructured":"Tim Brooks Bill Peebles Connor Holmes Will DePue Yufei Guo Li Jing David Schnurr Joe Taylor Troy Luhman Eric Luhman et al. 2024. Video generation models as world simulators. 2024. URL https:\/\/openai.com\/research\/video-generation-models-as-world-simulators Vol. 3 (2024)."},{"key":"e_1_3_2_1_3_1","first-page":"103","article-title":"What Makes Fake Images Detectable? Understanding Properties that Generalize. In ECCV (26) (Lecture Notes in Computer Science, Vol. 12371)","author":"Chai Lucy","year":"2020","unstructured":"Lucy Chai, David Bau, Ser-Nam Lim, and Phillip Isola. 2020. What Makes Fake Images Detectable? Understanding Properties that Generalize. In ECCV (26) (Lecture Notes in Computer Science, Vol. 12371). Springer, 103-120.","journal-title":"Springer"},{"key":"e_1_3_2_1_4_1","volume-title":"DRCT: Diffusion Reconstruction Contrastive Training towards Universal Detection of Diffusion Generated Images. In ICML. OpenReview.net.","author":"Chen Baoying","year":"2024","unstructured":"Baoying Chen, Jishen Zeng, Jianquan Yang, and Rui Yang. 2024. DRCT: Diffusion Reconstruction Contrastive Training towards Universal Detection of Diffusion Generated Images. In ICML. OpenReview.net."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2211477"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3258141"},{"key":"e_1_3_2_1_7_1","volume-title":"Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis. arXiv:2412.04431 [cs.CV] https:\/\/arxiv.org\/abs\/2412.04431","author":"Han Jian","year":"2024","unstructured":"Jian Han, Jinlai Liu, Yi Jiang, Bin Yan, Yuqi Zhang, Zehuan Yuan, Bingyue Peng, and Xiaobing Liu. 2024. Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis. arXiv:2412.04431 [cs.CV] https:\/\/arxiv.org\/abs\/2412.04431"},{"key":"e_1_3_2_1_8_1","unstructured":"Xue Jiang Feng Liu Zhen Fang Hong Chen Tongliang Liu Feng Zheng and Bo Han. 2024. Negative Label Guided OOD Detection with Pretrained Vision-Language Models. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_9_1","first-page":"3465","article-title":"Fusing Global and Local Features for Generalized AI-Synthesized Image Detection","author":"Ju Yan","year":"2022","unstructured":"Yan Ju, Shan Jia, Lipeng Ke, Hongfei Xue, Koki Nagano, and Siwei Lyu. 2022. Fusing Global and Local Features for Generalized AI-Synthesized Image Detection. In ICIP. IEEE, 3465-3469.","journal-title":"ICIP. IEEE"},{"key":"e_1_3_2_1_10_1","first-page":"1006","article-title":"CLIPping the Deception","author":"Khan Sohail Ahmed","year":"2024","unstructured":"Sohail Ahmed Khan and Duc-Tien Dang-Nguyen. 2024. CLIPping the Deception: Adapting Vision-Language Models for Universal Deepfake Detection. In ICMR. ACM, 1006-1015.","journal-title":"Adapting Vision-Language Models for Universal Deepfake Detection. In ICMR. ACM"},{"key":"e_1_3_2_1_11_1","first-page":"19113","article-title":"MaPLe","author":"Khattak Muhammad Uzair","year":"2023","unstructured":"Muhammad Uzair Khattak, Hanoona Abdul Rasheed, Muhammad Maaz, Salman H. Khan, and Fahad Shahbaz Khan. 2023. MaPLe: Multi-modal Prompt Learning. In CVPR. IEEE, 19113-19122.","journal-title":"Multi-modal Prompt Learning. In CVPR. IEEE"},{"key":"e_1_3_2_1_12_1","unstructured":"Prannay Khosla Piotr Teterwak Chen Wang Aaron Sarna Yonglong Tian Phillip Isola Aaron Maschinot Ce Liu and Dilip Krishnan. 2020. Supervised Contrastive Learning. In NeurIPS."},{"key":"e_1_3_2_1_13_1","first-page":"740","article-title":"Microsoft COCO: Common Objects in Context. In ECCV (5) (Lecture Notes in Computer Science, Vol. 8693)","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge J. Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In ECCV (5) (Lecture Notes in Computer Science, Vol. 8693). Springer, 740-755.","journal-title":"Springer"},{"key":"e_1_3_2_1_14_1","first-page":"95","article-title":"Detecting Generated Images by Real Images. In ECCV (14) (Lecture Notes in Computer Science, Vol. 13674)","author":"Liu Bo","year":"2022","unstructured":"Bo Liu, Fan Yang, Xiuli Bi, Bin Xiao, Weisheng Li, and Xinbo Gao. 2022. Detecting Generated Images by Real Images. In ECCV (14) (Lecture Notes in Computer Science, Vol. 13674). Springer, 95-110.","journal-title":"Springer"},{"key":"e_1_3_2_1_15_1","unstructured":"Zeyu Lu Di Huang Lei Bai Jingjing Qu Chengyue Wu Xihui Liu and Wanli Ouyang. 2023. Seeing is not always believing: Benchmarking Human and Model Perception of AI-Generated Images. In NeurIPS."},{"key":"e_1_3_2_1_16_1","first-page":"17006","article-title":"LaRE(^mbox2, )","author":"Luo Yunpeng","year":"2024","unstructured":"Yunpeng Luo, Junlong Du, Ke Yan, and Shouhong Ding. 2024. LaRE(^mbox2, ): Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection. In CVPR. IEEE, 17006-17015.","journal-title":"In CVPR. IEEE"},{"key":"e_1_3_2_1_17_1","first-page":"638","article-title":"X-CLIP: End-to-End Multi-grained Contrastive Learning for Video-Text Retrieval","author":"Ma Yiwei","year":"2022","unstructured":"Yiwei Ma, Guohai Xu, Xiaoshuai Sun, Ming Yan, Ji Zhang, and Rongrong Ji. 2022. X-CLIP: End-to-End Multi-grained Contrastive Learning for Video-Text Retrieval. In ACM Multimedia. ACM, 638-647.","journal-title":"ACM Multimedia. ACM"},{"key":"e_1_3_2_1_18_1","volume-title":"B. S. Manjunath, Shivkumar Chandrasekaran, Arjuna Flenner, Jawadul H. Bappy, and Amit K. Roy-Chowdhury.","author":"Nataraj Lakshmanan","year":"2019","unstructured":"Lakshmanan Nataraj, Tajuddin Manhar Mohammed, B. S. Manjunath, Shivkumar Chandrasekaran, Arjuna Flenner, Jawadul H. Bappy, and Amit K. Roy-Chowdhury. 2019. Detecting GAN generated Fake Images using Co-occurrence Matrices. In Media Watermarking, Security, and Forensics. Society for Imaging Science and Technology."},{"key":"e_1_3_2_1_19_1","first-page":"24480","article-title":"Towards Universal Fake Image Detectors that Generalize Across Generative Models","author":"Ojha Utkarsh","year":"2023","unstructured":"Utkarsh Ojha, Yuheng Li, and Yong Jae Lee. 2023. Towards Universal Fake Image Detectors that Generalize Across Generative Models. In CVPR. IEEE, 24480-24489.","journal-title":"CVPR. IEEE"},{"key":"e_1_3_2_1_20_1","volume-title":"ICML (Proceedings of Machine Learning Research","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In ICML (Proceedings of Machine Learning Research, Vol. 139). PMLR, 8748-8763."},{"key":"e_1_3_2_1_21_1","first-page":"10674","article-title":"High-Resolution Image Synthesis with Latent Diffusion Models","author":"Rombach Robin","year":"2022","unstructured":"Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. In CVPR. IEEE, 10674-10685.","journal-title":"CVPR. IEEE"},{"key":"e_1_3_2_1_22_1","first-page":"3418","article-title":"DE-FAKE: Detection and Attribution of Fake Images Generated by Text-to-Image Generation Models","author":"Sha Zeyang","year":"2023","unstructured":"Zeyang Sha, Zheng Li, Ning Yu, and Yang Zhang. 2023. DE-FAKE: Detection and Attribution of Fake Images Generated by Text-to-Image Generation Models. In CCS. ACM, 3418-3432.","journal-title":"CCS. ACM"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2024.3389705"},{"key":"e_1_3_2_1_24_1","first-page":"28130","article-title":"Rethinking the Up-Sampling Operations in CNN-Based Generative Network for Generalizable Deepfake Detection","author":"Tan Chuangchuang","year":"2024","unstructured":"Chuangchuang Tan, Huan Liu, Yao Zhao, Shikui Wei, Guanghua Gu, Ping Liu, and Yunchao Wei. 2024. Rethinking the Up-Sampling Operations in CNN-Based Generative Network for Generalizable Deepfake Detection. In CVPR. IEEE, 28130-28139.","journal-title":"CVPR. IEEE"},{"key":"e_1_3_2_1_25_1","first-page":"12105","article-title":"Learning on Gradients","author":"Tan Chuangchuang","year":"2023","unstructured":"Chuangchuang Tan, Yao Zhao, Shikui Wei, Guanghua Gu, and Yunchao Wei. 2023. Learning on Gradients: Generalized Artifacts Representation for GAN-Generated Images Detection. In CVPR. IEEE, 12105-12114.","journal-title":"Generalized Artifacts Representation for GAN-Generated Images Detection. In CVPR. IEEE"},{"key":"e_1_3_2_1_26_1","first-page":"1","article-title":"Prompt-Based Memory Bank for Continual Test-Time Domain Adaptation in Vision-Language Models","author":"Wang Ran","year":"2024","unstructured":"Ran Wang, Hua Zuo, Zhen Fang, and Jie Lu. 2024a. Prompt-Based Memory Bank for Continual Test-Time Domain Adaptation in Vision-Language Models. In IJCNN. IEEE, 1-8.","journal-title":"IJCNN. IEEE"},{"key":"e_1_3_2_1_27_1","first-page":"8604","article-title":"Towards Robustness Prompt Tuning with Fully Test-Time Adaptation for CLIP's Zero-Shot Generalization. In ACM Multimedia","author":"Wang Ran","year":"2024","unstructured":"Ran Wang, Hua Zuo, Zhen Fang, and Jie Lu. 2024b. Towards Robustness Prompt Tuning with Fully Test-Time Adaptation for CLIP's Zero-Shot Generalization. In ACM Multimedia. ACM, 8604-8612.","journal-title":"ACM"},{"key":"e_1_3_2_1_28_1","first-page":"8692","article-title":"CNN-Generated Images Are Surprisingly Easy to Spot... for Now","author":"Wang Sheng-Yu","year":"2020","unstructured":"Sheng-Yu Wang, Oliver Wang, Richard Zhang, Andrew Owens, and Alexei A. Efros. 2020. CNN-Generated Images Are Surprisingly Easy to Spot... for Now. In CVPR. Computer Vision Foundation \/ IEEE, 8692-8701.","journal-title":"CVPR. Computer Vision Foundation \/ IEEE"},{"key":"e_1_3_2_1_29_1","first-page":"22388","article-title":"DIRE for Diffusion-Generated Image Detection","author":"Wang Zhendong","year":"2023","unstructured":"Zhendong Wang, Jianmin Bao, Wengang Zhou, Weilun Wang, Hezhen Hu, Hong Chen, and Houqiang Li. 2023. DIRE for Diffusion-Generated Image Detection. In ICCV. IEEE, 22388-22398.","journal-title":"ICCV. IEEE"},{"key":"e_1_3_2_1_30_1","first-page":"7949","article-title":"Robust fine-tuning of zero-shot models","author":"Wortsman Mitchell","year":"2022","unstructured":"Mitchell Wortsman, Gabriel Ilharco, Jong Wook Kim, Mike Li, Simon Kornblith, Rebecca Roelofs, Raphael Gontijo Lopes, Hannaneh Hajishirzi, Ali Farhadi, Hongseok Namkoong, and Ludwig Schmidt. 2022. Robust fine-tuning of zero-shot models. In CVPR. IEEE, 7949-7961.","journal-title":"CVPR. IEEE"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2024.3511421"},{"key":"e_1_3_2_1_32_1","volume-title":"A Sanity Check for AI-generated Image Detection. CoRR","author":"Yan Shilin","year":"1943","unstructured":"Shilin Yan, Ouxiang Li, Jiayin Cai, Yanbin Hao, Xiaolong Jiang, Yao Hu, and Weidi Xie. 2024. A Sanity Check for AI-generated Image Detection. CoRR, Vol. abs\/2406.19435 (2024)."},{"key":"e_1_3_2_1_33_1","first-page":"23519","article-title":"Towards a Theoretical Framework of Out-of-Distribution Generalization","author":"Ye Haotian","year":"2021","unstructured":"Haotian Ye, Chuanlong Xie, Tianle Cai, Ruichen Li, Zhenguo Li, and Liwei Wang. 2021. Towards a Theoretical Framework of Out-of-Distribution Generalization. In NeurIPS. 23519-23531.","journal-title":"NeurIPS."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"e_1_3_2_1_35_1","first-page":"1","article-title":"Detecting and Simulating Artifacts in GAN Fake Images","author":"Zhang Xu","year":"2019","unstructured":"Xu Zhang, Svebor Karaman, and Shih-Fu Chang. 2019. Detecting and Simulating Artifacts in GAN Fake Images. In WIFS. IEEE, 1-6.","journal-title":"WIFS. IEEE"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_37_1","unstructured":"Mingjian Zhu Hanting Chen Qiangyu Yan Xudong Huang Guanyu Lin Wei Li Zhijun Tu Hailin Hu Jie Hu and Yunhe Wang. 2023. GenImage: A Million-Scale Benchmark for Detecting AI-Generated Image. In NeurIPS."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755142","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:55:22Z","timestamp":1765310122000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755142"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":37,"alternative-id":["10.1145\/3746027.3755142","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755142","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}