{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:34:40Z","timestamp":1777656880669,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"European Horizon 2020","award":["101158604"],"award-info":[{"award-number":["101158604"]}]},{"name":"Research Council of Norway","award":["309339"],"award-info":[{"award-number":["309339"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3658035","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"1006-1015","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":51,"title":["CLIPping the Deception: Adapting Vision-Language Models for Universal Deepfake Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5351-2278","authenticated-orcid":false,"given":"Sohail Ahmed","family":"Khan","sequence":"first","affiliation":[{"name":"University of Bergen, Norway, Bergen, NO"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2761-2213","authenticated-orcid":false,"given":"Duc-Tien","family":"Dang-Nguyen","sequence":"additional","affiliation":[{"name":"University of Bergen, Norway, Bergen, NO"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, 
Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems , Vol. 35 (2022), 23716--23736.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_2_1","volume-title":"Synthbuster: Towards detection of diffusion model generated images","author":"Bammey Quentin","year":"2023","unstructured":"Quentin Bammey. 2023. Synthbuster: Towards detection of diffusion model generated images. IEEE Open Journal of Signal Processing (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096","author":"Brock Andrew","year":"2018","unstructured":"Andrew Brock, Jeff Donahue, and Karen Simonyan. 2018. Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01815"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00916"},{"key":"e_1_3_2_1_7_1","volume-title":"Fakecatcher: Detection of synthetic portrait videos using biological signals","author":"Ciftci Umur Aybars","year":"2020","unstructured":"Umur Aybars Ciftci, Ilke Demir, and Lijun Yin. 2020. Fakecatcher: Detection of synthetic portrait videos using biological signals. IEEE transactions on pattern analysis and machine intelligence (2020)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095167"},{"key":"e_1_3_2_1_9_1","volume-title":"Raising the Bar of AI-generated Image Detection with CLIP. 
arXiv preprint arXiv:2312.00195","author":"Cozzolino Davide","year":"2023","unstructured":"Davide Cozzolino, Giovanni Poggi, Riccardo Corvi, Matthias Nie\u00dfner, and Luisa Verdoliva. 2023. Raising the Bar of AI-generated Image Detection with CLIP. arXiv preprint arXiv:2312.00195 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.5146400"},{"key":"e_1_3_2_1_11_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems , Vol. 34 (2021), 8780--8794."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330866"},{"key":"e_1_3_2_1_14_1","volume-title":"Clip-adapter: Better vision-language models with feature adapters. International Journal of Computer Vision","author":"Gao Peng","year":"2023","unstructured":"Peng Gao, Shijie Geng, Renrui Zhang, Teli Ma, Rongyao Fang, Yongfeng Zhang, Hongsheng Li, and Yu Qiao. 2023. Clip-adapter: Better vision-language models with feature adapters. International Journal of Computer Vision (2023), 1--15."},{"key":"e_1_3_2_1_15_1","volume-title":"Generative adversarial nets. Advances in neural information processing systems","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems , Vol. 27 (2014)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428429"},{"key":"e_1_3_2_1_17_1","volume-title":"Improved training of wasserstein gans. 
Advances in neural information processing systems","author":"Gulrajani Ishaan","year":"2017","unstructured":"Ishaan Gulrajani, Faruk Ahmed, Martin Arjovsky, Vincent Dumoulin, and Aaron C Courville. 2017. Improved training of wasserstein gans. Advances in neural information processing systems , Vol. 30 (2017)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","volume-title":"International conference on machine learning. PMLR, 4904--4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904--4916."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"e_1_3_2_1_21_1","volume-title":"Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196","author":"Karras Tero","year":"2017","unstructured":"Tero Karras, Timo Aila, Samuli Laine, and Jaakko Lehtinen. 2017. Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196 (2017)."},{"key":"e_1_3_2_1_22_1","volume-title":"Training generative adversarial networks with limited data. Advances in neural information processing systems","author":"Karras Tero","year":"2020","unstructured":"Tero Karras, Miika Aittala, Janne Hellsten, Samuli Laine, Jaakko Lehtinen, and Timo Aila. 2020a. Training generative adversarial networks with limited data. Advances in neural information processing systems , Vol. 
33 (2020), 12104--12114."},{"key":"e_1_3_2_1_23_1","first-page":"852","article-title":"Alias-free generative adversarial networks","volume":"34","author":"Karras Tero","year":"2021","unstructured":"Tero Karras, Miika Aittala, Samuli Laine, Erik H\u00e4rk\u00f6nen, Janne Hellsten, Jaakko Lehtinen, and Timo Aila. 2021. Alias-free generative adversarial networks. Advances in Neural Information Processing Systems , Vol. 34 (2021), 852--863.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_1_26_1","volume-title":"Deepfake Detection: Analysing Model Generalisation Across Architectures, Datasets and Pre-Training Paradigms","author":"Khan Sohail Ahmed","year":"2023","unstructured":"Sohail Ahmed Khan and Duc-Tien Dang-Nguyen. 2023. Deepfake Detection: Analysing Model Generalisation Across Architectures, Datasets and Pre-Training Paradigms. IEEE Access (2023)."},{"key":"e_1_3_2_1_27_1","unstructured":"Konwoo Kim, Michael Laskin, Igor Mordatch, and Deepak Pathak. 2021. How to adapt your large-scale vision-and-language model. (2021)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3425780"},{"key":"e_1_3_2_1_31_1","volume-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741","author":"Nichol Alex","year":"2021","unstructured":"Alex Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob McGrew, Ilya Sutskever, and Mark Chen. 2021. Glide: Towards photorealistic image generation and editing with text-guided diffusion models. 
arXiv preprint arXiv:2112.10741 (2021)."},{"key":"e_1_3_2_1_32_1","volume-title":"International conference on machine learning. PMLR, 2642--2651","author":"Odena Augustus","year":"2017","unstructured":"Augustus Odena, Christopher Olah, and Jonathon Shlens. 2017. Conditional image synthesis with auxiliary classifier gans. In International conference on machine learning. PMLR, 2642--2651."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02345"},{"key":"e_1_3_2_1_34_1","unstructured":"OpenAI. 2021a. guided-diffusion. https:\/\/github.com\/openai\/guided-diffusion."},{"key":"e_1_3_2_1_35_1","volume-title":"Introducing ChatGPT","author":"OpenAI","year":"2023","unstructured":"OpenAI. 2021b. Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt. [Online; accessed 08-August-2023]."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"e_1_3_2_1_37_1","volume-title":"Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_39_1","volume-title":"Hierarchical text-conditional image generation with clip latents. 
arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, Vol. 1, 2 (2022), 3."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00009"},{"key":"e_1_3_2_1_42_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems , Vol. 35 (2022), 36479--36494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","volume-title":"Improved techniques for training gans. Advances in neural information processing systems","author":"Salimans Tim","year":"2016","unstructured":"Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and Xi Chen. 2016. Improved techniques for training gans. Advances in neural information processing systems , Vol. 29 (2016)."},{"key":"e_1_3_2_1_44_1","volume-title":"Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114","author":"Schuhmann Christoph","year":"2021","unstructured":"Christoph Schuhmann, Richard Vencu, Romain Beaumont, Robert Kaczmarczyk, Clayton Mullis, Aarush Katta, Theo Coombes, Jenia Jitsev, and Aran Komatsuzaki. 2021. Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. 
arXiv preprint arXiv:2111.02114 (2021)."},{"key":"e_1_3_2_1_45_1","volume-title":"International conference on machine learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256--2265."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00872"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"e_1_3_2_1_48_1","volume-title":"DIRE for Diffusion-Generated Image Detection. arXiv preprint arXiv:2303.09295","author":"Wang Zhendong","year":"2023","unstructured":"Zhendong Wang, Jianmin Bao, Wengang Zhou, Weilun Wang, Hezhen Hu, Hong Chen, and Houqiang Li. 2023. DIRE for Diffusion-Generated Image Detection. arXiv preprint arXiv:2303.09295 (2023)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"e_1_3_2_1_50_1","volume-title":"Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365","author":"Yu Fisher","year":"2015","unstructured":"Fisher Yu, Ari Seff, Yinda Zhang, Shuran Song, Thomas Funkhouser, and Jianxiong Xiao. 2015. Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. 
arXiv preprint arXiv:1506.03365 (2015)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00295"}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","location":"Phuket Thailand","acronym":"ICMR '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658035","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3658035","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:51:18Z","timestamp":1755766278000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658035"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":53,"alternative-id":["10.1145\/3652583.3658035","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3658035","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}