{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:57:39Z","timestamp":1743098259249,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":65,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819786848"},{"type":"electronic","value":"9789819786855"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8685-5_9","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:11:26Z","timestamp":1730524286000},"page":"122-136","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fake-GPT: Detecting Fake Image via Large Language Model"],"prefix":"10.1007","author":[{"given":"Yuming","family":"Fan","sequence":"first","affiliation":[]},{"given":"Dongming","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jiguang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Bang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Yuexian","family":"Zou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"9_CR1","unstructured":"Dosovitskiy, A., Beyer, L., et al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale (2020). arXiv:2010.11929"},{"key":"9_CR2","unstructured":"Dodds, E., Culpepper, J., Herdade, S., Zhang, Y., Boakye, K.: Modality-agnostic attention fusion for visual search with text feedback (2020). arXiv:2007.00145"},{"key":"9_CR3","unstructured":"Zhou, B., Tian, Y., Sukhbaatar, S., Szlam, A., Fergus, R.: Simple baseline for visual question answering. arXiv:1512.02167,2015"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. ICCV, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Wang, S.-Y., Wang, O., Zhang, R., Owens, A., Efros, A.A.: CNN-generated images are surprisingly easy to spot... for now. CVPR, pp. 8695\u20138704 (2020)","DOI":"10.1109\/CVPR42600.2020.00872"},{"key":"9_CR6","unstructured":"Hu, E.J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., Chen, W.: Lora: Low-rank adaptation of large language models (2021). arXiv:2106.09685"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Wang, S.Y., Wang, O., et al.: CNN-generated images are surprisingly easy to spot... for now. CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00872"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., et al.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. ICCV (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Tan, C., Zhao, Y., Wei, S., Gu, G., Wei, Y.: Learning on gradients: generalized artifacts representation for GAN-Generated images detection. CVPR, pp. 12105\u201312114 (2023)","DOI":"10.1109\/CVPR52729.2023.01165"},{"issue":"1","key":"9_CR10","doi-asserted-by":"publisher","first-page":"374","DOI":"10.1609\/aaai.v36i1.19914","volume":"36","author":"Y Chen","year":"2022","unstructured":"Chen, Y., Chen, D., Wang, T., Wang, Y., Liang, Y.: Causal intervention for subject-deconfounded facial action unit recognition. AAAI 36(1), 374\u2013382 (2022)","journal-title":"AAAI"},{"issue":"11","key":"9_CR11","doi-asserted-by":"publisher","first-page":"11864","DOI":"10.1609\/aaai.v36i11.21443","volume":"36","author":"L Ding","year":"2022","unstructured":"Ding, L., Yu, D., Xie, J., Guo, W., Hu, S., Liu, M., Kong, L., Dai, H., Bao, Y., Jiang, B.: Word embeddings via causal inference: gender bias reducing and semantic information preserving. AAAI 36(11), 11864\u201311872 (2022)","journal-title":"AAAI"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Choi, S., Jeong, M., Han, H., Hwang, S.-W.: C2l: Causally contrastive learning for robust text classification. AAAI 36(10), 10526\u201310534 (2022)","DOI":"10.1609\/aaai.v36i10.21296"},{"key":"9_CR13","unstructured":"Bai, J., Bai, S., Chu, Y., Cui, Z., et al.: Qwen Technical Report (2023). arXiv:2309.16609"},{"key":"9_CR14","unstructured":"Baichuan: Baichuan 2: Open Large-scale Language Models (2023). arXiv:2309.10305"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Du, Z., Qian, Y., et al.: GLM: general language model pretraining with autoregressive blank infilling. ACL, pp. 320\u2013335 (2022)","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Jeong, Y., Kim, D., Min, S., Joe, S., Gwon, Y., Choi, J.: BiHPF: bilateral high-pass filters for robust deepfake detection. WACV, pp. 48\u201357 (2022)","DOI":"10.1109\/WACV51458.2022.00293"},{"key":"9_CR17","unstructured":"Karras, T., Aila, T., Laine, S., Lehtinen, J.: Progressive Growing of GANs for Improved Quality, Stability, and Variation. ICLR (2018)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. CVPR, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of stylegan. CVPR, pp. 8110\u20138119 (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"9_CR20","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. ICLR (2018)"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Zhu, J.-Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. ICCV, pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Choi, Y., Choi, M., Kim, M., Ha, J.-W., Kim, S., Choo, J.: Stargan: unified generative adversarial networks for multi-domain image-to-image translation. CVPR, pp. 8789\u20138797 (2018)","DOI":"10.1109\/CVPR.2018.00916"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.-Y., Wang, T.-C., Zhu, J.-Y.: Semantic image synthesis with spatially-adaptive normalization. CVPR, pp. 2337\u20132346 (2019)","DOI":"10.1109\/CVPR.2019.00244"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Rossler, A., Cozzolino, D., Verdoliva, L., Riess, C., Thies, J., Niessner, M.: Faceforensics++: Learning to detect manipulated facial images. ICCV, pp. 1\u201311 (2019)","DOI":"10.1109\/ICCV.2019.00009"},{"key":"9_CR25","unstructured":"Yu, F., Seff, A., Zhang, Y., Song, S., Funkhouser, T., Xiao, J.: Lsun: construction of a large-scale image dataset using deep learning with humans in the loop (2015). arXiv:1506.03365"},{"issue":"3","key":"9_CR26","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M., et al.: ImageNet large scale visual recognition challenge. IJCV 115(3), 211\u2013252 (2015)","journal-title":"IJCV"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, P., Wang, X., Tang, X.: Deep learning face attributes in the wild. ICCV, pp. 3730\u20133738 (2015)","DOI":"10.1109\/ICCV.2015.425"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. ECCV, pp. 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"9_CR30","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet Classification with Deep Convolutional Neural Networks. Adv. Neural Inf. Process. Syst. 25 (2012)"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Jeong, Y., Kim, D., Ro, Y., Choi, J.: FrePGAN: Robust Deepfake Detection Using Frequency-level Perturbations. arxiv:2022","DOI":"10.1609\/aaai.v36i1.19990"},{"key":"9_CR32","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. ICLR (2015)"},{"key":"9_CR33","unstructured":"Ze\u010devi\u0107, M., Willig, M., Dhami, D.S., Kersting, K.: Causal Parrots: Large Language Models May Talk Causality but Are Not Causal (2023). arXiv:2308.13067"},{"key":"9_CR34","doi-asserted-by":"crossref","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. ICLR (2015)","DOI":"10.1109\/ICCV.2015.314"},{"issue":"1","key":"9_CR35","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1113\/jphysiol.1962.sp006837","volume":"160","author":"DH Hubel","year":"1962","unstructured":"Hubel, D.H., Wiesel, T.N.: Receptive fields, binocular interaction and functional architecture in the cat\u2019s visual cortex. J. Physiol. 160(1), 106 (1962)","journal-title":"J. Physiol."},{"issue":"11","key":"9_CR36","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-Based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"issue":"5","key":"9_CR37","first-page":"5052","volume":"38","author":"C Tan","year":"2024","unstructured":"Tan, C., Zhao, Y., Wei, S., Gu, G., Liu, P., Wei, Y.: Frequency-Aware deepfake detection: improving generalizability through frequency space domain learning. Proc. AAAI Conf. Artif. Intell. 38(5), 5052\u20135060 (2024)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"9_CR38","unstructured":"Tan, C., Liu, P., Tao, R.S., et al.: Data-independent operator: a training-free artifact representation extractor for generalizable deepfake detection[J] (2024). arXiv:2403.06803"},{"key":"9_CR39","doi-asserted-by":"crossref","unstructured":"Tan, C., Zhao, Y., Wei, S., et al.: Rethinking the up-sampling operations in CNN-based generative network for generalizable deepfake detection[C]. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 28130\u201328139 (2024)","DOI":"10.1109\/CVPR52733.2024.02657"},{"key":"9_CR40","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., Adam, H.: Rethinking Atrous Convolution for Semantic Image Segmentation (2017). arXiv:1706.05587"},{"key":"9_CR41","doi-asserted-by":"crossref","unstructured":"Zhu, J., Shen, Y., Zhao, D., Zhou, B.: In-Domain GAN inversion for real image editing. ECCV, pp. 592\u2013608 (2020)","DOI":"10.1007\/978-3-030-58520-4_35"},{"key":"9_CR42","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning Transferable Visual Models From Natural Language Supervision. ICML, 8748\u20138763 (2021)"},{"key":"9_CR43","unstructured":"Frank, J., Eisenhofer, T., Sch\u00f6nherr, L., Fischer, A., Kolossa, D., Holz, T.: Leveraging frequency analysis for deep fake image recognition. ICML, pp. 3247\u20133258 (2020)"},{"key":"9_CR44","doi-asserted-by":"crossref","unstructured":"Durall, R., et al.: Watch your up-convolution: CNN based generative deep neural networks are failing to reproduce spectral distributions. CVPR, pp. 7890\u20137899 (2020)","DOI":"10.1109\/CVPR42600.2020.00791"},{"key":"9_CR45","unstructured":"Goodfellow, I.J., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A.C., Bengio, Y.: Generative adversarial nets. NIPS (2014)"},{"key":"9_CR46","unstructured":"Frank, J., et al.: Leveraging frequency analysis for deep fake image recognition. In: International Conference on Machine Learning. PMLR, pp. 3247\u20133258 (2020)"},{"key":"9_CR47","unstructured":"Kingma, D.P., Welling, M.: Auto-Encoding Variational Bayes. arXiv:1312.6114"},{"key":"9_CR48","doi-asserted-by":"crossref","unstructured":"Ju, Y., et al.: Fusing global and local features for generalized ai-synthesized image detection. In: 2022 IEEE International Conference on Image Processing (ICIP), pp. 3465\u20133469. IEEE (2022)","DOI":"10.1109\/ICIP46576.2022.9897820"},{"key":"9_CR49","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Global texture enhancement for fake face detection in the wild. In: Proceedings of the CVPR, pp. 8060\u20138069 (2020)","DOI":"10.1109\/CVPR42600.2020.00808"},{"key":"9_CR50","doi-asserted-by":"crossref","unstructured":"Mandelli, S., Bonettini, N., Bestagini, P., Tubaro, S.: Detecting GAN-generated images by orthogonal training of multiple CNNs. In: IEEE International Conference on Image Processing (ICIP), vol. 2022, pp. 3091\u20133095 (2022)","DOI":"10.1109\/ICIP46576.2022.9897310"},{"key":"9_CR51","doi-asserted-by":"crossref","unstructured":"Ojha, U., et al.: Towards universal fake image detectors that generalize across generative models. In: Proceedings of the CVPR, pp. 24 480\u201324 489 (2023)","DOI":"10.1109\/CVPR52729.2023.02345"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Fan, Y., Yang, D., He, X.: CTYUN-AI at SemEval-2024 Task 7: boosting numerical understanding with limited data through effective data alignment[C]. In: Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024), pp. 47\u201352 (2024)","DOI":"10.18653\/v1\/2024.semeval-1.8"},{"key":"9_CR53","unstructured":"Frank, J., Eisenhofer, T., Sch\u00f6nherr, L., Fischer, A., Kolossa, D., Holz, T.: Leveraging frequency analysis for deep fake image recognition. ICML, pp. 3247\u20133258 (2020)"},{"key":"9_CR54","unstructured":"Tan, C., Liu, P., Tao, R.S., et al.: Data-Independent Operator: A Training-Free Artifact Representation Extractor for Generalizable Deepfake Detection[J] (2024). arXiv:2403.06803"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Yu, N., Davis, L.S., Fritz, M.: Attributing fake images to GANs: learning and analyzing GAN fingerprints. ICCV, pp. 7556\u20137566 (2019)","DOI":"10.1109\/ICCV.2019.00765"},{"key":"9_CR56","unstructured":"Brown, T.B., Mann, B., Ryder, N., Subbiah, M., et al.: Language Models are Few-Shot Learners (2020). arXiv:2005.14165"},{"key":"9_CR57","unstructured":"Rombach, R., et al.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10 684\u201310 695 (2022)"},{"key":"9_CR58","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., et al.: Diffusion models beat gans on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR59","unstructured":"Nichol, A., et al.: Glide: towards photorealistic image generation and editing with text-guided diffusion models (2021). arXiv:2112.10741"},{"key":"9_CR60","unstructured":"Gu, S., et al.: Vector quantized diffusion model for textto-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10 696\u201310 706 (2022)"},{"key":"9_CR61","doi-asserted-by":"crossref","unstructured":"Liu, B., Yang, F., Bi, X., Xiao, B., Li, W., Gao, X.: Detecting generated images by real images. In: European Conference on Computer Vision, pp. 95\u2013110. Springer (2022)","DOI":"10.1007\/978-3-031-19781-9_6"},{"key":"9_CR62","unstructured":"Midjourney (2023). https:\/\/www.midjourney.com\/home\/"},{"key":"9_CR63","unstructured":"wukong (2023). https:\/\/xihe.mindspore.cn\/modelzoo\/wukong"},{"key":"9_CR64","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents, vol. 1, no. 2, p. 3 (2022). arXiv:2204.06125"},{"key":"9_CR65","unstructured":"Wu, S., Fei, H., Qu, L., Ji, W., Chua, T.-S.: NExT-GPT: Any-to-Any Multimodal LLM (2023). arXiv:2309.05519"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8685-5_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:17:28Z","timestamp":1730524648000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8685-5_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9789819786848","9789819786855"],"references-count":65,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8685-5_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}