{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T21:00:57Z","timestamp":1775509257731,"version":"3.50.1"},"reference-count":83,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62406171"],"award-info":[{"award-number":["62406171"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62225601"],"award-info":[{"award-number":["62225601"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U23B2052"],"award-info":[{"award-number":["U23B2052"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62406172"],"award-info":[{"award-number":["62406172"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Beijing Natural Science Foundation Project","doi-asserted-by":"publisher","award":["L242025"],"award-info":[{"award-number":["L242025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fundamental Research Funds for the Beijing University of Posts and Telecommunications","award":["2025AI4S15"],"award-info":[{"award-number":["2025AI4S15"]}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2023M741964"],"award-info":[{"award-number":["2023M741964"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the Postdoctoral Fellowship Program of CPSF","award":["GZC20240841"],"award-info":[{"award-number":["GZC20240841"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. 
on Image Process."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tip.2026.3673967","type":"journal-article","created":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T19:52:55Z","timestamp":1774554775000},"page":"3395-3410","source":"Crossref","is-referenced-by-count":0,"title":["Toward Generalizable Forgery Detection and Reasoning"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-5145-6719","authenticated-orcid":false,"given":"Yueying","family":"Gao","sequence":"first","affiliation":[{"name":"Pattern Recognition and Intelligent System Laboratory, School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4081-3001","authenticated-orcid":false,"given":"Dongliang","family":"Chang","sequence":"additional","affiliation":[{"name":"Pattern Recognition and Intelligent System Laboratory, School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9550-6554","authenticated-orcid":false,"given":"Bingyao","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1022-2971","authenticated-orcid":false,"given":"Haotian","family":"Qin","sequence":"additional","affiliation":[{"name":"Pattern Recognition and Intelligent System Laboratory, School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4423-4157","authenticated-orcid":false,"given":"Muxi","family":"Diao","sequence":"additional","affiliation":[{"name":"Pattern Recognition and Intelligent System Laboratory, School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4279-3892","authenticated-orcid":false,"given":"Lei","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4726-093X","authenticated-orcid":false,"given":"Kongming","family":"Liang","sequence":"additional","affiliation":[{"name":"Pattern Recognition and Intelligent System Laboratory, School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2950-2488","authenticated-orcid":false,"given":"Zhanyu","family":"Ma","sequence":"additional","affiliation":[{"name":"Pattern Recognition and Intelligent System Laboratory, School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/09732586241277335"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.4018\/979-8-3693-6890-9.ch008"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/14648849241263293"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3132828"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3246793"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01974"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3482191"},{"key":"ref8","first-page":"718","article-title":"Self-supervised adversarial training for robust face forgery detection","volume-title":"Proc. 
BMVC","author":"Gao"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01743"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02657"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3592576"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02345"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01024"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i7.32772"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73223-2_22"},{"key":"ref16","article-title":"FFAA: Multimodal large language model based explainable open-world face forgery analysis assistant","author":"Huang","year":"2024","journal-title":"arXiv:2408.10072"},{"key":"ref17","article-title":"FakeShield: Explainable image forgery detection and localization via multi-modal large language models","author":"Xu","year":"2024","journal-title":"arXiv:2410.02761"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02685"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00872"},{"key":"ref20","article-title":"Progressive growing of GANs for improved quality, stability, and variation","author":"Karras","year":"2017","journal-title":"arXiv:1710.10196"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02051"},{"key":"ref22","first-page":"7621","article-title":"DRCT: Diffusion reconstruction contrastive training towards universal detection of diffusion generated images","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Chen"},{"key":"ref23","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Radford"},{"key":"ref24","article-title":"Qwen2.5-VL technical report","volume-title":"arXiv:2502.13923","author":"Bai","year":"2025"},{"key":"ref25","article-title":"ChatGLM: A family of large language models from GLM-130B to GLM-4 all tools","author":"Glm","year":"2024","journal-title":"arXiv:2406.12793"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02283"},{"key":"ref27","article-title":"DeepSeek-VL2: Mixture-of-experts vision-language models for advanced multimodal understanding","author":"Wu","year":"2024","journal-title":"arXiv:2412.10302"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00019"},{"key":"ref29","article-title":"AIGI-Holmes: Towards explainable and generalizable AI-generated image detection via multimodal large language models","author":"Zhou","year":"2025","journal-title":"arXiv:2507.02664"},{"key":"ref30","article-title":"LEGION: Learning to ground and explain for synthetic image detection","author":"Kang","year":"2025","journal-title":"arXiv:2503.15264"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2019.2916364"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00308"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3323035"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00333"},{"key":"ref35","article-title":"FakeBench: Uncover the Achilles\u2019 heels of fake images with large multimodal models","author":"Li","year":"2024","journal-title":"arXiv:2404.13306v1"},{"issue":"3","key":"ref36","first-page":"8","article-title":"Improving image generation with better captions","volume":"2","author":"Betker","year":"2023","journal-title":"Comput. Sci."},{"key":"ref37","article-title":"LOKI: A comprehensive synthetic data detection benchmark using large multimodal models","author":"Ye","year":"2024","journal-title":"arXiv:2410.09732"},{"key":"ref38","volume-title":"Flux","year":"2024"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"ref40","article-title":"SegLLM: Multi-round reasoning segmentation","author":"Wang","year":"2024","journal-title":"arXiv:2410.18923"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02259"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00254"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02221"},{"key":"ref44","article-title":"DiffusionDB: A large-scale prompt gallery dataset for text-to-image generative models","author":"Wang","year":"2022","journal-title":"arXiv:2210.14896"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1833"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref47","article-title":"A-Bench: Are LMMs masters at evaluating AI-generated images?","author":"Zhang","year":"2024","journal-title":"arXiv:2406.03070"},{"key":"ref48","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"36","author":"Liu"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocae122"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0275"},{"key":"ref51","article-title":"LLaVA-CoT: Let vision language models reason step-by-step","author":"Xu","year":"2024","journal-title":"arXiv:2411.10440"},{"key":"ref52","article-title":"DriveRX: A vision-language reasoning model for cross-task autonomous driving","author":"Diao","year":"2025","journal-title":"arXiv:2505.20665"},{"key":"ref53","volume-title":"Kandinsky","year":"2023"},{"key":"ref54","volume-title":"PixArt-\u03b1","year":"2024"},{"key":"ref55","volume-title":"Flux","year":"2025"},{"key":"ref56","volume-title":"GPT-4O","year":"2025"},{"key":"ref57","article-title":"PatchCraft: Exploring texture patch for efficient AI-generated image detection","author":"Zhong","year":"2023","journal-title":"arXiv:2311.12397"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3180556"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.3390\/electronics13224466"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2142"},{"key":"ref61","article-title":"MiniGPT-v2: Large language model as a unified interface for vision-language multi-task learning","author":"Chen","year":"2023","journal-title":"arXiv:2310.09478"},{"key":"ref62","article-title":"Controlling vision-language models for multi-task image restoration","author":"Luo","year":"2023","journal-title":"arXiv:2310.01018"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01866"},{"key":"ref64","article-title":"RIGID: A training-free and model-agnostic framework for robust AI-generated image detection","author":"He","year":"2024","journal-title":"arXiv:2405.20112"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00914"},{"key":"ref67","article-title":"From CLIP to DINO: Visual encoders shout in multi-modal large language models","author":"Jiang","year":"2023","journal-title":"arXiv:2310.08825"},{"key":"ref68","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"arXiv:2304.07193"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref70","first-page":"1","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. ICLR","author":"Dosovitskiy"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1111\/exsy.13829"},{"key":"ref72","article-title":"AntifakePrompt: Prompt-tuned vision-language models are fake image detectors","author":"Chang","year":"2023","journal-title":"arXiv:2310.17419"},{"key":"ref73","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"27","author":"Sutskever"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3576915.3616588"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28310"},{"key":"ref76","article-title":"A sanity check for AI-generated image detection","author":"Yan","year":"2024","journal-title":"arXiv:2406.19435"},{"key":"ref77","article-title":"Orthogonal subspace decomposition for generalizable AI-generated image detection","author":"Yan","year":"2024","journal-title":"arXiv:2411.15633"},{"key":"ref78","first-page":"17246","article-title":"Lota: Bit-planes guided AI-generated image detection","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Wang"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref80","article-title":"LoRA: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv:2106.09685"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.52202\/075280-3398"},{"key":"ref83","article-title":"SARATHI: Efficient LLM inference by piggybacking decodes with chunked prefills","author":"Agrawal","year":"2023","journal-title":"arXiv:2308.16369"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/83\/11355710\/11456362.pdf?arnumber=11456362","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:59:22Z","timestamp":1775505562000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11456362\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":83,"URL":"https:\/\/doi.org\/10.1109\/tip.2026.3673967","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"value":"1057-7149","type":"print"},{"value":"1941-0042","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}