{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T19:10:16Z","timestamp":1756321816892,"version":"3.44.0"},"reference-count":110,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004377","name":"Hong Kong Polytechnic University","doi-asserted-by":"publisher","award":["P0048387","P0044520","P0049586","P0050657"],"award-info":[{"award-number":["P0048387","P0044520","P0049586","P0050657"]}],"id":[{"id":"10.13039\/501100004377","id-type":"DOI","asserted-by":"publisher"}]},{"name":"PolyU Research Institute for Sports Science and Technology","award":["P0044571"],"award-info":[{"award-number":["P0044571"]}]},{"DOI":"10.13039\/501100018537","name":"Tianjin Science and Technology Major Project","doi-asserted-by":"publisher","award":["24ZXZSSS00420"],"award-info":[{"award-number":["24ZXZSSS00420"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Image Process."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tip.2025.3599101","type":"journal-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:31:56Z","timestamp":1755801116000},"page":"5400-5413","source":"Crossref","is-referenced-by-count":0,"title":["UpGen: Unleashing Potential of Foundation Models for Training-Free Camouflage Detection via Generative Models"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-9388-8146","authenticated-orcid":false,"given":"Ji","family":"Du","sequence":"first","affiliation":[{"name":"Department of Computing, The Hong Kong Polytechnic University, Hung Hom, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6941-3300","authenticated-orcid":false,"given":"Jiesheng","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, Nankai University, Tianjin, China"}]},{"given":"Desheng","family":"Kong","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, Nankai University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7687-5511","authenticated-orcid":false,"given":"Weiyun","family":"Liang","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, Nankai University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7178-9035","authenticated-orcid":false,"given":"Fangwei","family":"Hao","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, Nankai University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8532-2241","authenticated-orcid":false,"given":"Jing","family":"Xu","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, Nankai University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2832-3381","authenticated-orcid":false,"given":"Bin","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Artificial Intelligence, Nankai University, Tianjin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1880-4763","authenticated-orcid":false,"given":"Guiling","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, New Jersey Institute of Technology, Newark, NJ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1503-0240","authenticated-orcid":false,"given":"Ping","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computing and Research Institute for Sports Science and Technology, The Hong Kong Polytechnic University, Hung Hom, Hong Kong"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00285"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3085766"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00538"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25167"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00101"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00446"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2023.3286787"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2024.3356416"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00083"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3233\/faia230302"},{"key":"ref11","first-page":"1","article-title":"RUN: Reversible unfolding network for concealed object segmentation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"He"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01862"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25156"},{"key":"ref14","first-page":"30726","article-title":"Weakly-supervised concealed object segmentation with SAM-based pseudo labeling and multi-scale feature grouping","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"He"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01525"},{"key":"ref17","first-page":"29914","article-title":"Segment anything in high quality","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lei"},{"key":"ref18","article-title":"Faster segment anything: Towards lightweight SAM for mobile applications","author":"Zhang","year":"2023","journal-title":"arXiv:2306.14289"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-023-3881-x"},{"key":"ref20","article-title":"Can SAM segment anything? When SAM meets camouflaged object detection","author":"Tang","year":"2023","journal-title":"arXiv:2304.04709"},{"key":"ref21","article-title":"Towards real zero-shot camouflaged object segmentation without camouflaged annotations","author":"Lei","year":"2024","journal-title":"arXiv:2410.16953"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i11.29144"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680730"},{"key":"ref24","first-page":"107171","article-title":"Leveraging hallucinations to reduce manual prompt dependency in promptable segmentation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s44267-024-00050-1"},{"key":"ref26","article-title":"Large model based referring camouflaged object detection","author":"Cheng","year":"2023","journal-title":"arXiv:2311.17122"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1437"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"ref30","first-page":"1","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00584"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2025.111409"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"ref35","first-page":"23033","article-title":"SegCLIP: Patch aggregation with learnable centers for open-vocabulary semantic segmentation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Luo"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00288"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72664-4_18"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref39","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume-title":"Proc. NIPS","volume":"35","author":"Saharia"},{"key":"ref40","article-title":"Hierarchical text-conditional image generation with CLIP latents","author":"Ramesh","year":"2022","journal-title":"arXiv:2204.06125"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.487"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_41"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2487833"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00249"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00866"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/142"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3217695"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3266659"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20273"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/186"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3528347"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01142"},{"key":"ref53","first-page":"1","article-title":"Strategic preys make acute predators: Enhancing camouflaged object detectors by generating camouflaged objects","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"He"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00220"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00467"},{"key":"ref56","first-page":"25278","article-title":"LAION-5B: An open large-scale dataset for training next generation image-text models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Schuhmann"},{"key":"ref57","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jia"},{"key":"ref58","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref59","first-page":"12888","article-title":"BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref60","first-page":"19730","article-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref61","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref62","first-page":"49250","article-title":"InstructBLIP: Towards general-purpose vision-language models with instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Dai"},{"key":"ref63","first-page":"1","article-title":"MiniGPT-4: Enhancing vision-language understanding with advanced large language models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Zhu"},{"key":"ref64","article-title":"OpenFlamingo: An open-source framework for training large autoregressive vision-language models","author":"Awadalla","year":"2023","journal-title":"arXiv:2308.01390"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref68","first-page":"9694","article-title":"Align before fuse: Vision and language representation learning with momentum distillation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref69","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ho"},{"key":"ref70","first-page":"1","article-title":"Denoising diffusion implicit models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Song"},{"key":"ref71","first-page":"8780","article-title":"Diffusion models beat GANs on image synthesis","volume-title":"Proc. NIPS","volume":"34","author":"Dhariwal"},{"key":"ref72","first-page":"1","article-title":"Prompt-to-prompt image editing with cross-attention control","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Hertz"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00117"},{"key":"ref74","first-page":"54683","article-title":"DatasetDM: Synthesizing data with perception annotations using diffusion models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00341"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.26599\/AIR.2023.9150021"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00392"},{"issue":"8","key":"ref78","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref79","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv:2302.13971"},{"key":"ref80","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref82","first-page":"1","article-title":"Mitigating hallucination in large multi-modal models via robust instruction tuning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Liu"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02553"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01316"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01274"},{"issue":"6","key":"ref86","first-page":"7","article-title":"Animal camouflage analysis: Chameleon database","volume":"2","author":"Skurowski","year":"2018","journal-title":"Unpublished Manuscript"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2019.04.006"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247743"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/97"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.39"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00994"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01280"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00411"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548178"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02111"},{"key":"ref96","article-title":"Grounded SAM: Assembling open-world models for diverse visual tasks","author":"Ren","year":"2024","journal-title":"arXiv:2401.14159"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_27"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3532440"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.70"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2465960"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.407"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.43"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299184"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.404"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2010.5543739"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-71058-3_12"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3266163"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1186\/s13173-021-00117-7"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.018"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/83\/10795784\/11131534.pdf?arnumber=11131534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T18:33:31Z","timestamp":1756319611000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11131534\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":110,"URL":"https:\/\/doi.org\/10.1109\/tip.2025.3599101","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"type":"print","value":"1057-7149"},{"type":"electronic","value":"1941-0042"}],"subject":[],"published":{"date-parts":[[2025]]}}}