{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T21:05:57Z","timestamp":1779224757600,"version":"3.51.4"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010418","name":"Institute of Information & Communications Technology Planning & Evaluation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.neunet.2026.109093","type":"journal-article","created":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T06:37:25Z","timestamp":1778567845000},"page":"109093","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Negative prompt-guided optimization: Enhancing soft prompt generalization in vision-language models"],"prefix":"10.1016","volume":"203","author":[{"given":"Suneung","family":"Kim","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6249-4996","authenticated-orcid":false,"given":"Seong-Whan","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.109093_bib0001","series-title":"Advances in neural information processing systems (neurIPS)","first-page":"23716","article-title":"Flamingo: A visual language model for few-shot learning","author":"Alayrac","year":"2022"},{"key":"10.1016\/j.neunet.2026.109093_bib0002","series-title":"European conference on computer vision (ECCV)","first-page":"446","article-title":"Food-101\u2013mining discriminative components with random forests","author":"Bossard","year":"2014"},{"key":"10.1016\/j.neunet.2026.109093_bib0003","series-title":"In advances in neural information processing systems (neurIPS)","first-page":"1877","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"10.1016\/j.neunet.2026.109093_bib0004","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"23232","article-title":"Lasp: Text-to-text optimization for language-aware soft prompting of vision & language models","author":"Bulat","year":"2023"},{"issue":"240","key":"10.1016\/j.neunet.2026.109093_bib0005","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.neunet.2026.109093_bib0006","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"3606","article-title":"Describing textures in the wild","author":"Cimpoi","year":"2014"},{"key":"10.1016\/j.neunet.2026.109093_bib0007","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"248","article-title":"Imagenet: A large-scale hierarchical image database","author":"Deng","year":"2009"},{"key":"10.1016\/j.neunet.2026.109093_bib0008","unstructured":"Dosovitskiy, A. (2020). An image is worth 16x16 words: Transformers for image recognition at scale.arXiv: 2010.11929."},{"key":"10.1016\/j.neunet.2026.109093_bib0009","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshop (CVPRW)","first-page":"178","article-title":"Learning generative visual models from few training examples: An incremental bayesian approach tested on 101 object categories","author":"Fei-Fei","year":"2004"},{"issue":"7","key":"10.1016\/j.neunet.2026.109093_bib0010","doi-asserted-by":"crossref","first-page":"2217","DOI":"10.1109\/JSTARS.2019.2918242","article-title":"Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification","volume":"12","author":"Helber","year":"2019","journal-title":"IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing"},{"key":"10.1016\/j.neunet.2026.109093_bib0011","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"8340","article-title":"The many faces of robustness: A critical analysis of out-of-distribution generalization","author":"Hendrycks","year":"2021"},{"key":"10.1016\/j.neunet.2026.109093_bib0012","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"15262","article-title":"Natural adversarial examples","author":"Hendrycks","year":"2021"},{"key":"10.1016\/j.neunet.2026.109093_bib0013","unstructured":"Huang, T., Chu, J., & Wei, F. (2022). Unsupervised prompt learning for vision-language models. arXiv: 2204.03649."},{"key":"10.1016\/j.neunet.2026.109093_bib0014","series-title":"International conference on machine learning (ICML)","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"Jia","year":"2021"},{"key":"10.1016\/j.neunet.2026.109093_bib0015","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1162\/tacl_a_00324","article-title":"How can we know what language models know?","volume":"8","author":"Jiang","year":"2020","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"10.1016\/j.neunet.2026.109093_bib0016","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"19113","article-title":"Maple: Multi-modal prompt learning","author":"Khattak","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0017","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"15190","article-title":"Self-regulating prompts: Foundational model adaptation without forgetting","author":"Khattak","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0018","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"2585","article-title":"Exposing and mitigating spurious correlations for cross-modal retrieval","author":"Kim","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0019","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision workshops (ICCVW)","first-page":"554","article-title":"3d object representations for fine-grained categorization","author":"Krause","year":"2013"},{"key":"10.1016\/j.neunet.2026.109093_bib0020","series-title":"IEEE international conference on systems, man, and cybernetics (smc)","first-page":"1151","article-title":"Promoticon: Prompt-based emotion controllable text-to-speech via prompt generation and matching","author":"Lee","year":"2024"},{"key":"10.1016\/j.neunet.2026.109093_bib0021","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., & Constant, N. (2021). The power of scale for parameter-efficient prompt tuning. arXiv: 2104.08691.","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"10.1016\/j.neunet.2026.109093_bib0022","article-title":"Supporting vision-language model few-shot inference with confounder-pruned knowledge prompt","author":"Li","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109093_bib0023","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"17584","article-title":"Learning transferable negative prompts for out-of-distribution detection","author":"Li","year":"2024"},{"key":"10.1016\/j.neunet.2026.109093_bib0024","unstructured":"Li, X. L., & Liang, P. (2021). Prefix-tuning: Optimizing continuous prompts for generation.arXiv: 2101.00190."},{"key":"10.1016\/j.neunet.2026.109093_bib0025","doi-asserted-by":"crossref","unstructured":"Liu, X., Ji, K., Fu, Y., Tam, W. L., Du, Z., Yang, Z., & Tang, J. (2021). P-tuning v2: Prompt tuning can be comparable to fine-tuning universally across scales and tasks.arXiv: 2110.07602.","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"10.1016\/j.neunet.2026.109093_bib0026","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"5206","article-title":"Prompt distribution learning","author":"Lu","year":"2022"},{"issue":"9","key":"10.1016\/j.neunet.2026.109093_bib0027","doi-asserted-by":"crossref","first-page":"4616","DOI":"10.1109\/TCSVT.2023.3245584","article-title":"Understanding and mitigating overfitting in prompt tuning for vision-language models","volume":"33","author":"Ma","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.109093_bib0028","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko, M., & Vedaldi, A. (2013). Fine-grained visual classification of aircraft. arXiv: 1306.5151."},{"key":"10.1016\/j.neunet.2026.109093_bib0029","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"2722","article-title":"Doubly right object recognition: A why prompt for visual rationales","author":"Mao","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0030","unstructured":"Menon, S., & Vondrick, C. (2022). Visual classification via description from large language models. arXiv: 2210.07183."},{"key":"10.1016\/j.neunet.2026.109093_bib0031","series-title":"Indian conference on computer vision, graphics & image processing (ICVGIP)","first-page":"722","article-title":"Automated flower classification over a large number of classes","author":"Nilsback","year":"2008"},{"key":"10.1016\/j.neunet.2026.109093_bib0032","doi-asserted-by":"crossref","unstructured":"Park, J.-W., & Lee, S.-W. (2025). Mcot-re: Multi-faceted chain-of-thought and re-ranking for training-free zero-shot composed image retrieval. arXiv: 2507.12819.","DOI":"10.1109\/SMC58881.2025.11342756"},{"key":"10.1016\/j.neunet.2026.109093_bib0033","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"3498","article-title":"Cats and dogs","author":"Parkhi","year":"2012"},{"key":"10.1016\/j.neunet.2026.109093_bib0034","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"15691","article-title":"What does a platypus look like? Generating customized prompts for zero-shot image classification","author":"Pratt","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0035","series-title":"International conference on machine learning (ICML)","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"issue":"8","key":"10.1016\/j.neunet.2026.109093_bib0036","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"10.1016\/j.neunet.2026.109093_bib0037","series-title":"International conference on machine learning (ICML)","first-page":"5389","article-title":"Do imagenet classifiers generalize to imagenet?","author":"Recht","year":"2019"},{"key":"10.1016\/j.neunet.2026.109093_bib0038","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"15746","article-title":"Waffling around for performance: Visual classification with random words and broad concepts","author":"Roth","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0039","unstructured":"Schick, T., & Sch\u00fctze, H. (2020). Exploiting cloze questions for few shot text classification and natural language inference. arXiv: 2001.07676."},{"issue":"11","key":"10.1016\/j.neunet.2026.109093_bib0040","first-page":"1","article-title":"A dataset of 101 human action classes from videos in the wild","volume":"2","author":"Soomro","year":"2012","journal-title":"Center for Research in Computer Vision"},{"key":"10.1016\/j.neunet.2026.109093_bib0041","series-title":"Advances in neural information processing systems (neurIPS)","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.neunet.2026.109093_bib0042","series-title":"Advances in neural information processing systems (neurIPS)","article-title":"Learning robust global representations by penalizing local predictive power","author":"Wang","year":"2019"},{"key":"10.1016\/j.neunet.2026.109093_bib0043","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106272","article-title":"Layerwised multimodal knowledge distillation for vision-language pretrained model","volume":"175","author":"Wang","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109093_bib0044","series-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition","first-page":"3485","article-title":"Sun database: Large-scale scene recognition from abbey to zoo","author":"Xiao","year":"2010"},{"key":"10.1016\/j.neunet.2026.109093_bib0045","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"3090","article-title":"Learning concise and descriptive attributes for visual recognition","author":"Yan","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0046","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"19187","article-title":"Language in a bottle: Language model guided concept bottlenecks for interpretable image classification","author":"Yang","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0047","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"6757","article-title":"Visual-language prompt tuning with knowledge-guided context optimization","author":"Yao","year":"2023"},{"key":"10.1016\/j.neunet.2026.109093_bib0048","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"7377","article-title":"Concept-guided prompt learning for generalization in vision-language models","volume":"vol. 38","author":"Zhang","year":"2024"},{"key":"10.1016\/j.neunet.2026.109093_bib0049","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Friedman, D., & Chen, D. (2021). Factual probing is [mask]: Learning vs. learning to recall. arXiv: 2104.05240.","DOI":"10.18653\/v1\/2021.naacl-main.398"},{"key":"10.1016\/j.neunet.2026.109093_bib0050","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.107078","article-title":"Diccr: Double-gated intervention and confounder causal reasoning for vision-language navigation","volume":"184","author":"Zhou","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.109093_bib0051","series-title":"Pproceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"16816","article-title":"Conditional prompt learning for vision-language models","author":"Zhou","year":"2022"},{"issue":"9","key":"10.1016\/j.neunet.2026.109093_bib0052","doi-asserted-by":"crossref","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","article-title":"Learning to prompt for vision-language models","volume":"130","author":"Zhou","year":"2022","journal-title":"International Journal of Computer Vision"},{"key":"10.1016\/j.neunet.2026.109093_bib0053","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","first-page":"15659","article-title":"Prompt-aligned gradient for prompt tuning","author":"Zhu","year":"2023"}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026005538?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026005538?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T20:10:20Z","timestamp":1779221420000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026005538"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":53,"alternative-id":["S0893608026005538"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.109093","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Negative prompt-guided optimization: Enhancing soft prompt generalization in vision-language models","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.109093","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"109093"}}