{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T07:50:33Z","timestamp":1765871433925,"version":"3.48.0"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"crossref","award":["No.2022YFE0196100"],"award-info":[{"award-number":["No.2022YFE0196100"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Innovation Capacity Enhancement Program Science and Technology Platform Project of Hebei Province","award":["22567623H"],"award-info":[{"award-number":["22567623H"]}]},{"name":"Hebei University High Level Innovative Talent Research Start-up Funding Project","award":["No.521000981092"],"award-info":[{"award-number":["No.521000981092"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s42514-025-00238-x","type":"journal-article","created":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T17:15:25Z","timestamp":1760721325000},"page":"494-508","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["IEPT: input-enhanced prompt tuning for visual-language models"],"prefix":"10.1007","volume":"7","author":[{"given":"Chunru","family":"Dong","sequence":"first","affiliation":[]},{"given":"Junyuan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Hua","sequence":"additional","affiliation":[]},{"given":"Jiahong","family":"Tang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8532-4871","authenticated-orcid":false,"given":"Feng","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"key":"238_CR1","unstructured":"Azizi, S., Kornblith, S., Saharia, C., et al.: Synthetic data from diffusion models improves imagenet classification, arXiv preprint arXiv:2304.08466, (2023)"},{"key":"238_CR2","unstructured":"Bahng, H., Jahanian, A., Sankaranarayanan, S., et al.: Exploring visual prompts for adapting large-scale models, (2022)"},{"key":"238_CR3","first-page":"446","volume-title":"Food-101 - mining discriminative components with random forests, computer vision - ECCV","author":"L Bossard","year":"2014","unstructured":"Bossard, L., Guillaumin, M., Van Gool, L.: Food-101 - mining discriminative components with random forests, computer vision - ECCV, pp. 446\u2013461. Springer International Publishing, NY (2014)"},{"key":"238_CR4","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., Mann, B., Ryder, N., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"238_CR5","unstructured":"Chen, G., Yao, W., Song, X., et al.: Plot: prompt learning with optimal transport for vision-language models, arXiv preprint arXiv:2210.01253, (2022)"},{"key":"238_CR7","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., et al.: Describing textures in the wild, Proceedings of the IEEE conference on computer vision and pattern recognition, 3606\u20133613 (2014)","DOI":"10.1109\/CVPR.2014.461"},{"key":"238_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., et al.: ImageNet: a large-scale hierarchical image database. 2009 IEEE Conference on Computer Vision and Pattern Recognition, 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"238_CR9","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., et al.: An image is worth 16x16 words: Transformers for image recognition at scale, arXiv preprint arXiv:2010.11929, (2020)"},{"key":"238_CR10","unstructured":"Fei-Fei, L., Fergus, R., Perona, P.: Learning generative visual models from few training examples: an incremental bayesian approach tested on 101 object categories, 2004 Conference on Computer Vision and Pattern Recognition Workshop, (2004)"},{"key":"238_CR11","unstructured":"Ge, C., Huang, R., Xie, M., et al.: Domain adaptation via prompt learning, (2022)"},{"issue":"7","key":"238_CR12","doi-asserted-by":"publisher","first-page":"2217","DOI":"10.1109\/JSTARS.2019.2918242","volume":"12","author":"P Helber","year":"2019","unstructured":"Helber, P., Bischke, B., Dengel, A., et al.: Eurosat: a novel dataset and deep learning benchmark for land use and land cover classification. IEEE J. Sel. Top. Appl. Earth Obs. Remote. Sens. 12(7), 2217\u20132226 (2019)","journal-title":"IEEE J. Sel. Top. Appl. Earth Obs. Remote. Sens."},{"key":"238_CR13","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Basart, S., Mu, N., et al.: The many faces of robustness: a critical analysis of out-of-distribution generalization, Proceedings of the IEEE\/CVF international conference on computer vision, 8340\u20138349 (2021)","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"238_CR14","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Zhao, K., Basart, S., et al.: Natural adversarial examples, Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 15262\u201315271 (2021)","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"238_CR15","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network, arXiv : machine learning, (2015)"},{"key":"238_CR16","doi-asserted-by":"crossref","unstructured":"Hu, X., Liu, A., Tan, Z., et al.: Gda: generative data augmentation techniques for relation extraction tasks, arXiv preprint arXiv:2305.16663, (2023)","DOI":"10.18653\/v1\/2023.findings-acl.649"},{"key":"238_CR17","unstructured":"Huang, T., Chu, J., Wei, F.: Unsupervised prompt learning for vision-language models, arXiv preprint arXiv:2204.03649 (2022)"},{"key":"238_CR18","doi-asserted-by":"crossref","unstructured":"Jia, M., Tang, L., Chen, B., et al.: Visual prompt tuning, (2022)","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"238_CR19","doi-asserted-by":"crossref","unstructured":"Khattak, M., Rasheed, H., Maaz, M., et al.: Maple: multi-modal prompt learning, Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (2023)","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"238_CR20","doi-asserted-by":"crossref","unstructured":"Khattak, M., Wasim, S., Naseer, M., et al.: Self-regulating prompts: foundational model adaptation without forgetting, Proceedings of the IEEE\/CVF International Conference on Computer Vision, (2023)","DOI":"10.1109\/ICCV51070.2023.01394"},{"key":"238_CR21","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., et al.: 3D Object representations for fine-grained categorization. 2013 IEEE International Conference on Computer Vision Workshops, 554\u2013561 (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"238_CR22","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning, (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"238_CR23","doi-asserted-by":"crossref","unstructured":"Li, X., Liang, P.: Prefix-Tuning: optimizing continuous prompts for generation, Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (Volume 1: Long Papers), (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"238_CR24","doi-asserted-by":"crossref","unstructured":"Liu, X., Ji, K., Fu, Y., et al.: P-Tuning v2: prompt tuning can be comparable to fine-tuning universally across scales and tasks, (2022)","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"238_CR25","doi-asserted-by":"crossref","unstructured":"Lu, Y., Liu, J., Zhang, Y., et al.: Prompt distribution learning, Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, (2022)","DOI":"10.1109\/CVPR52688.2022.00514"},{"issue":"9","key":"238_CR26","doi-asserted-by":"publisher","first-page":"4616","DOI":"10.1109\/TCSVT.2023.3245584","volume":"33","author":"C Ma","year":"2023","unstructured":"Ma, C., Liu, Y., Deng, J., et al.: Understanding and mitigating overfitting in prompt tuning for vision-language models. IEEE Trans. Circuits Syst. Video Technol. 33(9), 4616\u20134629 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"238_CR27","unstructured":"Maji, S., Rahtu, E., Kannala, J., et al.: Fine-grained visual classification of aircraft, arXiv preprint arXiv:1306.5151, (2013)"},{"key":"238_CR28","doi-asserted-by":"crossref","unstructured":"Nilsback, M., Zisserman, A.: Automated flower classification over a large number of classes, Sixth Indian conference on computer vision, graphics & image processing, 722-729 (2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"238_CR29","doi-asserted-by":"crossref","unstructured":"Parkhi, O., Vedaldi, A., Zisserman, A., et al.: Cats and dogs. 2012 IEEE conference on computer vision and pattern recognition, 3498\u20133505 (2012)","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"238_CR30","unstructured":"Radford, A., Kim, J., Hallacy, C., et al.: Learning transferable visual models from natural language supervision, international conference on machine learning, (2021)"},{"key":"238_CR31","doi-asserted-by":"crossref","unstructured":"Rao, Y., Zhao, W., Chen, G., et al.: DenseCLIP: language-guided dense prediction with context-aware prompting, Cornell University - arXiv, (2021)","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"238_CR32","unstructured":"Recht, B., Roelofs, R., Schmidt, L., et al.: Do imagenet classifiers generalize to imagenet?, International conference on machine learning, 5389\u20135400 (2019)"},{"key":"238_CR33","doi-asserted-by":"crossref","unstructured":"Shipard, J., Wiliem, A., Thanh, K., et al.: Diversity is definitely needed: improving model-agnostic zero-shot classification via stable diffusion, Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, (2023)","DOI":"10.1109\/CVPRW59228.2023.00084"},{"key":"238_CR35","unstructured":"Soomro, K., Zamir, A., Shah, M.: UCF101: a dataset of 101 human actions classes from videos in the wild. ArXiv (2012)"},{"key":"238_CR36","unstructured":"Trabucco, B., Doherty, K., Gurinas, M., et al.: Effective data augmentation with diffusion models, arXiv preprint arXiv:2302.07944, (2023)"},{"key":"238_CR37","unstructured":"Wang, H., Ge, S., Lipton, Z., et al.: Learning robust global representations by penalizing local predictive power. Adv. Neural. Inf. Process. Syst. 32, (2019)"},{"key":"238_CR38","doi-asserted-by":"crossref","unstructured":"Whitehouse, C., Choudhury, M., Aji, A.: Llm-powered data augmentation for enhanced crosslingual performance, arXiv preprint arXiv:2305.14288, (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.44"},{"key":"238_CR39","doi-asserted-by":"crossref","unstructured":"Xian, Y., Schiele, B., Akata, Z.: Zero-Shot Learning \u2014 the good, the bad and the ugly, 2017 IEEE conference on computer vision and pattern recognition (CVPR), (2017)","DOI":"10.1109\/CVPR.2017.328"},{"key":"238_CR40","doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K., et al.: SUN database: large-scale scene recognition from abbey to zoo, 2010 IEEE computer society conference on computer vision and pattern recognition, 3485-3492, (2010)","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"238_CR41","unstructured":"Xing, Y., Wu, Q., Cheng, D., et al.: Dual modality prompt tuning for vision-language pre-trained model, (2022)"},{"key":"238_CR42","doi-asserted-by":"publisher","first-page":"103885","DOI":"10.1016\/j.cag.2024.01.012","volume":"119","author":"J Xing","year":"2024","unstructured":"Xing, J., Liu, J., Wang, J., et al.: A survey of efficient fine-tuning methods for vision-language models \u2013 prompt and adapter. Comput. Graph. 119,(2024)","journal-title":"Comput. Graph."},{"key":"238_CR43","doi-asserted-by":"crossref","unstructured":"Yao, H., Zhang, R., Xu, C.: Visual-language prompt tuning with knowledge-guided context optimization, Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, (2023)","DOI":"10.1109\/CVPR52729.2023.00653"},{"key":"238_CR44","first-page":"9125","volume":"35","author":"L Yao","year":"2022","unstructured":"Yao, L., Han, J., Wen, Y., et al.: DetCLIP: dictionary-enriched visual-concept paralleled pre-training for open-world detection. Adv. Neural. Inf. Process. Syst. 35, 9125\u20139138 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"238_CR45","unstructured":"Yin, Y., Kaddour, J., Zhang, X., et al.: Ttida: controllable generative data augmentation via text-to-text and text-to-image models, arXiv preprint arXiv:2304.08821, (2023)"},{"key":"238_CR47","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C., et al.: Conditional prompt learning for vision-language models, Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"238_CR48","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C., et al.: Learning to prompt for vision-language models. Int. J. Comput. Vis. 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vis."},{"key":"238_CR49","doi-asserted-by":"crossref","unstructured":"Zhu, B., Niu, Y., Han, Y., et al.: Prompt-aligned gradient for prompt tuning, Proceedings of the IEEE\/CVF international conference on computer vision, (2023)","DOI":"10.1109\/ICCV51070.2023.01435"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-025-00238-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-025-00238-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-025-00238-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T07:47:12Z","timestamp":1765871232000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-025-00238-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"references-count":46,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["238"],"URL":"https:\/\/doi.org\/10.1007\/s42514-025-00238-x","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"type":"print","value":"2524-4922"},{"type":"electronic","value":"2524-4930"}],"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"1 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}