{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T11:02:37Z","timestamp":1768993357434,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":45,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556922","type":"print"},{"value":"9789819556939","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5693-9_24","type":"book-chapter","created":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T21:22:45Z","timestamp":1768944165000},"page":"345-359","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Text-Guided Noise Replacement Visual Prompt Learning for\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"given":"Xiaokang","family":"Shao","sequence":"first","affiliation":[]},{"given":"Tao","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Mengjin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Zhaojun","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Junxian","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Aihua","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,21]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Bossard, L., Guillaumin, M., Van\u00a0Gool, L.: Food-101\u2013mining discriminative components with random forests. In: Computer vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, 6\u201312 September 2014, Proceedings, Part VI 13, pp. 446\u2013461 (2014)","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"24_CR2","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, pp. 1877\u20131901 (2020)"},{"key":"24_CR3","unstructured":"Chowdhery, A., et al.: Palm: scaling language modeling with pathways. J. Mach. Learn. Res. 1\u2013113 (2023)"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., Vedaldi, A.: Describing textures in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3606\u20133613 (2014)","DOI":"10.1109\/CVPR.2014.461"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Ding, J., Xue, N., Xia, G.S., Dai, D.: Decoupling zero-shot semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11583\u201311592 (2022)","DOI":"10.1109\/CVPR52688.2022.01129"},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Du, Y., Liu, Z., Li, J., Zhao, W.X.: A survey of vision-language pre-trained models. arXiv preprint arXiv:2202.10936 (2022)","DOI":"10.24963\/ijcai.2022\/762"},{"key":"24_CR8","unstructured":"Fei-Fei, L., Fergus, R., Perona, P.: Learning generative visual models from few training examples: an incremental Bayesian approach tested on 101 object categories. In: 2004 Conference on Computer Vision and Pattern Recognition Workshop, p. 178 (2004)"},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Gao, P., et al.: Clip-adapter: better vision-language models with feature adapters. Int. J. Comput. Vision 581\u2013595 (2024)","DOI":"10.1007\/s11263-023-01891-x"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"He, R., Hu, B., Zheng, W.S., Guo, Y.: Two-stage sparse representation for robust recognition on large-scale database. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 475\u2013480 (2010)","DOI":"10.1609\/aaai.v24i1.7654"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"He, R., Tan, T., Wang, L.: Robust recovery of corrupted low-rankmatrix by implicit regularizers. IEEE Trans. Pattern Anal. Mach. Intell. 770\u2013783 (2013)","DOI":"10.1109\/TPAMI.2013.188"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"He, R., Zhang, M., Wang, L., Ji, Y., Yin, Q.: Cross-modal subspace learning via pairwise constraints. IEEE Trans. Image Process. 5543\u20135556 (2015)","DOI":"10.1109\/TIP.2015.2466106"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Helber, P., Bischke, B., Dengel, A., Borth, D.: Eurosat: a novel dataset and deep learning benchmark for land use and land cover classification. IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens. 2217\u20132226 (2019)","DOI":"10.1109\/JSTARS.2019.2918242"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., et al.: The many faces of robustness: a critical analysis of out-of-distribution generalization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8340\u20138349 (2021)","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Zhao, K., Basart, S., Steinhardt, J., Song, D.: Natural adversarial examples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15262\u201315271 (2021)","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"24_CR17","unstructured":"Ilharco, G., et al.: Openclip (2021)"},{"key":"24_CR18","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916 (2021)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Jia, M., et al.: Visual prompt tuning. In: European Conference on Computer Vision, pp. 709\u2013727 (2022)","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Jin, Z., Wei, Y.: Umpa: unified multi-modal prompt with adapter for vision-language models. Multimedia Syst. 1\u201311 (2025)","DOI":"10.1007\/s00530-025-01707-7"},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Rasheed, H., Maaz, M., Khan, S., Khan, F.S.: Maple: multi-modal prompt learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19113\u201319122 (2023)","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Kim, G., Kim, S., Lee, S.: AAPL: adding attributes to prompt learning for vision-language models. In: CVPR Workshop (2024)","DOI":"10.1109\/CVPRW63382.2024.00164"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3D object representations for fine-grained categorization. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 554\u2013561 (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"24_CR24","unstructured":"Le\u00a0Scao, T., et al.: Bloom: a 176b-parameter open-access multilingual language model (2023)"},{"key":"24_CR25","unstructured":"Lialin, V., Deshpande, V., Rumshisky, A.: Scaling down to scale up: a guide to parameter-efficient fine-tuning. arXiv preprint arXiv:2303.15647 (2023)"},{"key":"24_CR26","unstructured":"Long, S., et al.: Mutual prompt leaning for vision language models. Int. J. Comput. Vision 1\u201319 (2024)"},{"key":"24_CR27","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko, M., Vedaldi, A.: Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Nilsback, M.E., Zisserman, A.: Automated flower classification over a large number of classes. In: 2008 Sixth Indian Conference on Computer Vision, Graphics & Image Processing, pp. 722\u2013729 (2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"24_CR29","unstructured":"Oord, A.V.D., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Parkhi, O.M., Vedaldi, A., Zisserman, A., Jawahar, C.: Cats and dogs. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3498\u20133505 (2012)","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"24_CR31","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763 (2021)"},{"key":"24_CR32","unstructured":"Recht, B., Roelofs, R., Schmidt, L., Shankar, V.: Do imagenet classifiers generalize to imagenet? In: International Conference on Machine Learning, pp. 5389\u20135400 (2019)"},{"key":"24_CR33","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: UCF101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"24_CR34","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (2017)"},{"key":"24_CR35","unstructured":"Wang, H., Ge, S., Lipton, Z., Xing, E.P.: Learning robust global representations by penalizing local predictive power. In: Advances in Neural Information Processing Systems (2019)"},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K.A., Oliva, A., Torralba, A.: Sun database: large-scale scene recognition from abbey to zoo. In: 2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 3485\u20133492 (2010)","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Yang, L., Zhang, R.Y., Chen, Q., Xie, X.: Learning with enriched inductive biases for vision-language models. Int. J. Comput. Vision 1\u201316 (2025)","DOI":"10.1007\/s11263-025-02354-1"},{"key":"24_CR38","doi-asserted-by":"crossref","unstructured":"Yang, L., Zhang, R.Y., Wang, Y., Xie, X.: MMA: multi-modal adapter for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23826\u201323837 (2024)","DOI":"10.1109\/CVPR52733.2024.02249"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Yu, T., Lu, Z., Jin, X., Chen, Z., Wang, X.: Task residual for tuning vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10899\u201310909 (2023)","DOI":"10.1109\/CVPR52729.2023.01049"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Zang, Y., Li, W., Zhou, K., Huang, C., Loy, C.C.: Open-vocabulary DETR with conditional matching. In: European Conference on Computer Vision, pp. 106\u2013122 (2022)","DOI":"10.1007\/978-3-031-20077-9_7"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Zhai, X., et al.: LIT: zero-shot transfer with locked-image text tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18123\u201318133 (2022)","DOI":"10.1109\/CVPR52688.2022.01759"},{"key":"24_CR42","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2921\u20132929 (2016)","DOI":"10.1109\/CVPR.2016.319"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 2337\u20132348 (2022)","DOI":"10.1007\/s11263-022-01653-1"},{"key":"24_CR45","doi-asserted-by":"crossref","unstructured":"Zhu, B., Niu, Y., Han, Y., Wu, Y., Zhang, H.: Prompt-aligned gradient for prompt tuning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15659\u201315669 (2023)","DOI":"10.1109\/ICCV51070.2023.01435"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5693-9_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T21:22:49Z","timestamp":1768944169000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5693-9_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556922","9789819556939"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5693-9_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"21 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}