{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T10:36:09Z","timestamp":1760524569230,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":45,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819984282"},{"type":"electronic","value":"9789819984299"}],"license":[{"start":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T00:00:00Z","timestamp":1703376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T00:00:00Z","timestamp":1703376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8429-9_16","type":"book-chapter","created":{"date-parts":[[2023,12,23]],"date-time":"2023-12-23T08:02:17Z","timestamp":1703318537000},"page":"197-209","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Unsupervised Prototype Adapter for\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"given":"Yi","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Ce","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xueting","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Zhihai","family":"He","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,24]]},"reference":[{"key":"16_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/978-3-319-10599-4_29","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Bossard","year":"2014","unstructured":"Bossard, L., Guillaumin, M., Van Gool, L.: Food-101 \u2013 mining discriminative components with random forests. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 446\u2013461. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., Vedaldi, A.: Describing textures in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3606\u20133613 (2014)","DOI":"10.1109\/CVPR.2014.461"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"16_CR4","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Du, Y., Wei, F., Zhang, Z., Shi, M., Gao, Y., Li, G.: Learning to prompt for open-vocabulary object detection with vision-language model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14084\u201314093 (2022)","DOI":"10.1109\/CVPR52688.2022.01369"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Duan, J., et al.: Multi-modal alignment using representation codebook. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15651\u201315660 (2022)","DOI":"10.1109\/CVPR52688.2022.01520"},{"key":"16_CR7","unstructured":"Fei-Fei, L., Fergus, R., Perona, P.: Learning generative visual models from few training examples: an incremental Bayesian approach tested on 101 object categories. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 178\u2013178 (2004)"},{"key":"16_CR8","unstructured":"Gao, P., et al.: Clip-adapter: better vision-language models with feature adapters. arXiv preprint arXiv:2110.04544 (2021)"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"7","key":"16_CR10","doi-asserted-by":"publisher","first-page":"2217","DOI":"10.1109\/JSTARS.2019.2918242","volume":"12","author":"P Helber","year":"2019","unstructured":"Helber, P., Bischke, B., Dengel, A., Borth, D.: EuroSAT: a novel dataset and deep learning benchmark for land use and land cover classification. IEEE J. Sel. Top. Appl. Earth Observ. Rem. Sens. 12(7), 2217\u20132226 (2019)","journal-title":"IEEE J. Sel. Top. Appl. Earth Observ. Rem. Sens."},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., et al.: The many faces of robustness: a critical analysis of out-of-distribution generalization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8340\u20138349 (2021)","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Zhao, K., Basart, S., Steinhardt, J., Song, D.: Natural adversarial examples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15262\u201315271 (2021)","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"16_CR13","unstructured":"Huang, T., Chu, J., Wei, F.: Unsupervised prompt learning for vision-language models. arXiv preprint arXiv:2204.03649 (2022)"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Huang, X., et al.: Idea: increasing text diversity via online multi-label recognition for vision-language pre-training. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 4573\u20134583 (2022)","DOI":"10.1145\/3503161.3548108"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., Fei-Fei, L.: 3d object representations for fine-grained categorization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, pp. 554\u2013561 (2013)","DOI":"10.1109\/ICCVW.2013.77"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Li, L.H., et al.: Grounded language-image pre-training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10965\u201310975 (2022)","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"16_CR17","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: VilBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"16_CR18","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko, M., Vedaldi, A.: Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)"},{"key":"16_CR19","unstructured":"Manli, S., et al.: Test-time prompt tuning for zero-shot generalization in vision-language models. In: Advances in Neural Information Processing Systems (2022)"},{"key":"16_CR20","unstructured":"Menon, S., Vondrick, C.: Visual classification via description from large language models. In: International Conference on Learning Representations (2023)"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"Nilsback, M.E., Zisserman, A.: Automated flower classification over a large number of classes. In: Indian Conference on Computer Vision, Graphics and Image Processing (2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"16_CR22","unstructured":"Novack, Z., McAuley, J., Lipton, Z.C., Garg, S.: Chils: zero-shot image classification with hierarchical label sets. In: International Conference on Machine Learning, pp. 26342\u201326362. PMLR (2023)"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Parkhi, O.M., Vedaldi, A., Zisserman, A., Jawahar, C.: Cats and dogs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3498\u20133505 (2012)","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"16_CR24","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning (2021)"},{"key":"16_CR25","unstructured":"Recht, B., Roelofs, R., Schmidt, L., Shankar, V.: Do imagenet classifiers generalize to imagenet? In: International Conference on Machine Learning (2019)"},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Ru, L., Zhan, Y., Yu, B., Du, B.: Learning affinity from attention: end-to-end weakly-supervised semantic segmentation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16846\u201316855 (2022)","DOI":"10.1109\/CVPR52688.2022.01634"},{"issue":"3","key":"16_CR27","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1109\/TIT.1965.1053799","volume":"11","author":"H Scudder","year":"1965","unstructured":"Scudder, H.: Probability of error of some adaptive pattern-recognition machines. IEEE Trans. Inf. Theory 11(3), 363\u2013371 (1965)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Shi, H., Hayat, M., Wu, Y., Cai, J.: Proposalclip: unsupervised open-category object proposal generation via exploiting clip cues. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9611\u20139620 (2022)","DOI":"10.1109\/CVPR52688.2022.00939"},{"key":"16_CR29","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: Ucf101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"16_CR30","doi-asserted-by":"crossref","unstructured":"Tang, Y., Guo, Q., He, Z.: Cross-inferential networks for source-free unsupervised domain adaptation. arXiv preprint arXiv:2306.16957 (2023)","DOI":"10.1109\/ICIP49359.2023.10222707"},{"key":"16_CR31","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: Neuro-modulated Hebbian learning for fully test-time adaptation. arXiv preprint arXiv:2303.00914 (2023)","DOI":"10.1109\/CVPR52729.2023.00363"},{"key":"16_CR32","doi-asserted-by":"crossref","unstructured":"Udandarao, V., Gupta, A., Albanie, S.: SUS-X: training-free name-only transfer of vision-language models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00257"},{"issue":"2","key":"16_CR33","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1007\/s10994-019-05855-6","volume":"109","author":"JE Van Engelen","year":"2020","unstructured":"Van Engelen, J.E., Hoos, H.H.: A survey on semi-supervised learning. Mach. Learn. 109(2), 373\u2013440 (2020)","journal-title":"Mach. Learn."},{"key":"16_CR34","unstructured":"Wang, H., Ge, S., Lipton, Z., Xing, E.P.: Learning robust global representations by penalizing local predictive power. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"issue":"3","key":"16_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3386252","volume":"53","author":"Y Wang","year":"2020","unstructured":"Wang, Y., Yao, Q., Kwok, J.T., Ni, L.M.: Generalizing from a few examples: a survey on few-shot learning. ACM Comput. Surv. 53(3), 1\u201334 (2020)","journal-title":"ACM Comput. Surv."},{"key":"16_CR36","unstructured":"Wang, Z., Yu, J., Yu, A.W., Dai, Z., Tsvetkov, Y., Cao, Y.: SimVLM: simple visual language model pretraining with weak supervision. In: International Conference on Learning Representations (2022)"},{"key":"16_CR37","doi-asserted-by":"crossref","unstructured":"Xiao, J., Hays, J., Ehinger, K.A., Oliva, A., Torralba, A.: Sun database: large-scale scene recognition from abbey to zoo. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3485\u20133492 (2010)","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"16_CR38","unstructured":"Yao, Y., Zhang, A., Zhang, Z., Liu, Z., Chua, T.S., Sun, M.: CPT: COLORFUL prompt tuning for pre-trained vision-language models. arXiv preprint arXiv:2109.11797 (2021)"},{"key":"16_CR39","unstructured":"Yu, J., Wang, Z., Vasudevan, V., Yeung, L., Seyedhosseini, M., Wu, Y.: COCA: Contrastive captioners are image-text foundation models. Transactions on Machine Learning Research (2022)"},{"key":"16_CR40","doi-asserted-by":"publisher","unstructured":"Zhang, R., et al.: Tip-adapter: training-free adaption of CLIP for few-shot classification. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision. ECCV 2022. LNCS, vol. 13695. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_29","DOI":"10.1007\/978-3-031-19833-5_29"},{"key":"16_CR41","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhang, C., Tang, Y., He, Z.: Cross-modal concept learning and inference for vision-language models. arXiv preprint arXiv:2307.15460 (2023)","DOI":"10.1016\/j.neucom.2024.127530"},{"key":"16_CR42","doi-asserted-by":"crossref","unstructured":"Zhou, K., Liu, Z., Qiao, Y., Xiang, T., Loy, C.C.: Domain generalization: a survey. In: IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)","DOI":"10.1109\/TPAMI.2022.3195549"},{"key":"16_CR43","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"16_CR44","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Learning to prompt for vision-language models. Int. J. Comput. Vision 130(9), 2337\u20132348 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"16_CR45","doi-asserted-by":"crossref","unstructured":"Zhou, M., Yu, L., Singh, A., Wang, M., Yu, Z., Zhang, N.: Unsupervised vision-and-language pre-training via retrieval-based multi-granular alignment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16485\u201316494 (2022)","DOI":"10.1109\/CVPR52688.2022.01599"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8429-9_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T19:35:40Z","timestamp":1730921740000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8429-9_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,24]]},"ISBN":["9789819984282","9789819984299"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8429-9_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,24]]},"assertion":[{"value":"24 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}