{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T16:07:03Z","timestamp":1771344423436,"version":"3.50.1"},"reference-count":83,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T00:00:00Z","timestamp":1768608000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T00:00:00Z","timestamp":1768608000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s11263-025-02591-4","type":"journal-article","created":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T05:58:16Z","timestamp":1768629496000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["AmPLe: Supporting Vision-Language Models via Adaptive-Debiased Ensemble Multi-Prompt Learning"],"prefix":"10.1007","volume":"134","author":[{"given":"Fei","family":"Song","sequence":"first","affiliation":[]},{"given":"Yi","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3376-1522","authenticated-orcid":false,"given":"Jiangmeng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Rui","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Changwen","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Fanjiang","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Hui","family":"Xiong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,17]]},"reference":[{"key":"2591_CR1","unstructured":"Ash, R. B. (2012). Information theory. Courier Corporation"},{"key":"2591_CR2","doi-asserted-by":"publisher","unstructured":"Bai, S., Zhang, M., Zhou, W., Huang,S.,Luan,Z.,Wang,D.,& Chen,B. (2024). Prompt-based distribution alignment for unsupervised domain adaptation. In: Wooldridge MJ, Dy JG, Natarajan S (eds) Thirty-Eighth AAAI Conference on Artificial Intelligence, AAAI 2024, Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence, IAAI 2024, Fourteenth Symposium on Educational Advances in Artificial Intelligence, EAAI 2014, February 20-27, 2024, Vancouver, Canada. AAAI Press, pp 729\u2013737, https:\/\/doi.org\/10.1609\/AAAI.V38I2.27830","DOI":"10.1609\/AAAI.V38I2.27830"},{"key":"2591_CR3","doi-asserted-by":"publisher","unstructured":"Bang, J., Ahn, S., & Lee, J. (2024). Active prompt learning in vision language models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024, Seattle, WA, USA, June 16-22, 2024. IEEE, pp 26994\u201327004, https:\/\/doi.org\/10.1109\/CVPR52733.2024.02550","DOI":"10.1109\/CVPR52733.2024.02550"},{"key":"2591_CR4","doi-asserted-by":"publisher","unstructured":"Bossard, L., Guillaumin, M.,& Gool, L.V. (2014). Food-101 - mining discriminative components with random forests. In: Fleet DJ, Pajdla T, Schiele B, et\u00a0al (eds) Computer Vision - ECCV 2014 - 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part VI, Lecture Notes in Computer Science, vol 8694. Springer, pp 446\u2013461, https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29","DOI":"10.1007\/978-3-319-10599-4_29"},{"issue":"2","key":"2591_CR5","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/BF00058655","volume":"24","author":"L Breiman","year":"1996","unstructured":"Breiman, L. (1996). Bagging predictors. Mach Learn,24(2), 123\u2013140. https:\/\/doi.org\/10.1007\/BF00058655","journal-title":"Mach Learn"},{"key":"2591_CR6","unstructured":"Brown, T.B., Mann, B., Ryder, N., Subbiah,M.,Kaplan,J., Dhariwal,P., Neelakantan,A., Shyam,P., Sastry,G., Askell,A., Agarwal,S., Herbert-Voss,A., Krueger,G., Henighan,T., Child,R., Ramesh,A., Ziegler,D.M., Wu,J., Winter,C.,...Amodei,v. (2020). Language models are few-shot learners. In: Larochelle H, Ranzato M, Hadsell R, et\u00a0al (eds) Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual"},{"key":"2591_CR7","unstructured":"Buja, A., & Stuetzle, W. (2000). Smoothing effects of bagging. Preprint AT&T Labs-Research 3"},{"issue":"4","key":"2591_CR8","doi-asserted-by":"publisher","first-page":"1108","DOI":"10.1007\/S11263-023-01904-9","volume":"132","author":"A Bulat","year":"2024","unstructured":"Bulat, A., & Tzimiropoulos, G. (2024). Language-aware soft prompting: Text-to-text optimization for few- and zero-shot adaptation of V & l models. Int J Comput Vis,132(4), 1108\u20131125. https:\/\/doi.org\/10.1007\/S11263-023-01904-9","journal-title":"Int J Comput Vis"},{"key":"2591_CR9","unstructured":"Chen, G., Yao, W., Song, X., Li,X., Rao,Y., & Zhang,K. (2023a). PLOT: prompt learning with optimal transport for vision-language models. In: The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net"},{"key":"2591_CR10","unstructured":"Chen, L., Li, B., Shen, S., Yang, J., Li, C., Keutzer, K., Darrell, T., & Liu, Z., et al. (2023). (2023b). Large language models are visual reasoning coordinators. In A. Oh, T. Naumann, & A. Globerson (Eds.), Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023 (pp. 10\u201316). New Orleans: LA, USA, December."},{"key":"2591_CR11","doi-asserted-by":"publisher","unstructured":"Chen, Z., Huang, X., Guan, Q., Lin,L.,& Luo,W. (2023c). A retrospect to multi-prompt learning across vision and language. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1-6, 2023. IEEE, pp 22133\u201322144, https:\/\/doi.org\/10.1109\/ICCV51070.2023.02028","DOI":"10.1109\/ICCV51070.2023.02028"},{"key":"2591_CR12","doi-asserted-by":"publisher","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed,S., & Vedaldi,A. (2014). Describing textures in the wild. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2014, Columbus, OH, USA, June 23-28, 2014. IEEE Computer Society, pp 3606\u20133613, https:\/\/doi.org\/10.1109\/CVPR.2014.461","DOI":"10.1109\/CVPR.2014.461"},{"key":"2591_CR13","unstructured":"Cortes, C., Mohri, M., & Syed, U. (2014). Deep boosting. In: Proceedings of the 31th International Conference on Machine Learning, ICML 2014, Beijing, China, 21-26 June 2014, JMLR Workshop and Conference Proceedings, vol\u00a032. JMLR.org, pp 1179\u20131187"},{"issue":"4","key":"2591_CR14","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/BF02551274","volume":"2","author":"G Cybenko","year":"1989","unstructured":"Cybenko, G. (1989). Approximation by superpositions of a sigmoidal function. Math Control Signals Syst,2(4), 303\u2013314. https:\/\/doi.org\/10.1007\/BF02551274","journal-title":"Math Control Signals Syst"},{"key":"2591_CR15","doi-asserted-by":"publisher","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li,K., & Fei-Fei,L. (2009). Imagenet: A large-scale hierarchical image database. In: 2009 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR 2009), 20-25 June 2009, Miami, Florida, USA. IEEE Computer Society, pp 248\u2013255, https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2591_CR16","doi-asserted-by":"publisher","unstructured":"Deng, L., Yu, D., & Platt, J.C. (2012). Scalable stacking and learning for building deep architectures. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2012, Kyoto, Japan, March 25-30, 2012. IEEE, pp 2133\u20132136, https:\/\/doi.org\/10.1109\/ICASSP.2012.6288333","DOI":"10.1109\/ICASSP.2012.6288333"},{"key":"2591_CR17","unstructured":"Derakhshani, M.M., Sanchez, E., Bulat, A., da Costa,V.G.T., Snoek,C.G. M., Tzimiropoulos,G.,& Mart\u00ednez,B. (2022). Variational prompt tuning improves generalization of vision-language models. CoRR abs\/2210.02390. 10.48550\/ARXIV.2210.02390, 2210.02390"},{"issue":"4","key":"2591_CR18","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1023\/A:1009868929893","volume":"3","author":"PM Domingos","year":"1999","unstructured":"Domingos, P. M. (1999). The role of occam\u2019s razor in knowledge discovery. Data Min Knowl Discov,3(4), 409\u2013425. https:\/\/doi.org\/10.1023\/A:1009868929893","journal-title":"Data Min Knowl Discov"},{"key":"2591_CR19","unstructured":"Du, Y., Sun, W., & Snoek, C., et al. (2024). (2024). IPO: interpretable prompt optimization for vision-language models. In A. Globersons, L. Mackey, & D. Belgrave (Eds.), Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024 (pp. 10\u201315). Vancouver: BC, Canada, December."},{"key":"2591_CR20","doi-asserted-by":"publisher","unstructured":"Duan, Z., Cheng, H., Xu, D., Wu,X., Zhang,X., Ye,X., & Xie,Z. (2024). Cityllava: Efficient fine-tuning for vlms in city scenario. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024 - Workshops, Seattle, WA, USA, June 17-18, 2024. IEEE, pp 7180\u20137189, https:\/\/doi.org\/10.1109\/CVPRW63382.2024.00713","DOI":"10.1109\/CVPRW63382.2024.00713"},{"key":"2591_CR21","doi-asserted-by":"publisher","unstructured":"Fang, K., Tao, Q., Huang, X., & Yang,J.(2024). Revisiting deep ensemble for out-of-distribution detection:A loss landscape perspective. Int J Comput Vis132(12):6107\u20136126. https:\/\/doi.org\/10.1007\/S11263-024-02156-X","DOI":"10.1007\/S11263-024-02156-X"},{"key":"2591_CR22","doi-asserted-by":"publisher","unstructured":"Fei-Fei, L., Fergus, R.,& Perona, P. (2004). Learning generative visual models from few training examples: An incremental bayesian approach tested on 101 object categories. In: IEEE Conference on Computer Vision and Pattern Recognition Workshops, CVPR Workshops 2004, Washington, DC, USA, June 27 - July 2, 2004. IEEE Computer Society, p 178, https:\/\/doi.org\/10.1109\/CVPR.2004.383","DOI":"10.1109\/CVPR.2004.383"},{"key":"#cr-split#-2591_CR23.1","unstructured":"Freund, Y., & Schapire, R.E. (1996). Experiments with a new boosting algorithm. In: Saitta L"},{"key":"#cr-split#-2591_CR23.2","unstructured":"(ed) Machine Learning, Proceedings of the Thirteenth International Conference (ICML '96), Bari, Italy, July 3-6, 1996. Morgan Kaufmann, pp 148-156"},{"key":"2591_CR24","doi-asserted-by":"crossref","unstructured":"Friedman, J.H. (2001). Greedy function approximation: a gradient boosting machine. Annals of statistics, pp 1189\u20131232","DOI":"10.1214\/aos\/1013203451"},{"key":"2591_CR25","doi-asserted-by":"publisher","unstructured":"Goldberger, J., Gordon, S., & Greenspan, H. (2003). An efficient image similarity measure based on approximations of kl-divergence between two gaussian mixtures. In: 9th IEEE International Conference on Computer Vision (ICCV 2003), 14-17 October 2003, Nice, France. IEEE Computer Society, pp 487\u2013493, https:\/\/doi.org\/10.1109\/ICCV.2003.1238387","DOI":"10.1109\/ICCV.2003.1238387"},{"key":"2591_CR26","doi-asserted-by":"publisher","unstructured":"Gomez-Bigorda, L., Patel, Y., Rusi\u00f1ol, M., Karatzas,D., & Jawahar,C. V. (2017). Self-supervised learning of visual features through embedding images into text topic spaces. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, July 21-26, 2017. IEEE Computer Society, pp 2017\u20132026, https:\/\/doi.org\/10.1109\/CVPR.2017.218","DOI":"10.1109\/CVPR.2017.218"},{"key":"2591_CR27","doi-asserted-by":"publisher","unstructured":"Guo, M., Yi, H., Qin, Z., Wang,H., Men,A., & Lao,Q. (2023). Multiple prompt fusion for zero-shot lesion detection using vision-language models. In: Greenspan H, Madabhushi A, Mousavi P, et\u00a0al (eds) Medical Image Computing and Computer Assisted Intervention - MICCAI 2023 - 26th International Conference, Vancouver, BC, Canada, October 8-12, 2023, Proceedings, Part V, Lecture Notes in Computer Science, vol 14224. Springer, pp 283\u2013292, https:\/\/doi.org\/10.1007\/978-3-031-43904-9_28","DOI":"10.1007\/978-3-031-43904-9_28"},{"key":"2591_CR28","doi-asserted-by":"publisher","unstructured":"Guo, Y.,& Gu, X. (2025). MMRL: multi-modal representation learning for vision-language models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2025, Nashville, TN, USA, June 11-15, 2025. Computer Vision Foundation \/ IEEE, pp 25015\u201325025, https:\/\/doi.org\/10.1109\/CVPR52734.2025.02329, https:\/\/openaccess.thecvf.com\/content\/CVPR2025\/html\/Guo_MMRL_Multi-Modal_Representation_Learning_for_Vision-Language_Models_CVPR_2025_paper.html","DOI":"10.1109\/CVPR52734.2025.02329"},{"issue":"7","key":"2591_CR29","doi-asserted-by":"publisher","first-page":"2217","DOI":"10.1109\/JSTARS.2019.2918242","volume":"12","author":"P Helber","year":"2019","unstructured":"Helber, P., Bischke, B., Dengel, A., & Borth, D. (2019). Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification. IEEE J Sel Top Appl Earth Obs Remote Sens,12(7), 2217\u20132226. https:\/\/doi.org\/10.1109\/JSTARS.2019.2918242","journal-title":"IEEE J Sel Top Appl Earth Obs Remote Sens"},{"key":"2591_CR30","doi-asserted-by":"publisher","unstructured":"Hendrycks, D., Basart, S., Mu, N., Kadavath,S.,Wang,F., Dorundo,E., Desai,R., Zhu,T., Parajuli,S., Guo,M., Song,D., Steinhardt,J.,& Gilmer,J. (2021a). The many faces of robustness: A critical analysis of out-of-distribution generalization. In: 2021 IEEE\/CVF International Conference on Computer Vision, ICCV 2021, Montreal, QC, Canada, October 10-17, 2021. IEEE, pp 8320\u20138329, https:\/\/doi.org\/10.1109\/ICCV48922.2021.00823","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"2591_CR31","doi-asserted-by":"publisher","unstructured":"Hendrycks, D., Zhao, K., Basart, S., Steinhardt,J.,&Song,D. (2021b). Natural adversarial examples. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, June 19-25, 2021. Computer Vision Foundation \/ IEEE, pp 15262\u201315271, https:\/\/doi.org\/10.1109\/CVPR46437.2021.01501","DOI":"10.1109\/CVPR46437.2021.01501"},{"issue":"2","key":"2591_CR32","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/0893-6080(91)90009-T","volume":"4","author":"K Hornik","year":"1991","unstructured":"Hornik, K. (1991). Approximation capabilities of multilayer feedforward networks. Neural Networks,4(2), 251\u2013257. https:\/\/doi.org\/10.1016\/0893-6080(91)90009-T","journal-title":"Neural Networks"},{"key":"2591_CR33","unstructured":"Hu, H., Lin, T., Wang, J., Sun,Z., & Xu,Y. (2023). Context-aware prompt tuning for vision-language model with dual-alignment. CoRR abs\/2309.04158. 10.48550\/ARXIV.2309.04158, 2309.04158"},{"key":"2591_CR34","unstructured":"Jia, C., Yang, Y., Xia, Y., Chen,Y.T., Parekh,Z., Pham,H., Le,Q.V., Sung,Y.H., Li,Z.,& Duerig,T. (2021) Scaling up visual and vision-language representation learning with noisy text supervision. In: Meila M, Zhang T (eds) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18-24 July 2021, Virtual Event, Proceedings of Machine Learning Research, vol 139. PMLR, pp 4904\u20134916"},{"key":"2591_CR35","unstructured":"Jiang, Y., & Veitch, V. (2022). Invariant and transportable representations for anti-causal domain shifts. In: Koyejo S, Mohamed S, Agarwal A, et\u00a0al (eds) Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022"},{"key":"2591_CR36","doi-asserted-by":"publisher","unstructured":"Khattak, M.U., Rasheed, H.A., Maaz, M., Khan,S.H., &Khan,F.S. (2023a). Maple: Multi-modal prompt learning. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17-24, 2023. IEEE, pp 19113\u201319122, https:\/\/doi.org\/10.1109\/CVPR52729.2023.01832","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"2591_CR37","doi-asserted-by":"publisher","unstructured":"Khattak, M.U., Wasim, S.T., Naseer, M., Khan,S., Yang,M.H. & Khan,F.S. (2023b). Self-regulating prompts: Foundational model adaptation without forgetting. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1-6, 2023. IEEE, pp 15144\u201315154, https:\/\/doi.org\/10.1109\/ICCV51070.2023.01394","DOI":"10.1109\/ICCV51070.2023.01394"},{"key":"2591_CR38","doi-asserted-by":"publisher","unstructured":"Krause, J., Stark, M., Deng, J.,& Fei-Fei,L. (2013). 3d object representations for fine-grained categorization. In: 2013 IEEE International Conference on Computer Vision Workshops, ICCV Workshops 2013, Sydney, Australia, December 1-8, 2013. IEEE Computer Society, pp 554\u2013561, https:\/\/doi.org\/10.1109\/ICCVW.2013.77","DOI":"10.1109\/ICCVW.2013.77"},{"key":"2591_CR39","unstructured":"Li, H., Zhang, R., Yao, H., Song, X., Hao, Y., Zhao, Y., Li, L., & Chen, Y., et al. (2023). (2023). Learning domain-aware detection head with prompt tuning. In A. Oh, T. Naumann, & A. Globerson (Eds.), Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023 (pp. 10\u201316). New Orleans: LA, USA, December."},{"key":"2591_CR40","unstructured":"Li, J., Li, D., Xiong, C., &Hoi,S. C. H. (2022a). BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Chaudhuri K, Jegelka S, Song L, et\u00a0al (eds) International Conference on Machine Learning, ICML 2022, 17-23 July 2022, Baltimore, Maryland, USA, Proceedings of Machine Learning Research, vol 162. PMLR, pp 12888\u201312900, https:\/\/proceedings.mlr.press\/v162\/li22n.html"},{"key":"2591_CR41","unstructured":"Li, Y., Liang, F., Zhao, L., Cui,Y.,Ouyang,W., Shao,J., Yu,F., & Yan,J. (2022b) Supervision exists everywhere: A data efficient contrastive language-image pre-training paradigm. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net"},{"key":"2591_CR42","doi-asserted-by":"publisher","unstructured":"Liu, X., Shang, Y., & Chen, Y. (2024). Trimpl: Masked multi-prompt learning with knowledge mixing for vision-language few-shot learning. In: Gurrin C, Kongkachandra R, Schoeffmann K, et\u00a0al (eds) Proceedings of the 2024 International Conference on Multimedia Retrieval, ICMR 2024, Phuket, Thailand, June 10-14, 2024. ACM, pp 552\u2013560, https:\/\/doi.org\/10.1145\/3652583.3658106","DOI":"10.1145\/3652583.3658106"},{"key":"2591_CR43","doi-asserted-by":"crossref","unstructured":"Long, S., Zhao, Z., Yuan, J., Tan, Z.,Liu, J.,Feng, J.,Wang, S.,& Wang,J. (2024). Mutual prompt leaning for vision language models. International Journal of Computer Vision, 1-19","DOI":"10.1007\/s11263-024-02243-z"},{"key":"2591_CR44","doi-asserted-by":"publisher","unstructured":"Lu, Y., Liu, J., Zhang, Y., Liu,Y.,& Tian,X. (2022). Prompt distribution learning. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18-24, 2022. IEEE, pp 5196\u20135205, https:\/\/doi.org\/10.1109\/CVPR52688.2022.00514","DOI":"10.1109\/CVPR52688.2022.00514"},{"key":"2591_CR45","unstructured":"Lu, Z., Bai, J., Li, X., Xiao,Z., & Wang,X. (2024). Beyond sole strength: Customized ensembles for generalized vision-language models. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net"},{"key":"2591_CR46","unstructured":"Van\u00a0der Maaten, L.,& Hinton, G. (2008). Visualizing data using t-sne. Journal of machine learning research, 9(11)"},{"key":"2591_CR47","unstructured":"Maji, S., Rahtu, E., Kannala, J., Blaschko,M.B., & Vedaldi,A. (2013). Fine-grained visual classification of aircraft. CoRR abs\/1306.5151. 1306.5151"},{"key":"2591_CR48","unstructured":"Mendenhall, W., Beaver, R.J.,& Beaver, B.M. (2020). Introduction to probability and statistics. Cengage"},{"key":"2591_CR49","unstructured":"Menon, S., & Vondrick, C. (2023). Visual classification via description from large language models. In: The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net"},{"key":"2591_CR50","doi-asserted-by":"publisher","unstructured":"Mirza, M.J., Karlinsky, L., Lin, W., Doveh,S., Micorek,J., Kozinski,M., Kuehne,H., & Possegger,H. (2024). Meta-prompting for automating zero-shot visual recognition with llms. In: Leonardis A, Ricci E, Roth S, et\u00a0al (eds) Computer Vision - ECCV 2024 - 18th European Conference, Milan, Italy, September 29-October 4, 2024, Proceedings, Part II, Lecture Notes in Computer Science, vol 15060. Springer, pp 370\u2013387, https:\/\/doi.org\/10.1007\/978-3-031-72627-9_21","DOI":"10.1007\/978-3-031-72627-9_21"},{"issue":"12","key":"2591_CR51","doi-asserted-by":"publisher","first-page":"5759","DOI":"10.1007\/S11263-024-02152-1","volume":"132","author":"U Muhammad","year":"2024","unstructured":"Muhammad, U., Laaksonen, J., Beddiar, D. R., & Oussalah, M. (2024). Domain generalization via ensemble stacking for face presentation attack detection. Int J Comput Vis,132(12), 5759\u20135782. https:\/\/doi.org\/10.1007\/S11263-024-02152-1","journal-title":"Int J Comput Vis"},{"key":"2591_CR52","unstructured":"Nasiriany, S., Xia, F., Yu, W., Xiao,T.,Liang,J., Dasgupta,I., Xie,A., Driess,D., Wahid,A., Xu,Z., Vuong,Q., Zhang,T., Lee,T.W.E., Lee,K.H., Xu,P., Kirmani,S.,Zhu,Y., Zeng,A., Hausman,K., Heess,N., Finn,C., Levine,S., & Ichter,B. (2024). PIVOT: iterative visual prompting elicits actionable knowledge for vlms. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net"},{"key":"2591_CR53","doi-asserted-by":"publisher","unstructured":"Nilsback, M., & Zisserman, A. (2008). Automated flower classification over a large number of classes. In: Sixth Indian Conference on Computer Vision, Graphics & Image Processing, ICVGIP 2008, Bhubaneswar, India, 16-19 December 2008. IEEE Computer Society, pp 722\u2013729, https:\/\/doi.org\/10.1109\/ICVGIP.2008.47","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"2591_CR54","unstructured":"van\u00a0den Oord, A., Li, Y., & Vinyals, O. (2018). Representation learning with contrastive predictive coding. CoRR abs\/1807.03748. 1807.03748"},{"key":"2591_CR55","unstructured":"OpenAI (2023) GPT-4 technical report. CoRR abs\/2303.08774. 10.48550\/ARXIV.2303.08774,2303.08774"},{"key":"2591_CR56","doi-asserted-by":"publisher","unstructured":"Parkhi, O.M., Vedaldi, A., Zisserman, A.,& Jawahar,C. V. (2012). Cats and dogs. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, Providence, RI, USA, June 16-21, 2012. IEEE Computer Society, pp 3498\u20133505, https:\/\/doi.org\/10.1109\/CVPR.2012.6248092","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"2591_CR57","doi-asserted-by":"crossref","unstructured":"Pearl, J. (2009). Causality. Cambridge University Press.","DOI":"10.1017\/CBO9780511803161"},{"key":"2591_CR58","unstructured":"Pearl, J., Glymour, M., & Jewell, N.P. (2016). Causal inference in statistics: A primer. John Wiley & Sons"},{"key":"2591_CR59","unstructured":"Peyr\u00e9, G., & Cuturi, M., et\u00a0al. (2019). Computational optimal transport: With applications to data science. Foundations and Trends\u00ae in Machine Learning11(5-6):355\u2013607"},{"key":"2591_CR60","unstructured":"Qiao, F., & Peng, X. (2024). Ensemble pruning for out-of-distribution generalization. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net"},{"key":"2591_CR61","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh,A., Goh,G., Agarwal,S., Sastry,G., Askell,A., Mishkin,P., Clark,J., Krueger,G., & Sutskever,I. (2021). Learning transferable visual models from natural language supervision. In: Meila M, Zhang T (eds) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18-24 July 2021, Virtual Event, Proceedings of Machine Learning Research, vol 139. PMLR, pp 8748\u20138763"},{"key":"2591_CR62","unstructured":"Rahman, U., Yaqub, M., & Mahapatra, D. (2025). Dimple - disentangled multi-modal prompt learning: Enhancing out-of-distribution alignment with invariant and spurious feature separation. CoRR abs\/2506.21237. 10.48550\/ARXIV.2506.21237,2506.21237"},{"key":"2591_CR63","unstructured":"Recht, B., Roelofs, R., Schmidt, L.,& Shankar,V. (2019). Do imagenet classifiers generalize to imagenet? In: Chaudhuri K, Salakhutdinov R (eds) Proceedings of the 36th International Conference on Machine Learning, ICML 2019, 9-15 June 2019, Long Beach, California, USA, Proceedings of Machine Learning Research, vol\u00a097. PMLR, pp 5389\u20135400"},{"key":"2591_CR64","doi-asserted-by":"publisher","unstructured":"Roth, K., Kim, J., Koepke, A.S., Vinyals,O., Schmid,C., & Akata,Z. (2023). Waffling around for performance: Visual classification with random words and broad concepts. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1-6, 2023. IEEE, pp 15700\u201315711, https:\/\/doi.org\/10.1109\/ICCV51070.2023.01443","DOI":"10.1109\/ICCV51070.2023.01443"},{"key":"2591_CR65","doi-asserted-by":"publisher","unstructured":"Sagi, O., & Rokach, L. (2018). Ensemble learning: A survey. WIREs Data Mining Knowl Discov,8(4). https:\/\/doi.org\/10.1002\/WIDM.1249","DOI":"10.1002\/WIDM.1249"},{"key":"2591_CR66","doi-asserted-by":"publisher","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam,R., Parikh,D., & Batra,D. (2017). Grad-cam: Visual explanations from deep networks via gradient-based localization. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, October 22-29, 2017. IEEE Computer Society, pp 618\u2013626, https:\/\/doi.org\/10.1109\/ICCV.2017.74","DOI":"10.1109\/ICCV.2017.74"},{"issue":"3","key":"2591_CR67","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1002\/J.1538-7305.1948.TB01338.X","volume":"27","author":"CE Shannon","year":"1948","unstructured":"Shannon, C. E. (1948). A mathematical theory of communication. Bell Syst Tech J,27(3), 379\u2013423. https:\/\/doi.org\/10.1002\/J.1538-7305.1948.TB01338.X","journal-title":"Bell Syst Tech J"},{"key":"2591_CR68","unstructured":"Socher, R., Ganjoo, M., Manning, C.D.,& Ng,A.Y. (2013). Zero-shot learning through cross-modal transfer. In: Burges CJC, Bottou L, Ghahramani Z, et\u00a0al (eds) Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States, pp 935\u2013943"},{"key":"2591_CR69","unstructured":"Soomro, K., Zamir, A.R., & Shah, M. (2012). UCF101: A dataset of 101 human actions classes from videos in the wild. CoRR abs\/1212.0402. 1212.0402"},{"key":"2591_CR70","doi-asserted-by":"publisher","unstructured":"Tian, X., Zou, S., Yang, Z.,& Zhang,J. (2024). Argue: Attribute-guided prompt tuning for vision-language models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024, Seattle, WA, USA, June 16-22, 2024. IEEE, pp 28578\u201328587, https:\/\/doi.org\/10.1109\/CVPR52733.2024.02700","DOI":"10.1109\/CVPR52733.2024.02700"},{"key":"2591_CR71","unstructured":"Wang, H., Ge, S., Lipton, Z.C.,& Xing,E.P. (2019). Learning robust global representations by penalizing local predictive power. In: Wallach HM, Larochelle H, Beygelzimer A, et\u00a0al (eds) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada, pp 10506\u201310518"},{"issue":"2","key":"2591_CR72","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1016\/S0893-6080(05)80023-1","volume":"5","author":"DH Wolpert","year":"1992","unstructured":"Wolpert, D. H. (1992). Stacked generalization. Neural Networks,5(2), 241\u2013259. https:\/\/doi.org\/10.1016\/S0893-6080(05)80023-1","journal-title":"Neural Networks"},{"key":"2591_CR73","doi-asserted-by":"publisher","unstructured":"Wu, G., Zhang, X., Li, Z., Chen,Z., Liang,J.,Yang,J., & Li,X. (2024). Cascade prompt learning for vision-language model adaptation. In: Leonardis A, Ricci E, Roth S, et\u00a0al (eds) Computer Vision - ECCV 2024 - 18th European Conference, Milan, Italy, September 29-October 4, 2024, Proceedings, Part L, Lecture Notes in Computer Science, vol 15108. Springer, pp 304\u2013321, https:\/\/doi.org\/10.1007\/978-3-031-72973-7_18","DOI":"10.1007\/978-3-031-72973-7_18"},{"key":"2591_CR74","doi-asserted-by":"publisher","unstructured":"Xiao, J., Hays, J., Ehinger, K.A., Oliva,A.,& Torralba,A. (2010). SUN database: Large-scale scene recognition from abbey to zoo. In: The Twenty-Third IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2010, San Francisco, CA, USA, 13-18 June 2010. IEEE Computer Society, pp 3485\u20133492, https:\/\/doi.org\/10.1109\/CVPR.2010.5539970","DOI":"10.1109\/CVPR.2010.5539970"},{"issue":"2","key":"2591_CR75","doi-asserted-by":"publisher","first-page":"511","DOI":"10.1007\/S11263-024-02172-X","volume":"133","author":"C Xu","year":"2025","unstructured":"Xu, C., Zhu, Y., Shen, H., Chen, B., Liao, Y., Chen, X., & Wang, L. (2025). Progressive visual prompt learning with contrastive feature re-formation. Int J Comput Vis,133(2), 511\u2013526. https:\/\/doi.org\/10.1007\/S11263-024-02172-X","journal-title":"Int J Comput Vis"},{"key":"2591_CR76","doi-asserted-by":"publisher","unstructured":"Yang, L., Zhang, R., Wang, Y., & Xie,X. (2024). MMA: multi-modal adapter for vision-language models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024, Seattle, WA, USA, June 16-22, 2024. IEEE, pp 23826\u201323837, https:\/\/doi.org\/10.1109\/CVPR52733.2024.02249","DOI":"10.1109\/CVPR52733.2024.02249"},{"key":"2591_CR77","doi-asserted-by":"crossref","unstructured":"Yang, L., Zhang, R.Y., Chen, Q., & Xie, X. (2025). Learning with enriched inductive biases for vision-language models. International Journal of Computer Vision .1\u201316","DOI":"10.1007\/s11263-025-02354-1"},{"key":"2591_CR78","doi-asserted-by":"crossref","unstructured":"Zhan, Q., Li, S., Liu, Q.,& Wang, Y. (2025). Attriprompt: Dynamic prompt composition learning for clip. arXiv preprint arXiv:2509.05949","DOI":"10.1145\/3746027.3755636"},{"key":"2591_CR79","unstructured":"Zhang, J., Ma, X., Guo, S., Li,P., Xu,W.,Tang,X., & Hong,Z. (2024). Amend to alignment: Decoupled prompt tuning for mitigating spurious correlation in vision-language models. In: Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024. OpenReview.net"},{"key":"2591_CR80","doi-asserted-by":"publisher","first-page":"1348","DOI":"10.1109\/TIP.2024.3362062","volume":"33","author":"C Zhao","year":"2024","unstructured":"Zhao, C., Wang, Y., Jiang, X., Shen, Y., Song, K., Li, D., & Miao, D. (2024). Learning domain invariant prompt for vision-language models. IEEE Trans Image Process,33, 1348\u20131360. https:\/\/doi.org\/10.1109\/TIP.2024.3362062","journal-title":"IEEE Trans Image Process"},{"key":"2591_CR81","doi-asserted-by":"publisher","unstructured":"Zhou, K., Yang, J., Loy, C.C., & Liu,Z. (2022a). Conditional prompt learning for vision-language models. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18-24, 2022. IEEE, pp 16795\u201316804, https:\/\/doi.org\/10.1109\/CVPR52688.2022.01631","DOI":"10.1109\/CVPR52688.2022.01631"},{"issue":"9","key":"2591_CR82","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/S11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C. C., & Liu, Z. (2022). Learning to prompt for vision-language models. Int J Comput Vis,130(9), 2337\u20132348. https:\/\/doi.org\/10.1007\/S11263-022-01653-1","journal-title":"Int J Comput Vis"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02591-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02591-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02591-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T15:20:38Z","timestamp":1771341638000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02591-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,17]]},"references-count":83,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2591"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02591-4","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,17]]},"assertion":[{"value":"11 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"67"}}