{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T16:16:01Z","timestamp":1771604161745,"version":"3.50.1"},"reference-count":103,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T00:00:00Z","timestamp":1766448000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T00:00:00Z","timestamp":1766448000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s11263-025-02618-w","type":"journal-article","created":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T18:42:40Z","timestamp":1766515360000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["From Small to Large: In-Context Learning as a New Paradigm for Domain Generalization"],"prefix":"10.1007","volume":"134","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1305-2622","authenticated-orcid":false,"given":"Guanglin","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhongyi","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaoan","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiming","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Biwei","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liming","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lina","family":"Yao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Salman","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,23]]},"reference":[{"key":"2618_CR1","first-page":"3438","volume":"34","author":"K Ahuja","year":"2021","unstructured":"Ahuja, K., Caballero, E., Zhang, D., Gagnon-Audet, J.-C., Bengio, Y., Mitliagkas, I., & Rish, I. (2021). Invariance principle meets information bottleneck for out-of-distribution generalization. Advances in Neural Information Processing Systems, 34, 3438\u20133450.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR2","first-page":"23716","volume":"35","author":"J-B Alayrac","year":"2022","unstructured":"Alayrac, J.-B., Donahue, J., Luc, P., Miech, A., Barr, I., Hasson, Y., Lenc, K., Mensch, A., Millican, K., Reynolds, M., et al. (2022). Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems, 35, 23716\u201323736.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR3","unstructured":"Anthropic (2023) Model Card and Evaluations for Claude Models. https:\/\/www-cdn.anthropic.com\/files\/4zrzovbb\/website\/bd2a28d2535bfb0494cc8e2a3bf135d2e7523226.pdf. Accessed: 2024-03-07"},{"key":"2618_CR4","unstructured":"Anthropic (2024) Claude 3 haiku: our fastest model yet . Available at: https:\/\/www.anthropic.com\/news\/claude-3-haiku"},{"key":"2618_CR5","unstructured":"Arjovsky, M., Bottou, L., Gulrajani, I., & Lopez-Paz, D. (2019) Invariant risk minimization. arXiv preprint arXiv:1907.02893"},{"key":"2618_CR6","unstructured":"Bai, J., Bai, S., Yang, S., Wang, S., Tan, S., Wang, P., Lin, J., Zhou, C., & Zhou, J. (2023) Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond . arxiv: 2308.12966"},{"key":"2618_CR7","unstructured":"Bai, S., Chen, K., Liu, X., Wang, J., Ge, W., Song, S., Dang, K., Wang, P., Wang, S., Tang, J., et al. (2025) Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923"},{"key":"2618_CR8","unstructured":"Balazevic, I., Steiner, D., Parthasarathy, N., Arandjelovi\u0107, R., & Henaff, O. (2024) Towards in-context scene understanding. Advances in Neural Information Processing Systems 36"},{"key":"2618_CR9","doi-asserted-by":"crossref","unstructured":"Baldassini, F.B., Shukor, M., Cord, M., Soulier, L., & Piwowarski, B. (2024) What makes multimodal in-context learning work? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 1539\u20131550","DOI":"10.1109\/CVPRW63382.2024.00161"},{"issue":"2","key":"2618_CR10","doi-asserted-by":"publisher","first-page":"550","DOI":"10.1109\/TMI.2018.2867350","volume":"38","author":"P Bandi","year":"2018","unstructured":"Bandi, P., Geessink, O., Manson, Q., Van Dijk, M., Balkenhol, M., Hermsen, M., Bejnordi, B. E., Lee, B., Paeng, K., Zhong, A., et al. (2018). From detection of individual metastases to classification of lymph node status at the patient level: the camelyon17 challenge. IEEE transactions on medical imaging, 38(2), 550\u2013560.","journal-title":"IEEE transactions on medical imaging"},{"key":"2618_CR11","first-page":"25005","volume":"35","author":"A Bar","year":"2022","unstructured":"Bar, A., Gandelsman, Y., Darrell, T., Globerson, A., & Efros, A. (2022). Visual prompting via image inpainting. Advances in Neural Information Processing Systems, 35, 25005\u201325017.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR12","unstructured":"Beery, S., Agarwal, A., Cole, E., & Birodkar, V. (2021) The iwildcam 2021 competition dataset. arXiv preprint arXiv:2105.03494"},{"key":"2618_CR13","doi-asserted-by":"crossref","unstructured":"Bertini Baldassini, F., Shukor, M., Cord, M., Soulier, L., & Piwowarski, B. (2024) What makes multimodal in-context learning work? arXiv e-prints, 2404","DOI":"10.1109\/CVPRW63382.2024.00161"},{"key":"2618_CR14","unstructured":"Bommasani, R., Hudson, D.A., Adeli, E., Altman, R., Arora, S., Arx, S., Bernstein, M.S., Bohg, J., Bosselut, A., Brunskill, E., et al. (2021) On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258"},{"key":"2618_CR15","unstructured":"Bordt, S., Nori, H., Rodrigues, V., Nushi, B., & Caruana, R. (2024) Elephants never forget: Memorization and learning of tabular data in large language models. arXiv preprint arXiv:2404.06209"},{"key":"2618_CR16","unstructured":"Bordt, S., Srinivas, S., Boreiko, V., & Luxburg, U. (2024) How much can we forget about data contamination? arXiv preprint arXiv:2410.03249"},{"key":"2618_CR17","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., Mann, B., Ryder, N., Subbiah, M., Kaplan, J. D., Dhariwal, P., Neelakantan, A., Shyam, P., Sastry, G., Askell, A., et al. (2020). Language models are few-shot learners. Advances in neural information processing systems, 33, 1877\u20131901.","journal-title":"Advances in neural information processing systems"},{"key":"2618_CR18","doi-asserted-by":"crossref","unstructured":"Cha, J., Lee, K., Park, S., & Chun, S. (2022) Domain generalization by mutual-information regularization with pre-trained models. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXIII, 440\u2013457 . Springer","DOI":"10.1007\/978-3-031-20050-2_26"},{"key":"2618_CR19","first-page":"22405","volume":"34","author":"J Cha","year":"2021","unstructured":"Cha, J., Chun, S., Lee, K., Cho, H.-C., Park, S., Lee, Y., & Park, S. (2021). Swad: Domain generalization by seeking flat minima. Advances in Neural Information Processing Systems, 34, 22405\u201322418.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR20","unstructured":"Chen, T., Kornblith, S., Norouzi, M., & Hinton, G. (2020) A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607 . PMLR"},{"key":"2618_CR21","unstructured":"Chevalley, M., Bunne, C., Krause, A., & Bauer, S. (2022) Invariant causal mechanisms through distribution matching. arXiv preprint arXiv:2206.11646"},{"issue":"10","key":"2618_CR22","doi-asserted-by":"publisher","first-page":"6614","DOI":"10.1109\/TPAMI.2021.3094760","volume":"44","author":"R Christiansen","year":"2021","unstructured":"Christiansen, R., Pfister, N., Jakobsen, M. E., Gnecco, N., & Peters, J. (2021). A causal framework for distribution generalization. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(10), 6614\u20136630.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2618_CR23","doi-asserted-by":"crossref","unstructured":"Christie, G., Fendley, N., Wilson, J., & Mukherjee, R. (2018) Functional map of the world. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 6172\u20136180","DOI":"10.1109\/CVPR.2018.00646"},{"key":"2618_CR24","unstructured":"Comanici, G., Bieber, E., Schaekermann, M., Pasupat, I., Sachdeva, N., Dhillon, I., Blistein, M., Ram, O., Zhang, D., Rosen, E., et al. (2025) Gemini 2.5: Pushing the frontier with advanced reasoning, multimodality, long context, and next generation agentic capabilities. arXiv preprint arXiv:2507.06261"},{"key":"2618_CR25","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009) Imagenet: A large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, 248\u2013255 . Ieee","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2618_CR26","unstructured":"Dong, Q., Li, L., Dai, D., Zheng, C., Wu, Z., Chang, B., Sun, X., Xu, J., & Sui, Z. (2022) A survey for in-context learning. arXiv preprint arXiv:2301.00234"},{"key":"2618_CR27","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., & Houlsby, N. (2020) An image is worth 16x16 words: Transformers for image recognition at scale. ArXiv abs\/2010.11929"},{"key":"2618_CR28","unstructured":"Du, S.S., Hu, W., Kakade, S.M., Lee, J.D., & Lei, Q. (2021) Few-shot learning via learning the representation, provably. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=pW2Q2xLwIMD"},{"key":"2618_CR29","doi-asserted-by":"crossref","unstructured":"Ferber, D., W\u00f6lflein, G., Wiest, I.C., Ligero, M., Sainath, S., Laleh, N.G., El Nahhas, O.S., M\u00fcller-Franzes, G., J\u00e4ger, D., Truhn, D., et al. (2024) In-context learning enables multimodal large language models to classify cancer pathology images. arXiv preprint arXiv:2403.07407","DOI":"10.1038\/s41467-024-51465-9"},{"key":"2618_CR30","unstructured":"Ganin, Y., & Lempitsky, V. (2015) Unsupervised domain adaptation by backpropagation. In: International Conference on Machine Learning, 1180\u20131189 . PMLR"},{"issue":"1","key":"2618_CR31","first-page":"2096","volume":"17","author":"Y Ganin","year":"2016","unstructured":"Ganin, Y., Ustinova, E., Ajakan, H., Germain, P., Larochelle, H., Laviolette, F., Marchand, M., & Lempitsky, V. (2016). Domain-adversarial training of neural networks. The journal of machine learning research, 17(1), 2096\u20132030.","journal-title":"The journal of machine learning research"},{"key":"2618_CR32","doi-asserted-by":"crossref","unstructured":"Gokhale, T., Anirudh, R., Thiagarajan, J.J., Kailkhura, B., Baral, C., & Yang, Y. (2023) Improving diversity with adversarially learned transformations for domain generalization. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 434\u2013443","DOI":"10.1109\/WACV56688.2023.00051"},{"key":"2618_CR33","unstructured":"Gulrajani, I., & Lopez-Paz, D. (2020) In search of lost domain generalization. In: International Conference on Learning Representations"},{"key":"2618_CR34","doi-asserted-by":"crossref","unstructured":"Han, Z., He, R., Li, T., Wei, B., Wang, J., & Yin, Y. (2021) Semi-supervised screening of covid-19 from positive and unlabeled data with constraint non-negative risk estimator. In: Information Processing in Medical Imaging: 27th International Conference, IPMI 2021, Virtual Event, June 28\u2013June 30, 2021, Proceedings 27, 611\u2013623 . Springer","DOI":"10.1007\/978-3-030-78191-0_47"},{"key":"2618_CR35","unstructured":"Han, Z., Zhou, G., He, R., Wang, J., Wu, T., Yin, Y., Khan, S., Yao, L., Liu, T., & Zhang, K. (2024) How well does GPT-4v(ision) adapt to distribution shifts? a preliminary investigation. In: ICLR 2024 Workshop on Mathematical and Empirical Understanding of Foundation Models . https:\/\/openreview.net\/forum?id=J8V4EwZkez"},{"key":"2618_CR36","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., & Girshick, R. (2022) Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 16000\u201316009","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2618_CR37","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"2618_CR38","unstructured":"Hurst, A., Lerer, A., Goucher, A.P., Perelman, A., Ramesh, A., Clark, A., Ostrow, A., Welihinda, A., Hayes, A., Radford, A., et al. (2024) Gpt-4o system card. arXiv preprint arXiv:2410.21276"},{"issue":"2","key":"2618_CR39","first-page":"3","volume":"1","author":"EJ Hu","year":"2022","unstructured":"Hu, E. J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., Chen, W., et al. (2022). Lora: Low-rank adaptation of large language models. ICLR, 1(2), 3.","journal-title":"ICLR"},{"key":"2618_CR40","doi-asserted-by":"crossref","unstructured":"Jia, M., Tang, L., Chen, B.-C., Cardie, C., Belongie, S., Hariharan, B., & Lim, S.-N. (2022) Visual prompt tuning. In: European Conference on Computer Vision, 709\u2013727 . Springer","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"2618_CR41","unstructured":"Jiang, Y., Irvin, J., Wang, J.H., Chaudhry, M.A., Chen, J.H., & Ng, A.Y. (2024) Many-Shot In-Context Learning in Multimodal Foundation Models"},{"key":"2618_CR42","unstructured":"Jiang, Y., Irvin, J., Wang, J.H., Chaudhry, M.A., Chen, J.H., & Ng, A.Y. (2024) Many-shot in-context learning in multimodal foundation models. arXiv preprint arXiv:2405.09798"},{"key":"2618_CR43","doi-asserted-by":"publisher","first-page":"3636","DOI":"10.1109\/TMM.2021.3104379","volume":"24","author":"X Jin","year":"2021","unstructured":"Jin, X., Lan, C., Zeng, W., & Chen, Z. (2021). Style normalization and restitution for domain generalization and adaptation. IEEE Transactions on Multimedia, 24, 3636\u20133651.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2618_CR44","doi-asserted-by":"crossref","unstructured":"Jo, S.-Y., & Yoon, S.W. (2023) Poem:polarization of embeddings for domain-invariant representations. Association for the Advancement of Artificial Intelligence (AAAI)","DOI":"10.1609\/aaai.v37i7.25984"},{"key":"2618_CR45","unstructured":"Kaplan, J., McCandlish, S., Henighan, T., Brown, T.B., Chess, B., Child, R., Gray, S., Radford, A., Wu, J., & Amodei, D. (2020) Scaling laws for neural language models. arXiv preprint arXiv:2001.08361"},{"key":"2618_CR46","doi-asserted-by":"crossref","unstructured":"Khattak, M.U., Rasheed, H., Maaz, M., Khan, S., & Khan, F.S. (2023) Maple: Multi-modal prompt learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 19113\u201319122","DOI":"10.1109\/CVPR52729.2023.01832"},{"issue":"13","key":"2618_CR47","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick, J., Pascanu, R., Rabinowitz, N., Veness, J., Desjardins, G., Rusu, A. A., Milan, K., Quan, J., Ramalho, T., Grabska-Barwinska, A., et al. (2017). Overcoming catastrophic forgetting in neural networks. Proceedings of the national academy of sciences, 114(13), 3521\u20133526.","journal-title":"Proceedings of the national academy of sciences"},{"key":"2618_CR48","unstructured":"Kumar, A., Raghunathan, A., Jones, R.M., Ma, T., & Liang, P. (2022) Fine-tuning can distort pretrained features and underperform out-of-distribution. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=UYneFzXSJWh"},{"key":"2618_CR49","unstructured":"Kumar, A., Raghunathan, A., Jones, R.M., Ma, T., & Liang, P. (2022) Fine-tuning can distort pretrained features and underperform out-of-distribution. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=UYneFzXSJWh"},{"key":"2618_CR50","unstructured":"Lauren\u00e7on, H., Saulnier, L., Tronchon, L., Bekman, S., Singh, A., Lozhkov, A., Wang, T., Karamcheti, S., Rush, A., Kiela, D., et al. (2024) Obelics: An open web-scale filtered dataset of interleaved image-text documents. Advances in Neural Information Processing Systems 36"},{"key":"2618_CR51","unstructured":"Lee, Y., Chen, A.S., Tajwar, F., Kumar, A., Yao, H., Liang, P., & Finn, C. (2022) Surgical fine-tuning improves adaptation to distribution shifts. In: The Eleventh International Conference on Learning Representations"},{"key":"2618_CR52","doi-asserted-by":"crossref","unstructured":"Li, C., Gan, Z., Yang, Z., Yang, J., Li, L., Wang, L., Gao, J., et al. (2024) Multimodal foundation models: From specialists to general-purpose assistants. Foundations and Trends\u00ae in Computer Graphics and Vision 16(1-2), 1\u2013214","DOI":"10.1561\/0600000110"},{"key":"2618_CR53","unstructured":"Li, J., Li, D., Savarese, S., & Hoi, S. (2023) Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. ArXiv abs\/2301.12597"},{"key":"2618_CR54","unstructured":"Li, B., Shen, Y., Yang, J., Wang, Y., Ren, J., Che, T., Zhang, J., & Liu, Z. (2022) Sparse mixture-of-experts are domain generalizable learners. In: The Eleventh International Conference on Learning Representations"},{"key":"2618_CR55","doi-asserted-by":"crossref","unstructured":"Li, D., Yang, Y., Song, Y.-Z., & Hospedales, T.M. (2017) Deeper, broader and artier domain generalization. In: Proceedings of the IEEE International Conference on Computer Vision, 5542\u20135550","DOI":"10.1109\/ICCV.2017.591"},{"key":"2618_CR56","doi-asserted-by":"crossref","unstructured":"Li, D., Yang, Y., Song, Y.-Z., & Hospedales, T. (2018) Learning to generalize: Meta-learning for domain generalization. In: Proceedings of the AAAI Conference on Artificial Intelligence, 32","DOI":"10.1609\/aaai.v32i1.11596"},{"key":"2618_CR57","unstructured":"Liu, H., Li, C., Wu, Q., & Lee, Y.J. (2023) Visual instruction tuning. arXiv preprint arXiv:2304.08485"},{"key":"2618_CR58","doi-asserted-by":"crossref","unstructured":"Liu, J., Shen, D., Zhang, Y., Dolan, B., Carin, L., & Chen, W. (2021) What makes good in-context examples for gpt-$$3 $$? arXiv preprint arXiv:2101.06804","DOI":"10.18653\/v1\/2022.deelio-1.10"},{"key":"2618_CR59","unstructured":"Liu, Y., Xiong, Z., Li, Y., Tian, X., & Zha, Z.-J. (2021) Domain generalization via encoding and resampling in a unified latent space. IEEE Transactions on Multimedia"},{"key":"2618_CR60","unstructured":"Loshchilov, I., & Hutter, F. (2017) Decoupled weight decay regularization. In: International Conference on Learning Representations"},{"key":"2618_CR61","doi-asserted-by":"crossref","unstructured":"Lu, X., Groeneveld, D., Liu, P., Zoph, B., Wei, J., Lepikhin, D., Bosma, M., Zhou, Y., Chi, E., Dean, J., et al. (2022) Fantastically ordered prompts and where to find them: Overcoming few-shot prompt order sensitivity. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP) . Association for Computational Linguistics","DOI":"10.18653\/v1\/2022.acl-long.556"},{"key":"2618_CR62","doi-asserted-by":"crossref","unstructured":"Luo, Y., Kang, G., Liu, K., Zhuang, F., & L\u00fc, J. (2023) Taking a closer look at factor disentanglement: Dual-path variational autoencoder learning for domain generalization. IEEE Transactions on Multimedia","DOI":"10.1109\/TMM.2023.3340552"},{"key":"2618_CR63","unstructured":"Maaten, L., Hinton, G. (2008) Visualizing data using t-sne. Journal of machine learning research 9(11)"},{"issue":"9","key":"2618_CR64","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1109\/TMM.2019.2902100","volume":"21","author":"X Ma","year":"2019","unstructured":"Ma, X., Zhang, T., & Xu, C. (2019). Deep multi-modality adversarial networks for unsupervised domain adaptation. IEEE Transactions on Multimedia, 21(9), 2419\u20132431.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2618_CR65","doi-asserted-by":"crossref","unstructured":"Min, S., Lyu, X., Holtzman, A., Artetxe, M., Lewis, M., Hajishirzi, H., & Zettlemoyer, L. (2022) Rethinking the role of demonstrations: What makes in-context learning work? arXiv preprint arXiv:2202.12837","DOI":"10.18653\/v1\/2022.emnlp-main.759"},{"key":"2618_CR66","doi-asserted-by":"crossref","unstructured":"Niu, Z., Yuan, J., Ma, X., Xu, Y., Liu, J., Chen, Y.-W., Tong, R., & Lin, L. (2023) Knowledge distillation-based domain-invariant representation learning for domain generalization. IEEE Transactions on Multimedia","DOI":"10.1109\/TMM.2023.3263549"},{"key":"2618_CR67","unstructured":"OpenAI (2023) Gpt-4v(ision) system card"},{"key":"2618_CR68","first-page":"3043","volume":"34","author":"C Park","year":"2021","unstructured":"Park, C., Awadalla, A., Kohno, T., & Patel, S. (2021). Reliable and trustworthy machine learning for health using dataset shift detection. Advances in Neural Information Processing Systems, 34, 3043\u20133056.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR69","doi-asserted-by":"crossref","unstructured":"Qiao, F., Zhao, L., & Peng, X. (2020) Learning to learn single domain generalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 12556\u201312565","DOI":"10.1109\/CVPR42600.2020.01257"},{"key":"2618_CR70","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al. (2021) Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, 8748\u20138763 . PMLR"},{"key":"2618_CR71","unstructured":"Ram\u00e9, A., Ahuja, K., Zhang, J., Cord, M., Bottou, L., & Lopez-Paz, D. (2023) Model Ratatouille: Recycling Diverse Models for Out-of-Distribution Generalization"},{"issue":"1","key":"2618_CR72","first-page":"1309","volume":"19","author":"M Rojas-Carulla","year":"2018","unstructured":"Rojas-Carulla, M., Sch\u00f6lkopf, B., Turner, R., & Peters, J. (2018). Invariant models for causal transfer learning. The Journal of Machine Learning Research, 19(1), 1309\u20131342.","journal-title":"The Journal of Machine Learning Research"},{"key":"2618_CR73","doi-asserted-by":"crossref","unstructured":"Shao, R., Lan, X., Li, J., & Yuen, P.C. (2019) Multi-adversarial discriminative deep domain generalization for face presentation attack detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 10023\u201310031","DOI":"10.1109\/CVPR.2019.01026"},{"key":"2618_CR74","doi-asserted-by":"crossref","unstructured":"Sun, Q., Cui, Y., Zhang, X., Zhang, F., Yu, Q., Wang, Y., Rao, Y., Liu, J., Huang, T., & Wang, X. (2024) Generative multimodal models are in-context learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 14398\u201314409","DOI":"10.1109\/CVPR52733.2024.01365"},{"key":"2618_CR75","first-page":"16846","volume":"34","author":"X Sun","year":"2021","unstructured":"Sun, X., Wu, B., Zheng, X., Liu, C., Chen, W., Qin, T., & Liu, T.-Y. (2021). Recovering latent causal factor for generalization to distributional shifts. Advances in Neural Information Processing Systems, 34, 16846\u201316859.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR76","unstructured":"Team, G., Anil, R., Borgeaud, S., Wu, Y., Alayrac, J.-B., Yu, J., Soricut, R., Schalkwyk, J., Dai, A.M., Hauth, A., et al. (2023) Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805"},{"key":"2618_CR77","unstructured":"Team, G., Georgiev, P., Lei, V.I., Burnell, R., Bai, L., Gulati, A., Tanzer, G., Vincent, D., Pan, Z., Wang, S., et al. (2024) Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530"},{"issue":"1","key":"2618_CR78","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2018.161","volume":"5","author":"P Tschandl","year":"2018","unstructured":"Tschandl, P., Rosendahl, C., & Kittler, H. (2018). The ham10000 dataset, a large collection of multi-source dermatoscopic images of common pigmented skin lesions. Scientific data, 5(1), 1\u20139.","journal-title":"Scientific data"},{"key":"2618_CR79","first-page":"200","volume":"34","author":"M Tsimpoukelli","year":"2021","unstructured":"Tsimpoukelli, M., Menick, J. L., Cabi, S., Eslami, S., Vinyals, O., & Hill, F. (2021). Multimodal few-shot learning with frozen language models. Advances in Neural Information Processing Systems, 34, 200\u2013212.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR80","doi-asserted-by":"crossref","unstructured":"Volpi, R., & Murino, V. (2019) Addressing model vulnerability to distributional shifts over image transformation sets. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 7980\u20137989","DOI":"10.1109\/ICCV.2019.00807"},{"key":"2618_CR81","first-page":"16451","volume":"34","author":"J Von K\u00fcgelgen","year":"2021","unstructured":"Von K\u00fcgelgen, J., Sharma, Y., Gresele, L., Brendel, W., Sch\u00f6lkopf, B., Besserve, M., & Locatello, F. (2021). Self-supervised learning with data augmentations provably isolates content from style. Advances in neural information processing systems, 34, 16451\u201316467.","journal-title":"Advances in neural information processing systems"},{"key":"2618_CR82","unstructured":"Wang, P., Bai, S., Tan, S., Wang, S., Fan, Z., Bai, J., Chen, K., Liu, X., Wang, J., Ge, W., et al. (2024) Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution. arXiv preprint arXiv:2409.12191"},{"key":"2618_CR83","doi-asserted-by":"crossref","unstructured":"Wang, G., Han, H., Shan, S., & Chen, X. (2020) Cross-domain face presentation attack detection via multi-domain disentangled representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 6678\u20136687","DOI":"10.1109\/CVPR42600.2020.00671"},{"key":"2618_CR84","unstructured":"Wang, Z., Jiang, Y., Lu, Y., He, P., Chen, W., Wang, Z., Zhou, M., et al. (2024) In-context learning unlocked for diffusion models. Advances in Neural Information Processing Systems 36"},{"key":"2618_CR85","doi-asserted-by":"crossref","unstructured":"Wang, J., Lan, C., Liu, C., Ouyang, Y., Qin, T., Lu, W., Chen, Y., Zeng, W., & Yu, P. (2022) Generalizing to unseen domains: A survey on domain generalization. IEEE Transactions on Knowledge and Data Engineering","DOI":"10.1109\/TKDE.2022.3178128"},{"key":"2618_CR86","doi-asserted-by":"crossref","unstructured":"Wang, Z., Luo, Y., Qiu, R., Huang, Z., & Baktashmotlagh, M. (2021) Learning to diversify for single domain generalization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 834\u2013843","DOI":"10.1109\/ICCV48922.2021.00087"},{"key":"2618_CR87","doi-asserted-by":"crossref","unstructured":"Wang, X., Peng, Y., Lu, L., Lu, Z., Bagheri, M., & Summers, R.M. (2017) Chestx-ray8: Hospital-scale chest x-ray database and benchmarks on weakly-supervised classification and localization of common thorax diseases. In: CVPR, 2097\u20132106","DOI":"10.1109\/CVPR.2017.369"},{"key":"2618_CR88","doi-asserted-by":"crossref","unstructured":"Wang, Z., Zhang, Z., Lee, C.-Y., Zhang, H., Sun, R., Ren, X., Su, G., Perot, V., Dy, J., & Pfister, T. (2022) Learning to prompt for continual learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 139\u2013149","DOI":"10.1109\/CVPR52688.2022.00024"},{"issue":"7972","key":"2618_CR89","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1038\/s41586-023-06221-2","volume":"620","author":"H Wang","year":"2023","unstructured":"Wang, H., Fu, T., Du, Y., Gao, W., Huang, K., Liu, Z., Chandak, P., Liu, S., Van Katwyk, P., Deac, A., et al. (2023). Scientific discovery in the age of artificial intelligence. Nature, 620(7972), 47\u201360.","journal-title":"Nature"},{"key":"2618_CR90","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., Wang, X., Schuurmans, D., Bosma, M., Xia, F., Chi, E., Le, Q. V., Zhou, D., et al. (2022). Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems, 35, 24824\u201324837.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2618_CR91","unstructured":"Wiles, O., Gowal, S., Stimberg, F., Rebuffi, S.-A., Ktena, I., Dvijotham, K.D., & Cemgil, A.T. (2022) A fine-grained analysis on distribution shift. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=Dl4LetuLdyK"},{"key":"2618_CR92","doi-asserted-by":"crossref","unstructured":"Wortsman, M., Ilharco, G., Kim, J.W., Li, M., Kornblith, S., Roelofs, R., Lopes, R.G., Hajishirzi, H., Farhadi, A., Namkoong, H., et al. (2022) Robust fine-tuning of zero-shot models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 7959\u20137971","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"2618_CR93","unstructured":"Wu, S., Zhang, H.R., & R\u00e9, C. (2020) Understanding and improving information transfer in multi-task learning. In: International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=SylzhkBtDB"},{"key":"2618_CR94","unstructured":"Yang, Z., Li, L., Lin, K., Wang, J., Lin, C.-C., Liu, Z., & Wang, L. (2023) The dawn of lmms: Preliminary explorations with gpt-4v (ision). arXiv preprint arXiv:2309.17421"},{"key":"2618_CR95","unstructured":"Yang, C., Westover, M.B., & Sun, J. (2023) ManyDG: Many-domain generalization for healthcare applications. In: The Eleventh International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=lcSfirnflpW"},{"key":"2618_CR96","unstructured":"Yasunaga, M., Chen, X., Li, Y., Pasupat, P., Leskovec, J., Liang, P., Chi, E.H., & Zhou, D. (2024) Large language models as analogical reasoners. In: The Twelfth International Conference on Learning Representations . https:\/\/openreview.net\/forum?id=AgDICX1h50"},{"key":"2618_CR97","doi-asserted-by":"crossref","unstructured":"Yue, X., Zhang, Y., Zhao, S., Sangiovanni-Vincentelli, A., Keutzer, K., & Gong, B. (2019) Domain randomization and pyramid consistency: Simulation-to-real generalization without accessing target domain data. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2100\u20132110","DOI":"10.1109\/ICCV.2019.00219"},{"key":"2618_CR98","doi-asserted-by":"crossref","unstructured":"Zhang, C., Lin, K., Yang, Z., Wang, J., Li, L., Lin, C.-C., Liu, Z., & Wang, L. (2024) Mm-narrator: Narrating long-form videos with multimodal in-context learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13647\u201313657","DOI":"10.1109\/CVPR52733.2024.01295"},{"key":"2618_CR99","doi-asserted-by":"crossref","unstructured":"Zhou, G., Huang, C., Chen, X., Xu, X., Wang, C., Zhu, L., & Yao, L. (2023) Contrastive counterfactual learning for causality-aware interpretable recommender systems. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, 3564\u20133573","DOI":"10.1145\/3583780.3614823"},{"key":"2618_CR100","doi-asserted-by":"crossref","unstructured":"Zhou, K., Liu, Z., Qiao, Y., Xiang, T., & Loy, C.C. (2022) Domain generalization: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence","DOI":"10.1109\/TPAMI.2022.3195549"},{"key":"2618_CR101","doi-asserted-by":"publisher","first-page":"13025","DOI":"10.1609\/aaai.v34i07.7003","volume":"34","author":"K Zhou","year":"2020","unstructured":"Zhou, K., Yang, Y., Hospedales, T., & Xiang, T. (2020). Deep domain-adversarial image generation for domain generalisation. Proceedings of the AAAI Conference on Artificial Intelligence, 34, 13025\u201313032.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"issue":"9","key":"2618_CR102","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou, K., Yang, J., Loy, C. C., & Liu, Z. (2022). Learning to prompt for vision-language models. International Journal of Computer Vision, 130(9), 2337\u20132348.","journal-title":"International Journal of Computer Vision"},{"key":"2618_CR103","unstructured":"Zhu, J., Wang, W., Chen, Z., Liu, Z., Ye, S., Gu, L., Tian, H., Duan, Y., Su, W., Shao, J., et al. (2025) Internvl3: Exploring advanced training and test-time recipes for open-source multimodal models. arXiv preprint arXiv:2504.10479"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02618-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02618-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02618-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T15:39:07Z","timestamp":1771601947000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02618-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,23]]},"references-count":103,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["2618"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02618-w","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,23]]},"assertion":[{"value":"26 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"9"}}