{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T05:38:22Z","timestamp":1767418702516,"version":"3.48.0"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032049803"},{"type":"electronic","value":"9783032049810"}],"license":[{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04981-0_22","type":"book-chapter","created":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T05:10:30Z","timestamp":1758258630000},"page":"226-236","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exposing and\u00a0Mitigating Calibration Biases and\u00a0Demographic Unfairness in\u00a0MLLM Few-Shot In-Context Learning for\u00a0Medical Image Classification"],"prefix":"10.1007","author":[{"given":"Xing","family":"Shen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Justin","family":"Szeto","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingyang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hengguan","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tal","family":"Arbel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,20]]},"reference":[{"key":"22_CR1","doi-asserted-by":"crossref","unstructured":"Baldassini, F.B., Shukor, M., Cord, M., Soulier, L., Piwowarski, B.: What makes multimodal in-context learning work? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1539\u20131550 (2024)","DOI":"10.1109\/CVPRW63382.2024.00161"},{"key":"22_CR2","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Cao, B., et al.: Knowledgeable or educated guess? revisiting language models as knowledge bases. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 1860\u20131874 (2021)","DOI":"10.18653\/v1\/2021.acl-long.146"},{"issue":"1","key":"22_CR4","doi-asserted-by":"publisher","first-page":"10104","DOI":"10.1038\/s41467-024-51465-9","volume":"15","author":"D Ferber","year":"2024","unstructured":"Ferber, D., et al.: In-context learning enables multimodal large language models to classify cancer pathology images. Nat. Commun. 15(1), 10104 (2024)","journal-title":"Nat. Commun."},{"key":"22_CR5","unstructured":"Guo, C., Pleiss, G., Sun, Y., Weinberger, K.Q.: On calibration of modern neural networks. In: International Conference on Machine Learning, pp. 1321\u20131330. PMLR (2017)"},{"key":"22_CR6","unstructured":"Han, Z., Hao, Y., Dong, L., Sun, Y., Wei, F.: Prototypical calibration for few-shot learning of language models. In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"22_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Long, Y., Roy, K.: Prompt-based bias calibration for better zero\/few-shot learning of language models. In: Findings of the Association for Computational Linguistics: EMNLP 2024, pp. 12673\u201312691. Association for Computational Linguistics (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.741"},{"key":"22_CR8","unstructured":"Hurst, A., et\u00a0al.: Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"22_CR9","unstructured":"Jin, R., et al.: FairmedFM: fairness benchmarking for medical imaging foundation models. In: The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2024)"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Johnson, A.E., et al.: Mimic-cxr-jpg, a large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042 (2019)","DOI":"10.1038\/s41597-019-0322-0"},{"issue":"1","key":"22_CR11","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1038\/s41597-022-01388-1","volume":"9","author":"O Kovalyk","year":"2022","unstructured":"Kovalyk, O., Morales-S\u00e1nchez, J., Verd\u00fa-Monedero, R., Sell\u00e9s-Navarro, I., Palaz\u00f3n-Cabanes, A., Sancho-G\u00f3mez, J.L.: Papila: dataset with fundus images and clinical data of both eyes of the same patient for glaucoma assessment. Sci. Data 9(1), 291 (2022)","journal-title":"Sci. Data"},{"key":"22_CR12","unstructured":"Kull, M., Perello\u00a0Nieto, M., K\u00e4ngsepp, M., Silva\u00a0Filho, T., Song, H., Flach, P.: Beyond temperature scaling: Obtaining well-calibrated multi-class probabilities with dirichlet calibration. Adv. Neural Inform. Process. Syst. 32 (2019)"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Lu, Y., Bartolo, M., Moore, A., Riedel, S., Stenetorp, P.: Fantastically ordered prompts and where to find them: overcoming few-shot prompt order sensitivity. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 8086\u20138098 (2022)","DOI":"10.18653\/v1\/2022.acl-long.556"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Luo, Y., et al.: Harvard glaucoma fairness: a retinal nerve disease dataset for fairness learning and fair identity normalization. IEEE Trans. Med. Imaging (2024)","DOI":"10.1109\/TMI.2024.3377552"},{"key":"22_CR15","unstructured":"Ma, H., et al.: Fairness-guided few-shot prompting for large language models. Adv. Neural Inform. Process. Syst. 36 (2024)"},{"issue":"6","key":"22_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3457607","volume":"54","author":"N Mehrabi","year":"2021","unstructured":"Mehrabi, N., Morstatter, F., Saxena, N., Lerman, K., Galstyan, A.: A survey on bias and fairness in machine learning. ACM Comput. Surv. 54(6), 1\u201335 (2021)","journal-title":"ACM Comput. Surv."},{"key":"22_CR17","unstructured":"PhysioNet: Responsible use of mimic data with online services like GPT (2023). https:\/\/physionet.org\/news\/post\/gpt-responsible-use"},{"key":"22_CR18","doi-asserted-by":"publisher","unstructured":"Shui, C., Szeto, J., Mehta, R., Arnold, D.L., Arbel, T.: Mitigating calibration bias without fixed attribute grouping for improved fairness in medical imaging analysis. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 189\u2013198. Springer (2023). https:\/\/doi.org\/10.1007\/978-3-031-43898-1_19","DOI":"10.1007\/978-3-031-43898-1_19"},{"key":"22_CR19","unstructured":"Team, G., et\u00a0al.: Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)"},{"key":"22_CR20","unstructured":"Tian, Y., Shi, M., Luo, Y., Kouhana, A., Elze, T., Wang, M.: Fairseg: a large-scale medical image segmentation dataset for fairness learning using segment anything model with fair error-bound scaling. In: The Twelfth International Conference on Learning Representations (2024)"},{"issue":"1","key":"22_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2018.161","volume":"5","author":"P Tschandl","year":"2018","unstructured":"Tschandl, P., Rosendahl, C., Kittler, H.: The ham10000 dataset, a large collection of multi-source dermatoscopic images of common pigmented skin lesions. Scientific data 5(1), 1\u20139 (2018)","journal-title":"Scientific data"},{"key":"22_CR22","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, X., Cao, Y., Wang, W., Shen, C., Huang, T.: Seggpt: towards segmenting everything in context. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1130\u20131140 (2023)","DOI":"10.1109\/ICCV51070.2023.00110"},{"key":"22_CR23","unstructured":"Wu, Z., et al.: Prompt optimization with EASE? efficient ordering-aware automated selection of exemplars. In: The Thirty-Eighth Annual Conference on Neural Information Processing Systems (2024)"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Xiao, Y., Liang, P.P., Bhatt, U., Neiswanger, W., Salakhutdinov, R., Morency, L.P.: Uncertainty quantification with pre-trained language models: A large-scale empirical analysis. In: Findings of the Association for Computational Linguistics: EMNLP 2022, pp. 7273\u20137284 (2022)","DOI":"10.18653\/v1\/2022.findings-emnlp.538"},{"key":"22_CR25","unstructured":"Xiong, M., et al.: Can LLMs express their uncertainty? an empirical evaluation of confidence elicitation in LLMs. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"22_CR26","unstructured":"Xu, G., CHEN, Q., Ling, C., Wang, B., Shui, C.: Intersectional unfairness discovery. In: Forty-first International Conference on Machine Learning (2024)"},{"key":"22_CR27","unstructured":"Xu, Z., Peng, K., Ding, L., Tao, D., Lu, X.: Take care of your prompt bias! investigating and mitigating prompt bias in factual knowledge extraction. In: Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pp. 15552\u201315565 (2024)"},{"key":"22_CR28","unstructured":"Zhao, Z., Wallace, E., Feng, S., Klein, D., Singh, S.: Calibrate before use: Improving few-shot performance of language models. In: International Conference on Machine Learning, pp. 12697\u201312706. PMLR (2021)"},{"key":"22_CR29","unstructured":"Zong, Y., Yang, Y., Hospedales, T.: MEDFAIR: benchmarking fairness for medical imaging. In: The Eleventh International Conference on Learning Representations (2023)"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04981-0_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T05:33:51Z","timestamp":1767418431000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04981-0_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,20]]},"ISBN":["9783032049803","9783032049810"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04981-0_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,20]]},"assertion":[{"value":"20 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}