{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T15:13:32Z","timestamp":1760195612803,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032078445","type":"print"},{"value":"9783032078452","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07845-2_9","type":"book-chapter","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T14:28:43Z","timestamp":1760192923000},"page":"87-97","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving Medical Visual Instruction Tuning with\u00a0Labeled Datasets"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4016-7799","authenticated-orcid":false,"given":"Amin","family":"Dada","sequence":"first","affiliation":[]},{"given":"Amanda Butler","family":"Contreras","sequence":"additional","affiliation":[]},{"given":"Constantin","family":"Seibold","sequence":"additional","affiliation":[]},{"given":"Osman Alperen","family":"Kora\u015f","sequence":"additional","affiliation":[]},{"given":"Julius","family":"Keyl","sequence":"additional","affiliation":[]},{"given":"Aokun","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Cheng","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Brehmer","sequence":"additional","affiliation":[]},{"given":"Kaleb E.","family":"Smith","sequence":"additional","affiliation":[]},{"given":"Jiang","family":"Bian","sequence":"additional","affiliation":[]},{"given":"Yonghui","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8686-0682","authenticated-orcid":false,"given":"Jens","family":"Kleesiek","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"issue":"1","key":"9_CR1","doi-asserted-by":"publisher","first-page":"4128","DOI":"10.1038\/s41467-022-30695-9","volume":"13","author":"M Antonelli","year":"2022","unstructured":"Antonelli, M., et al.: The medical segmentation decathlon. Nature Commun. 13(1), 4128 (2022)","journal-title":"Nature Commun."},{"key":"9_CR2","unstructured":"Armato\u00a0III, S.G., et\u00a0al.: Data from LIDC-IDRI a completed reference database of lung nodules on CT scans. Cancer Imaging Arch. (2015)"},{"key":"9_CR3","unstructured":"Bae, S., et\u00a0al.: EHRXQA: a multi-modal question answering dataset for electronic health records with chest x-ray images. In: Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2023)"},{"key":"9_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102789","volume":"86","author":"B Billot","year":"2023","unstructured":"Billot, B., et al.: Synthseg: segmentation of brain MRI scans of any contrast and resolution without retraining. Med. Image Anal. 86, 102789 (2023)","journal-title":"Med. Image Anal."},{"issue":"1","key":"9_CR5","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1038\/s41597-020-00622-y","volume":"7","author":"H Borgli","year":"2020","unstructured":"Borgli, H., et al.: HyperKvasir, a comprehensive multi-class image and video dataset for gastrointestinal endoscopy. Sci. Data 7(1), 283 (2020)","journal-title":"Sci. Data"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., et\u00a0al.: Towards injecting medical visual knowledge into multimodal LLMs at scale. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 7346\u20137370 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.418"},{"key":"9_CR7","unstructured":"Chen, P., et\u00a0al.: Gmai-mmbench: a comprehensive multimodal evaluation benchmark towards general medical AI. arXiv preprint arXiv:2408.03361 (2024)"},{"key":"9_CR8","unstructured":"Dubey, A., et\u00a0al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"9_CR9","unstructured":"Gonz\u00e1lez, G., et\u00a0al.: Computer aided detection for pulmonary embolism challenge (cad-pe). arXiv preprint arXiv:2003.13440 (2020)"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Groh, M., et\u00a0al.: Evaluating deep neural networks trained on clinical images in dermatology with the fitzpatrick 17k dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1820\u20131828 (2021)","DOI":"10.1109\/CVPRW53098.2021.00201"},{"issue":"CSCW2","key":"9_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3555634","volume":"6","author":"M Groh","year":"2022","unstructured":"Groh, M., et al.: Towards transparency in dermatology image datasets with skin tone annotations by experts, crowds, and an algorithm. Proc. ACM Human-Comput. Interact. 6(CSCW2), 1\u201326 (2022)","journal-title":"Proc. ACM Human-Comput. Interact."},{"key":"9_CR12","unstructured":"He, X., et\u00a0al.: Pathvqa: 30000+ questions for medical visual question answering. arXiv preprint arXiv:2003.10286 (2020)"},{"key":"9_CR13","unstructured":"Heller, N., et\u00a0al.: The kits19 challenge data: 300 kidney tumor cases with clinical context, CT semantic segmentations, and surgical outcomes. arXiv preprint arXiv:1904.00445 (2019)"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Holste, G., et\u00a0al.: Long-tailed classification of thorax diseases on chest x-ray: A new benchmark study. In: MICCAI Workshop on Data Augmentation, Labelling, and Imperfections, pp. 22\u201332. Springer (2022)","DOI":"10.1007\/978-3-031-17027-0_3"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Jaus, A., et\u00a0al.: Towards unifying anatomy segmentation: Automated generation of a full-body CT dataset via knowledge aggregation and anatomical guidelines. arXiv preprint arXiv:2307.13375 (2023)","DOI":"10.1109\/ICIP51287.2024.10647307"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Jeong, D.P., et\u00a0al.: Medical adaptation of large language and vision-language models: are we making progress? arXiv preprint arXiv:2411.04118 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.677"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Jha, D., et\u00a0al.: Gastrovision: a multi-class endoscopy image dataset for computer aided gastrointestinal disease detection. In: ICML Workshop on Machine Learning for Multimodal Healthcare Data (ML4MHD 2023) (2023)","DOI":"10.1007\/978-3-031-47679-2_10"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Jin, L., et\u00a0al.: Deep-learning-assisted detection and segmentation of rib fractures from CT scans: Development and validation of fracnet. eBioMedicine (2020)","DOI":"10.1016\/j.ebiom.2020.103106"},{"issue":"1","key":"9_CR19","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","volume":"6","author":"AE Johnson","year":"2019","unstructured":"Johnson, A.E., et al.: Mimic-CXR, a de-identified publicly available database of chest radiographs with free-text reports. Sci. data 6(1), 317 (2019)","journal-title":"Sci. data"},{"key":"9_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.celrep.2022.110424","volume":"38","author":"D Komura","year":"2022","unstructured":"Komura, D., et al.: Universal encoding of pan-cancer histology by deep texture representations. Cell Rep. 38, 110424 (2022)","journal-title":"Cell Rep."},{"key":"9_CR21","doi-asserted-by":"publisher","first-page":"1022967","DOI":"10.3389\/fonc.2022.1022967","volume":"12","author":"K Kriegsmann","year":"2022","unstructured":"Kriegsmann, K., et al.: Deep learning for the detection of anatomical tissue structures and neoplasms of the skin on scanned histopathological tissue sections. Front. Oncol. 12, 1022967 (2022)","journal-title":"Front. Oncol."},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Labrak, Y., et\u00a0al.: Biomistral: a collection of open-source pretrained large language models for medical domains. In: Findings of the Association for Computational Linguistics ACL 2024, pp. 5848\u20135864 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.348"},{"issue":"1","key":"9_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2018.251","volume":"5","author":"JJ Lau","year":"2018","unstructured":"Lau, J.J., et al.: A dataset of clinically generated visual questions and answers about radiology images. Sci. data 5(1), 1\u201310 (2018)","journal-title":"Sci. data"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Li, C., et\u00a0al.: LLaVA-med: training a large language-and-vision assistant for biomedicine in one day. In: Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2023)","DOI":"10.32388\/VLXB6M"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"Liu, B., et\u00a0al.: Slake: a semantically-labeled knowledge-enhanced dataset for medical visual question answering. In: 2021 IEEE 18th International Symposium on Biomedical Imaging (ISBI), pp. 1650\u20131654. IEEE (2021)","DOI":"10.1109\/ISBI48211.2021.9434010"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Liu, H., et\u00a0al.: Improved baselines with visual instruction tuning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26296\u201326306 (2024)","DOI":"10.1109\/CVPR52733.2024.02484"},{"issue":"7956","key":"9_CR27","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1038\/s41586-023-05881-4","volume":"616","author":"M Moor","year":"2023","unstructured":"Moor, M., et al.: Foundation models for generalist medical artificial intelligence. Nature 616(7956), 259\u2013265 (2023)","journal-title":"Nature"},{"key":"9_CR28","unstructured":"Moor, M., et\u00a0al.: Med-flamingo: a multimodal medical few-shot learner. arXiv preprint arXiv:2307.15189 (2023)"},{"issue":"1","key":"9_CR29","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1038\/s41597-022-01498-w","volume":"9","author":"HQ Nguyen","year":"2022","unstructured":"Nguyen, H.Q., et al.: Vindr-CXR: an open dataset of chest x-rays with radiologist\u2019s annotations. Sci. Data 9(1), 429 (2022)","journal-title":"Sci. Data"},{"key":"9_CR30","doi-asserted-by":"publisher","unstructured":"Nguyen, H.T., et al.: VinDr-SpineXR: a deep learning framework for spinal lesions detection and classification from radiographs. In: de Bruijne, M., Cattin, P.C., Cotin, S., Padoy, N., Speidel, S., Zheng, Y., Essert, C. (eds.) MICCAI 2021. LNCS, vol. 12905, pp. 291\u2013301. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87240-3_28","DOI":"10.1007\/978-3-030-87240-3_28"},{"issue":"1","key":"9_CR31","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1038\/s41597-023-02100-7","volume":"10","author":"HT Nguyen","year":"2023","unstructured":"Nguyen, H.T., et al.: Vindr-mammo: a large-scale benchmark dataset for computer-aided diagnosis in full-field digital mammography. Sci. Data 10(1), 277 (2023)","journal-title":"Sci. Data"},{"key":"9_CR32","unstructured":"Pham, H.H., et\u00a0al.: Vindr-PCXR: an open, large-scale pediatric chest x-ray dataset for interpretation of common thoracic diseases. PhysioNet (2022)"},{"key":"9_CR33","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"9_CR34","unstructured":"Saab, K., et\u00a0al.: Capabilities of Gemini models in medicine. arXiv preprint arXiv:2404.18416 (2024)"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Sun, G., et\u00a0al.: Self-training large language and vision assistant for medical question answering. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 20052\u201320060 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.1119"},{"issue":"1","key":"9_CR36","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2018.161","volume":"5","author":"P Tschandl","year":"2018","unstructured":"Tschandl, P., et al.: The ham10000 dataset, a large collection of multi-source Dermatoscopic images of common pigmented skin lesions. Sci. Data 5(1), 180161 (2018)","journal-title":"Sci. Data"},{"key":"9_CR37","unstructured":"de\u00a0Vente, C., et\u00a0al.: Airogs: artificial intelligence for robust glaucoma screening challenge. arXiv preprint arXiv:2302.01738 (2023)"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Wasserthal, J., et\u00a0al.: Totalsegmentator: robust segmentation of 104 anatomic structures in CT images. Radiol. Artif. Intell. 5(5), e230024 (2023)","DOI":"10.1148\/ryai.230024"},{"key":"9_CR39","unstructured":"Yang, A., et\u00a0al.: Qwen2. 5 technical report. arXiv preprint arXiv:2412.15115 (2024)"},{"key":"9_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, S., et\u00a0al.: A multimodal biomedical foundation model trained from fifteen million image\u2013text pairs. NEJM AI 2(1) (2024)","DOI":"10.1056\/AIoa2400640"}],"container-title":["Lecture Notes in Computer Science","Foundation Models for General Medical AI"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07845-2_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T14:28:47Z","timestamp":1760192927000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07845-2_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"ISBN":["9783032078445","9783032078452"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07845-2_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"12 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MedAGI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Foundation Models for General Medical AI","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"medagi2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/medagi2025.github.io\/#\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}