{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T04:03:22Z","timestamp":1775621002583,"version":"3.50.1"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032049803","type":"print"},{"value":"9783032049810","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04981-0_32","type":"book-chapter","created":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T05:12:02Z","timestamp":1758258722000},"page":"337-347","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["MedVLM-R1: Incentivizing Medical Reasoning Capability of\u00a0Vision-Language Models (VLMs) via\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Jiazhen","family":"Pan","sequence":"first","affiliation":[]},{"given":"Che","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Junde","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Fenglin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jiayuan","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Hongwei Bran","family":"Li","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Cheng","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Rueckert","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,20]]},"reference":[{"key":"32_CR1","unstructured":"Achiam, J., Adler, S., Agarwal, S., Ahmad, L., Akkaya, I., Aleman, F.L., Almeida, D., Altenschmidt, J., Altman, S., Anadkat, S., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"32_CR2","doi-asserted-by":"publisher","first-page":"1120989","DOI":"10.3389\/fdata.2023.1120989","volume":"6","author":"Y Akhter","year":"2023","unstructured":"Akhter, Y., Singh, R., Vatsa, M.: Ai-based radiodiagnosis using chest x-rays: A review. Frontiers in big data 6, 1120989 (2023)","journal-title":"Frontiers in big data"},{"key":"32_CR3","unstructured":"Anthropic: Claude 3.7 sonnet system card (2025)"},{"key":"32_CR4","unstructured":"Bai, J., Bai, S., Yang, S., Wang, S., Tan, S., Wang, P., Lin, J., Zhou, C., Zhou, J.: Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond. arXiv preprint arXiv:2308.12966 (2023)"},{"key":"32_CR5","unstructured":"Chaves, J.M.Z., Huang, S.C., Xu, Y., Xu, H., Usuyama, N., Zhang, S., Wang, F., Xie, Y., Khademi, M., Yang, Z., et\u00a0al.: Towards a clinically accessible radiology foundation model: open-access and lightweight, with automated evaluation. 
arXiv preprint arXiv:2403.08002 (2024)"},{"key":"32_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., Yang, D., Jiang, Y., Li, M., Wei, J., Hou, X., Zhang, L.: Efficiency in focus: Layernorm as a catalyst for fine-tuning medical visual language pre-trained models. arXiv preprint arXiv:2404.16385 (2024)","DOI":"10.1145\/3664647.3680834"},{"key":"32_CR7","doi-asserted-by":"crossref","unstructured":"Chen, J., Ouyang, R., Gao, A., Chen, S., Chen, G.H., Wang, X., Zhang, R., Cai, Z., Ji, K., Yu, G., Wan, X., Wang, B.: Huatuogpt-vision, towards injecting medical visual knowledge into multimodal llms at scale (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.418"},{"key":"32_CR8","unstructured":"Chen, L., Li, L., Zhao, H., Song, Y., Vinci: R1-v: Reinforcing super generalization ability in vision-language models with less than $3. https:\/\/github.com\/Deep-Agent\/R1-V (2025), accessed: 2025-02-02"},{"key":"32_CR9","unstructured":"Chen, W., Ma, X., Wang, X., Cohen, W.W.: Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:2211.12588 (2022)"},{"key":"32_CR10","unstructured":"Christiano, P.F., Leike, J., Brown, T., Martic, M., Legg, S., Amodei, D.: Deep reinforcement learning from human preferences. Advances in neural information processing systems 30 (2017)"},{"key":"32_CR11","unstructured":"Chu, T., Zhai, Y., Yang, J., Tong, S., Xie, S., Schuurmans, D., Le, Q.V., Levine, S., Ma, Y.: Sft memorizes, rl generalizes: A comparative study of foundation model post-training. arXiv preprint arXiv:2501.17161 (2025)"},{"key":"32_CR12","unstructured":"Guo, D., Yang, D., Zhang, H., Song, J., Zhang, R., Xu, R., Zhu, Q., Ma, S., Wang, P., Bi, X., et\u00a0al.: Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"32_CR13","doi-asserted-by":"crossref","unstructured":"Hartsock, I., Rasool, G.: Vision-language models for medical report generation and visual question answering: A review. Frontiers in Artificial Intelligence 7 (2024)","DOI":"10.3389\/frai.2024.1430984"},{"key":"32_CR14","doi-asserted-by":"crossref","unstructured":"He, X., Zhang, Y., Mou, L., Xing, E., Xie, P.: Pathvqa: 30000+ questions for medical visual question answering. arXiv preprint arXiv:2003.10286 (2020)","DOI":"10.36227\/techrxiv.13127537.v1"},{"key":"32_CR15","doi-asserted-by":"crossref","unstructured":"Hu, Y., Li, T., Lu, Q., Shao, W., He, J., Qiao, Y., Luo, P.: Omnimedvqa: A new large-scale comprehensive evaluation benchmark for medical lvlm. In: Conference on Computer Vision and Pattern Recognition. pp. 22170\u201322183 (2024)","DOI":"10.1109\/CVPR52733.2024.02093"},{"key":"32_CR16","unstructured":"Hurst, A., Lerer, A., Goucher, A.P., Perelman, A., Ramesh, A., Clark, A., Ostrow, A., Welihinda, A., Hayes, A., Radford, A., et\u00a0al.: Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"32_CR17","unstructured":"Kumar, A., Zhuang, V., Agarwal, R., Su, Y., Co-Reyes, J.D., Singh, A., Baumli, K., Iqbal, S., Bishop, C., Roelofs, R., et\u00a0al.: Training language models to self-correct via reinforcement learning. arXiv preprint arXiv:2409.12917 (2024)"},{"issue":"1","key":"32_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2018.251","volume":"5","author":"JJ Lau","year":"2018","unstructured":"Lau, J.J., Gayen, S., Ben Abacha, A., et al.: A dataset of clinically generated visual questions and answers about radiology images. 
Scientific data 5(1), 1\u201310 (2018)","journal-title":"Scientific data"},{"key":"32_CR19","first-page":"28541","volume":"36","author":"C Li","year":"2023","unstructured":"Li, C., Wong, C., Zhang, S., Usuyama, N., Liu, H., Yang, J., Naumann, T., Poon, H., Gao, J.: Llava-med: Training a large language-and-vision assistant for biomedicine in one day. Adv. Neural. Inf. Process. Syst. 36, 28541\u201328564 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"32_CR20","doi-asserted-by":"crossref","unstructured":"Li, L.H., Hessel, J., Yu, Y., Ren, X., Chang, K.W., Choi, Y.: Symbolic chain-of-thought distillation: Small models can also\" think\" step-by-step. arXiv preprint arXiv:2306.14050 (2023)","DOI":"10.18653\/v1\/2023.acl-long.150"},{"key":"32_CR21","unstructured":"Lian, C., Zhou, H.Y., Yu, Y., Wang, L.: Less could be better: Parameter-efficient fine-tuning advances medical vision foundation models. arXiv preprint arXiv:2401.12215 (2024)"},{"key":"32_CR22","doi-asserted-by":"crossref","unstructured":"Liu, B., Zhan, L.M., Xu, L., Ma, L., Yang, Y., Wu, X.M.: Slake: A semantically-labeled knowledge-enhanced dataset for medical visual question answering. In: international symposium on biomedical imaging (ISBI). pp. 1650\u20131654 (2021)","DOI":"10.1109\/ISBI48211.2021.9434010"},{"key":"32_CR23","unstructured":"LMMs-Lab: open-r1-multimodal. https:\/\/github.com\/EvolvingLMMs-Lab\/open-r1-multimodal (2025), accessed: 2025-01-27"},{"key":"32_CR24","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., Wu, J., Jiang, X., Almeida, D., Wainwright, C., Mishkin, P., Zhang, C., Agarwal, S., Slama, K., Ray, A., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"32_CR25","unstructured":"Qwen-Team: Qwq: Reflect deeply on the boundaries of the unknown (2024)"},{"key":"32_CR26","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"32_CR27","unstructured":"Shao, Z., Wang, P., Zhu, Q., Xu, R., Song, J., Bi, X., Zhang, H., Zhang, M., Li, Y., Wu, Y., et\u00a0al.: Deepseekmath: Pushing the limits of mathematical reasoning in open language models. arXiv preprint arXiv:2402.03300 (2024)"},{"key":"32_CR28","unstructured":"Shen, H., Zhang, Z., Zhang, Q., Xu, R., Zhao, T.: Vlm-r1: A stable and generalizable r1-style large vision-language model. https:\/\/github.com\/om-ai-lab\/VLM-R1 (2025), accessed: 2025-02-15"},{"key":"32_CR29","doi-asserted-by":"crossref","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., Antonoglou, I., Huang, A., Guez, A., Hubert, T., Baker, L., Lai, M., Bolton, A., et\u00a0al.: Mastering the game of go without human knowledge. nature 550(7676), 354\u2013359 (2017)","DOI":"10.1038\/nature24270"},{"key":"32_CR30","unstructured":"Wang, P., Bai, S., Tan, S., Wang, S., Fan, Z., Bai, J., Chen, K., Liu, X., Wang, J., Ge, W., et\u00a0al.: Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)"},{"key":"32_CR31","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., Wang, X., Schuurmans, D., Bosma, M., Xia, F., Chi, E., Le, Q.V., Zhou, D., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 
35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"32_CR32","unstructured":"Wu, C., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: Towards generalist foundation model for radiology by leveraging web-scale 2d &3d medical data. arXiv preprint arXiv:2308.02463 (2023)"},{"key":"32_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, K., Zhou, R., Adhikarla, E., Yan, Z., Liu, Y., Yu, J., Liu, Z., Chen, X., Davison, B.D., Ren, H., et\u00a0al.: A generalist vision\u2013language foundation model for diverse biomedical tasks. Nature Medicine pp. 1\u201313 (2024)","DOI":"10.1038\/s41591-024-03185-2"},{"key":"32_CR34","unstructured":"Zhang, X., Wu, C., Zhao, Z., Lin, W., Zhang, Y., Wang, Y., Xie, W.: Pmc-vqa: Visual instruction tuning for medical visual question answering. arXiv preprint arXiv:2305.10415 (2023)"},{"key":"32_CR35","unstructured":"Ziegler, D.M., Stiennon, N., Wu, J., Brown, T.B., Radford, A., Amodei, D., Christiano, P., Irving, G.: Fine-tuning language models from human preferences. arXiv preprint arXiv:1909.08593 (2019)"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04981-0_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T05:33:54Z","timestamp":1767418434000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04981-0_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,20]]},"ISBN":["9783032049803","9783032049810"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04981-0_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,20]]},"assertion":[{"value":"20 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}