{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T00:37:42Z","timestamp":1758587862931,"version":"3.44.0"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032060037","type":"print"},{"value":"9783032060044","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T00:00:00Z","timestamp":1758499200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T00:00:00Z","timestamp":1758499200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06004-4_20","type":"book-chapter","created":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T17:22:12Z","timestamp":1758561732000},"page":"197-205","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing 3D Medical Vision-Language Models with\u00a0Slice-Wise Visual Prompt"],"prefix":"10.1007","author":[{"given":"Soo Yong","family":"Kim","sequence":"first","affiliation":[]},{"given":"Seunghyeok","family":"Hong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,22]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Kwee, T.C., Kwee, R.M.: Workload of diagnostic radiologists in the foreseeable future based on recent (2024) scientific advances: updated growth expectations. Eur. J. Radiol. 187 (2025). Art.\u00a0no.\u00a0112103","DOI":"10.1016\/j.ejrad.2025.112103"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Kurmukov, A., Chernina, V., Gareeva, R., et al.: The impact of deep-learning aid on the workload and interpretation accuracy of radiologists on chest computed tomography: a cross-over reader study. arXiv preprint arXiv:2406.08137 (2024)","DOI":"10.1007\/978-981-96-3863-5_34"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Hamamci, I.E., Er, S., Wang, C.: et al.: Developing generalist foundation models from a multimodal dataset for 3D computed tomography. arXiv preprint arXiv:2403.17834 (2025)","DOI":"10.21203\/rs.3.rs-5271327\/v1"},{"key":"20_CR4","unstructured":"Zhang, X., et al.: RadGenome-Chest CT: a grounded vision-language dataset for chest CT analysis. arXiv preprint arXiv:2404.16754 (2024)"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Xin, Y., Ates, G.C., Gong, K., Shao, W.: Med3DVLM: an efficient vision-language model for 3D medical image analysis. arXiv preprint arXiv:2503.20047 (2025)","DOI":"10.1109\/JBHI.2025.3604595"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Zhai, X., Mustafa, B., Kolesnikov, A., Beyer, L.: Sigmoid loss for language-image pre-training. In: Proceedings IEEE\/CVF International Conference Computer Vision (ICCV), pp.\u00a011975\u201311985 (2023)","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Shinde, G., Ravi, A., Dey, E., Sakib, S., Rampure, M., Roy, N.: A survey on efficient vision-language models. arXiv preprint arXiv:2504.09724 (2025)","DOI":"10.1002\/widm.70036"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Cai, M., Liu, H., Park, D., et al.: ViP-LLaVA: making large multimodal models understand arbitrary visual prompts. arXiv preprint arXiv:2312.00784 (2023)","DOI":"10.1109\/CVPR52733.2024.01227"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Zhu, K., Qin, Z., Yi, H., et al.: Guiding medical vision-language models with explicit visual prompts: framework design and comprehensive exploration of prompt variations. arXiv preprint arXiv:2501.02385 (2025)","DOI":"10.18653\/v1\/2025.naacl-long.587"},{"key":"20_CR10","unstructured":"Yang, A., Yang, B., Zhang, B., et al.: Qwen 2.5 technical report. arXiv preprint arXiv:2412.15115 (2025)"},{"key":"20_CR11","unstructured":"He, J., Li, P., Liu, G., et al.: PeFoMed: parameter-efficient fine-tuning of multimodal large language models for medical imaging. arXiv preprint arXiv:2401.02797 (2024)"},{"key":"20_CR12","unstructured":"Ates, G.C., Xin, Y., Gong, K., Shao, W.: DCFormer: efficient 3D vision\u2013language modeling with decomposed convolutions. arXiv preprint arXiv:2502.05091 (2025)"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Hamamci, I.E., Er, S., Wang, C., et al.: Developing generalist foundation models from a multimodal dataset for 3D computed tomography (CT-RATE). arXiv preprint arXiv:2403.17834 (2024)","DOI":"10.21203\/rs.3.rs-5271327\/v1"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"He, Y., Guo, P., Tang, Y., et al.: VISTA3D: a unified segmentation foundation model for 3D medical imaging. arXiv preprint arXiv:2406.05285 (2024)","DOI":"10.1109\/CVPR52734.2025.01943"},{"key":"20_CR15","unstructured":"Bai, F., Du, Y., Huang, T., Meng, M.Q.-H., Zhao, B.: M3D: advancing 3D medical image analysis with multi-modal large language models. arXiv preprint arXiv:2404.00578 (2024)"},{"key":"20_CR16","unstructured":"Zhang, X., et al.: PMC-VQA: visual instruction tuning for medical visual question answering. arXiv preprint arXiv:2305.10415 (2023)"},{"key":"20_CR17","unstructured":"Shah, J., Bikshandi, G., Zhang, Y., Thakkar, V., Ramani, P., Dao, T.: FlashAttention-3: fast and accurate attention with asynchrony and low-precision. arXiv preprint arXiv:2407.08608 (2024)"},{"key":"20_CR18","unstructured":"Hu, E.J., Shen, Y., Wallis, P., et al.: LoRA: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings IEEE International Conference Computer Vision (ICCV), pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"20_CR20","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: Proceedings International Conference Learning Representations (ICLR) (2019)"},{"key":"20_CR21","unstructured":"Su, J., Lu, Y., Pan, S., et al.: RoFormer: enhanced transformer with rotary position embedding. arXiv preprint arXiv:2104.09864 (2021)"},{"key":"20_CR22","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space\u2013time attention all you need for video understanding? In: Proceedings International Conference Machine Learning (ICML) (2021)"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Wasserthal, J., Breit, H.-C., Meyer, M.T., et al.: TotalSegmentator: robust segmentation of 104 anatomical structures in CT images. Radiol. Artif. Intell. 5(5) (2023). Art. e230024","DOI":"10.1148\/ryai.230024"},{"issue":"6","key":"20_CR24","doi-asserted-by":"publisher","first-page":"1595","DOI":"10.1148\/rg.266065168","volume":"26","author":"CP Langlotz","year":"2006","unstructured":"Langlotz, C.P.: Radlex: a new method for indexing online educational materials. Radiographics 26(6), 1595\u20131597 (2006)","journal-title":"Radiographics"},{"key":"20_CR25","unstructured":"Dao, T., Fu, D.Y., Ermon, S., Rudra, A., R\u00e9, C.: FlashAttention: fast and memory-efficient exact attention with IO-awareness. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"}],"container-title":["Lecture Notes in Computer Science","AI for Clinical Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06004-4_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T17:22:21Z","timestamp":1758561741000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06004-4_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,22]]},"ISBN":["9783032060037","9783032060044"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06004-4_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,22]]},"assertion":[{"value":"22 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CREATE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Clinical-Driven Robotics and Embodied AI Technology","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"create2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/sites.google.com\/view\/create-2025\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}