{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T07:10:08Z","timestamp":1747120208211,"version":"3.40.5"},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T00:00:00Z","timestamp":1744588800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T00:00:00Z","timestamp":1744588800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,14]]},"DOI":"10.1109\/isbi60581.2025.10981156","type":"proceedings-article","created":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T17:38:51Z","timestamp":1747071531000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Medvisiochat: A Multimodal Large Language Model Framework for Interpretable Diagnosis with Visual Grounding in CXRs"],"prefix":"10.1109","author":[{"given":"Ling","family":"Yang","sequence":"first","affiliation":[{"name":"School of Electrical and Computer Engineering, The University of Sydney,Australia"}]},{"given":"Zhanyu","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, The University of Sydney,Australia"}]},{"given":"Luping","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, The University of Sydney,Australia"}]}],"member":"263","reference":[{"article-title":"Gpt-4 technical report","volume-title":"OpenAI","year":"2023","key":"ref1"},{"journal-title":"Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond","year":"2023","author":"Bai","key":"ref2"},{"key":"ref3","article-title":"Llm-cxr: Instruction-finetuned llm for cxr image understanding and generation","volume-title":"ICLR","author":"Lee","year":"2023"},{"key":"ref4","article-title":"Medxchat: Bridging cxr modalities with a unified multimodal large model","author":"Yang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s42979-022-01390-9"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111030"},{"key":"ref7","article-title":"Palm-e: An embodied multimodal language model","author":"Driess","year":"2023","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1056\/aioa2300138"},{"key":"ref9","article-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","volume-title":"NIPS","author":"Li","year":"2024"},{"key":"ref10","article-title":"A clinical validation of vindr-cxr, an ai system for detecting abnormal chest radiographs","author":"Huy Nguyen","year":"2021","journal-title":"arXiv preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43990-2_35"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00179"},{"key":"ref13","article-title":"Lora: Low-rank adaptation of large language models","volume-title":"ICLR","author":"Hu","year":"2021"},{"journal-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Alexey","key":"ref14"},{"volume-title":"Openclip","year":"2021","author":"Gabriel","key":"ref15"},{"key":"ref16","article-title":"Attention is all you need","volume-title":"NIPS","author":"Vaswani","year":"2017"},{"key":"ref17","article-title":"Seqtr: A simple yet universal network for visual grounding","volume-title":"ECCV","author":"Shen","year":"2022"},{"key":"ref18","article-title":"Mimic-cxr-jpg, a large publicly available database of labeled chest radio-graphs","author":"Johnson","year":"2019","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01498-w"},{"key":"ref20","article-title":"Adam: A method for stochastic optimization","volume-title":"ICLR","author":"Kingma","year":"2015"}],"event":{"name":"2025 IEEE 22nd International Symposium on Biomedical Imaging (ISBI)","start":{"date-parts":[[2025,4,14]]},"location":"Houston, TX, USA","end":{"date-parts":[[2025,4,17]]}},"container-title":["2025 IEEE 22nd International Symposium on Biomedical Imaging (ISBI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10980665\/10980666\/10981156.pdf?arnumber=10981156","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T06:48:49Z","timestamp":1747118929000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10981156\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,14]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/isbi60581.2025.10981156","relation":{},"subject":[],"published":{"date-parts":[[2025,4,14]]}}}