{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T04:05:13Z","timestamp":1744344313976,"version":"3.40.4"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819646050"},{"type":"electronic","value":"9789819646067"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-4606-7_22","type":"book-chapter","created":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T21:21:50Z","timestamp":1744147310000},"page":"263-273","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MiniMedMind: Small-Scale LLM for High-Performance Medical VQA on Chest X-ray"],"prefix":"10.1007","author":[{"given":"Duong Quoc","family":"Khanh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hoang Trung","family":"Kien","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Duong Thanh","family":"Nam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6033-6484","authenticated-orcid":false,"given":"Phan Duy","family":"Hung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,24]]},"reference":[{"key":"22_CR1","unstructured":"Team, G., et al.: GemMa: open models based on Gemini research and technology, https:\/\/arxiv.org\/abs\/2403.08295"},{"key":"22_CR2","unstructured":"Team, G., Riviere, M., Pathak, S., Sessa, P.G., Hardin, C., Bhupatiraju, S., et al.: Gemma 2: improving open language models at a practical size, https:\/\/arxiv.org\/abs\/2408.00118"},{"key":"22_CR3","unstructured":"Stable LM 2 1.6B Technical Report. https:\/\/arxiv.org\/html\/2402.17834v1"},{"key":"22_CR4","unstructured":"Bai, J., et al.: Qwen Technical Report. https:\/\/arxiv.org\/abs\/2309.16609"},{"key":"22_CR5","unstructured":"Hughes, A.: Phi-2: the surprising power of small language models - Microsoft Research. https:\/\/www.microsoft.com\/en-us\/research\/blog\/phi-2-the-surprising-power-of-small-language-models\/"},{"key":"22_CR6","unstructured":"Abdin, M., Aneja, J., Awadalla, H., Awadallah, A., Awan, A.A., et al.: PHI-3 Technical Report: a highly capable language model locally on your phone. https:\/\/arxiv.org\/abs\/2404.14219"},{"key":"22_CR7","unstructured":"Zhang, P., Zeng, G., Wang, T., Lu, W.: TinyLlama: an Open-Source Small Language Model. https:\/\/arxiv.org\/abs\/2401.02385"},{"key":"22_CR8","unstructured":"Taylor, R., et al.: Galactica: a large language model for science. https:\/\/arxiv.org\/abs\/2211.09085"},{"key":"22_CR9","unstructured":"Zhang, S., et al.: OPT: Open pre-trained transformer language models. https:\/\/arxiv.org\/abs\/2205.01068"},{"key":"22_CR10","unstructured":"Biderman, S., et al.: Pythia: a suite for analyzing large language models across training and scaling. https:\/\/arxiv.org\/abs\/2304.01373"},{"key":"22_CR11","unstructured":"Chu, X., et al.: MobileVLM\u00a0: a fast, strong and open vision language assistant for mobile devices. https:\/\/arxiv.org\/abs\/2312.16886"},{"key":"22_CR12","unstructured":"Lin, B., et al.: MOE-LLAVA: mixture of experts for large Vision-Language models. https:\/\/arxiv.org\/abs\/2401.15947"},{"key":"22_CR13","unstructured":"Wei, H., et al.: Small language model meets with reinforced vision vocabulary. https:\/\/arxiv.org\/abs\/2401.12503"},{"key":"22_CR14","unstructured":"Yuan, Z., Li, Z., Huang, W., Ye, Y., Sun, L.: TinyGPT-V: efficient multimodal large language model via small backbones. https:\/\/arxiv.org\/abs\/2312.16862"},{"key":"22_CR15","unstructured":"Wang, J., et al.: Mobile-agent: autonomous multi-modal mobile device agent with visual perception. https:\/\/arxiv.org\/abs\/2401.16158"},{"key":"22_CR16","unstructured":"Li, C., et al.: LLAVA-MeD: Training a large Language-and-Vision assistant for biomedicine in one day. https:\/\/arxiv.org\/abs\/2306.00890"},{"key":"22_CR17","unstructured":"Moor, M., et al.: Med-flamingo: a multimodal medical few-shot learner. https:\/\/arxiv.org\/abs\/2307.15189"},{"key":"22_CR18","unstructured":"Thawkar, O., et al.: XrayGPT: Chest radiographs summarization using medical vision-language models. https:\/\/arxiv.org\/abs\/2306.07971"},{"key":"22_CR19","unstructured":"Chen, Z., et al.: A Vision-Language foundation model to enhance efficiency of chest x-ray interpretation. https:\/\/arxiv.org\/abs\/2401.12208"},{"key":"22_CR20","unstructured":"Dai, D., et al.: PA-LLAVA: a large Language-Vision assistant for human Pathology image understanding, https:\/\/arxiv.org\/abs\/2408.09530"},{"key":"22_CR21","unstructured":"Visual Med-Alpaca, https:\/\/cambridgeltl.github.io\/visual-med-alpaca\/"},{"key":"22_CR22","unstructured":"Jiang, S., Zheng, T., Zhang, Y., Jin, Y., Yuan, L., Liu, Z.: Med-MOE: mixture of Domain-Specific experts for lightweight medical Vision-Language models. https:\/\/arxiv.org\/abs\/2404.10237"},{"key":"22_CR23","doi-asserted-by":"publisher","unstructured":"Tanwani, A.K., Barral, J., Freedman, D.: RepsNet: combining vision with language for automated medical reports. In: Lecture Notes in Computer Science, pp. 714\u2013724 (2022). https:\/\/doi.org\/10.1007\/978-3-031-16443-9_68","DOI":"10.1007\/978-3-031-16443-9_68"},{"key":"22_CR24","doi-asserted-by":"publisher","first-page":"3129","DOI":"10.1038\/s41591-024-03185-2","volume":"30","author":"K Zhang","year":"2024","unstructured":"Zhang, K., et al.: A generalist vision\u2013language foundation model for diverse biomedical tasks. Nat. Med. 30, 3129\u20133141 (2024). https:\/\/doi.org\/10.1038\/s41591-024-03185-2","journal-title":"Nat. Med."},{"key":"22_CR25","unstructured":"Sun, Q., Fang, Y., Wu, L., Wang, X., Cao, Y.: EVA-CLIP: Improved training techniques for CLIP at scale. https:\/\/arxiv.org\/abs\/2303.15389"},{"key":"22_CR26","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. https:\/\/arxiv.org\/abs\/2010.11929"},{"key":"22_CR27","unstructured":"Zhang, Q., Zhang, J., Xu, Y., Tao, D.: Vision transformer with quadrangle attention. https:\/\/arxiv.org\/abs\/2303.15105"},{"key":"22_CR28","unstructured":"meta-llama\/Llama-3.2-3B-Instruct \u00b7 Hugging Face. https:\/\/huggingface.co\/meta-llama\/Llama-3.2-3B-Instruct"},{"key":"22_CR29","unstructured":"MIMIC-CXR Database v2.0.0. https:\/\/www.physionet.org\/content\/mimic-cxr\/2.0.0\/"},{"key":"22_CR30","unstructured":"OpenINGWeb. https:\/\/openi.nlm.nih.gov\/faq"},{"key":"22_CR31","unstructured":"MIMIC-EXT-MIMIC-CXR-VQA: A complex, diverse, and large-scale visual question answering dataset for chest x-ray images V1.0.0. https:\/\/physionet.org\/content\/mimic-ext-mimic-cxr-vqa\/1.0.0\/"},{"key":"22_CR32","unstructured":"Liu, B., Zhan, L.-M., Xu, L., Ma, L., Yang, Y., Wu, X.-M.: SLAKE: a Semantically-Labeled Knowledge-Enhanced dataset for medical visual question answering. https:\/\/arxiv.org\/abs\/2102.09542"},{"key":"22_CR33","doi-asserted-by":"publisher","unstructured":"Lau, J.J., Gayen, S., Abacha, A.B., Demner-Fushman, D.: A dataset of clinically generated visual questions and answers about radiology images. Sci. Data. 5 (2018). https:\/\/doi.org\/10.1038\/sdata.2018.251","DOI":"10.1038\/sdata.2018.251"},{"key":"22_CR34","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: BERTScore: evaluating text generation with BERT. https:\/\/arxiv.org\/abs\/1904.09675"},{"key":"22_CR35","unstructured":"Lin, C.-Y.: ROUGE: a package for automatic evaluation of summaries. https:\/\/aclanthology.org\/W04-1013\/"}],"container-title":["Lecture Notes in Computer Science","Integrated Uncertainty in Knowledge Modelling and Decision Making"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-4606-7_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,10]],"date-time":"2025-04-10T09:29:06Z","timestamp":1744277346000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-4606-7_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819646050","9789819646067"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-4606-7_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IUKM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Integrated Uncertainty in Knowledge Modelling and Decision Making","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ho Chi Minh City","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 March 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 March 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iukm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.jaist.ac.jp\/IUKM\/IUKM2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}