{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T09:23:12Z","timestamp":1776158592087,"version":"3.50.1"},"reference-count":59,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62366036"],"award-info":[{"award-number":["62366036"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.engappai.2026.114516","type":"journal-article","created":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T10:15:06Z","timestamp":1773828906000},"page":"114516","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A knowledge prompt augmented lightweight multimodal language assistant for biomedicine"],"prefix":"10.1016","volume":"174","author":[{"given":"Lei","family":"Liu","sequence":"first","affiliation":[]},{"given":"Xiangdong","family":"Su","sequence":"additional","affiliation":[]},{"given":"Xingxiang","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Guanglai","family":"Gao","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114516_b1","series-title":"Gpt-4 technical report","author":"Achiam","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b2","series-title":"Augmenting LLMs with knowledge: A survey on hallucination prevention","author":"Andriopoulos","year":"2023"},{"issue":"3","key":"10.1016\/j.engappai.2026.114516_b3","doi-asserted-by":"crossref","first-page":"380","DOI":"10.3390\/bioengineering10030380","article-title":"Vision\u2013language model for visual question answering in medical imagery","volume":"10","author":"Bazi","year":"2023","journal-title":"Bioengineering"},{"key":"10.1016\/j.engappai.2026.114516_b4","article-title":"Translating embeddings for modeling multi-relational data","volume":"26","author":"Bordes","year":"2013","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.engappai.2026.114516_b5","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3641289","article-title":"A survey on evaluation of large language models","volume":"15","author":"Chang","year":"2024","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"10.1016\/j.engappai.2026.114516_b6","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2022: 25th International Conference, Singapore, September 18\u201322, 2022, Proceedings, Part V","first-page":"679","article-title":"Multi-modal masked autoencoders for medical vision-and-language pre-training","author":"Chen","year":"2022"},{"key":"10.1016\/j.engappai.2026.114516_b7","series-title":"Mobilevlm v2: Faster and stronger baseline for vision language model","author":"Chu","year":"2024"},{"key":"10.1016\/j.engappai.2026.114516_b8","series-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"10.1016\/j.engappai.2026.114516_b9","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2021: 24th International Conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part V 24","first-page":"64","article-title":"Multiple meta-model quantifying for medical visual question answering","author":"Do","year":"2021"},{"key":"10.1016\/j.engappai.2026.114516_b10","series-title":"Findings of the Association for Computational Linguistics: EACL 2023","first-page":"1181","article-title":"Pubmedclip: How much does clip benefit visual question answering in the medical domain?","author":"Eslami","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b11","series-title":"Retrieval-augmented generation for large language models: A survey","author":"Gao","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b12","series-title":"Pathvqa: 30000+ questions for medical visual question answering","author":"He","year":"2020"},{"key":"10.1016\/j.engappai.2026.114516_b13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J., 2016. Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"10.1016\/j.engappai.2026.114516_b14","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.engappai.2026.114516_b15","doi-asserted-by":"crossref","unstructured":"Hu, Y., Li, T., Lu, Q., Shao, W., He, J., Qiao, Y., Luo, P., 2024. Omnimedvqa: A new large-scale comprehensive evaluation benchmark for medical lvlm. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 22170\u201322183.","DOI":"10.1109\/CVPR52733.2024.02093"},{"key":"10.1016\/j.engappai.2026.114516_b16","series-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021"},{"key":"10.1016\/j.engappai.2026.114516_b17","series-title":"Can knowledge editing really correct hallucinations?","author":"Huang","year":"2024"},{"issue":"12","key":"10.1016\/j.engappai.2026.114516_b18","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3571730","article-title":"Survey of hallucination in natural language generation","volume":"55","author":"Ji","year":"2023","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.engappai.2026.114516_b19","unstructured":"Karim, A.R., Uzuner, O., 2025. MasonNLP at MEDIQA-WV 2025: Multimodal Retrieval-Augmented Generation with Large Language Models for Medical VQA. In: Proceedings of the 7th Clinical Natural Language Processing Workshop. pp. 84\u201394."},{"key":"10.1016\/j.engappai.2026.114516_b20","series-title":"2021 IEEE 18th International Symposium on Biomedical Imaging","first-page":"1033","article-title":"Mmbert: Multimodal bert pretraining for improved medical vqa","author":"Khare","year":"2021"},{"key":"10.1016\/j.engappai.2026.114516_b21","article-title":"Bilinear attention networks","volume":"31","author":"Kim","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114516_b22","series-title":"Med-r1: Reinforcement learning for generalizable medical reasoning in vision-language models","author":"Lai","year":"2025"},{"issue":"1","key":"10.1016\/j.engappai.2026.114516_b23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/sdata.2018.251","article-title":"A dataset of clinically generated visual questions and answers about radiology images","volume":"5","author":"Lau","year":"2018","journal-title":"Sci. Data"},{"key":"10.1016\/j.engappai.2026.114516_b24","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114516_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.jbi.2024.104769","article-title":"Biomedrag: A retrieval augmented large language model for biomedicine","volume":"162","author":"Li","year":"2025","journal-title":"J. Biomed. Inform."},{"key":"10.1016\/j.engappai.2026.114516_b26","series-title":"2023 IEEE 20th International Symposium on Biomedical Imaging","first-page":"1","article-title":"Self-supervised vision-language pretraining for medial visual question answering","author":"Li","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b27","article-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","volume":"36","author":"Li","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114516_b28","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"525","article-title":"Pmc-clip: Contrastive language-image pre-training using biomedical documents","author":"Lin","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b29","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114516_b30","series-title":"International Conference on Information Processing in Medical Imaging","first-page":"445","article-title":"Q2atransformer: Improving medical vqa via an answer querying decoder","author":"Liu","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b31","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"210","article-title":"Contrastive pre-training and representation distillation for medical visual question answering based on radiology images","author":"Liu","year":"2021"},{"key":"10.1016\/j.engappai.2026.114516_b32","series-title":"2021 IEEE 18th International Symposium on Biomedical Imaging","first-page":"1650","article-title":"SLAKE: A semantically-labeled knowledge-enhanced dataset for medical visual question answering","author":"Liu","year":"2021"},{"key":"10.1016\/j.engappai.2026.114516_b33","unstructured":"Miller, J.J., 2013. Graph database applications and concepts with Neo4j. In: Proceedings of the Southern Association for Information Systems Conference, vol. 2324, (36), Atlanta, GA, USA, pp. 141\u2013147."},{"key":"10.1016\/j.engappai.2026.114516_b34","series-title":"Machine Learning for Health","first-page":"353","article-title":"Med-flamingo: a multimodal medical few-shot learner","author":"Moor","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b35","series-title":"Bimedix2: Bio-medical expert lmm for diverse medical modalities","author":"Mullappilly","year":"2024"},{"key":"10.1016\/j.engappai.2026.114516_b36","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"522","article-title":"Overcoming data limitation in medical visual question answering","author":"Nguyen","year":"2019"},{"issue":"1","key":"10.1016\/j.engappai.2026.114516_b37","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1038\/s41597-022-01498-w","article-title":"VinDr-CXR: An open dataset of chest X-rays with radiologist\u2019s annotations","volume":"9","author":"Nguyen","year":"2022","journal-title":"Sci. Data"},{"key":"10.1016\/j.engappai.2026.114516_b38","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"337","article-title":"Medvlm-r1: Incentivizing medical reasoning capability of vision-language models (vlms) via reinforcement learning","author":"Pan","year":"2025"},{"key":"10.1016\/j.engappai.2026.114516_b39","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.engappai.2026.114516_b40","series-title":"A survey of hallucination in large foundation models","author":"Rawte","year":"2023"},{"issue":"1","key":"10.1016\/j.engappai.2026.114516_b41","doi-asserted-by":"crossref","first-page":"688","DOI":"10.1038\/s41597-024-03496-6","article-title":"ROCOv2: Radiology objects in context version 2, an updated multimodal image dataset","volume":"11","author":"R\u00fcckert","year":"2024","journal-title":"Sci. Data"},{"key":"10.1016\/j.engappai.2026.114516_b42","series-title":"Medgemma technical report","author":"Sellergren","year":"2025"},{"key":"10.1016\/j.engappai.2026.114516_b43","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D., 2017. Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 618\u2013626.","DOI":"10.1109\/ICCV.2017.74"},{"key":"10.1016\/j.engappai.2026.114516_b44","series-title":"Rotate: Knowledge graph embedding by relational rotation in complex space","author":"Sun","year":"2019"},{"key":"10.1016\/j.engappai.2026.114516_b45","series-title":"Qwen2.5: A party of foundation models","author":"Team","year":"2024"},{"key":"10.1016\/j.engappai.2026.114516_b46","series-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b47","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"726","article-title":"Open-ended medical visual question answering through prefix tuning of language models","author":"Van Sonsbeek","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b48","series-title":"2023 IEEE International Conference on Big Data (BigData)","first-page":"2247","article-title":"Multimodal large language models: A survey","author":"Wu","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b49","series-title":"Towards generalist foundation model for radiology by leveraging web-scale 2d&3d medical data","author":"Wu","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b50","series-title":"Mmed-rag: Versatile multimodal rag system for medical vision language models","author":"Xia","year":"2024"},{"key":"10.1016\/j.engappai.2026.114516_b51","series-title":"Lingshu: A generalist foundation model for unified multimodal medical understanding and reasoning","author":"Xu","year":"2025"},{"key":"10.1016\/j.engappai.2026.114516_b52","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A., 2016. Stacked attention networks for image question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 21\u201329.","DOI":"10.1109\/CVPR.2016.10"},{"key":"10.1016\/j.engappai.2026.114516_b53","series-title":"Qwen3 technical report","author":"Yang","year":"2025"},{"key":"10.1016\/j.engappai.2026.114516_b54","series-title":"Qwen2 technical report","author":"Yang","year":"2024"},{"key":"10.1016\/j.engappai.2026.114516_b55","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Fan, J., Tao, D., 2017. Multi-modal factorized bilinear pooling with co-attention learning for visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 1821\u20131830.","DOI":"10.1109\/ICCV.2017.202"},{"key":"10.1016\/j.engappai.2026.114516_b56","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2023","first-page":"10859","article-title":"Huatuogpt, towards taming language model to be a doctor","author":"Zhang","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b57","series-title":"Pmc-vqa: Visual instruction tuning for medical visual question answering","author":"Zhang","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b58","series-title":"Large-scale domain-specific pretraining for biomedical vision-language processing","first-page":"6","author":"Zhang","year":"2023"},{"key":"10.1016\/j.engappai.2026.114516_b59","series-title":"BiomedCLIP: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs","author":"Zhang","year":"2023"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626007979?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626007979?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T08:35:00Z","timestamp":1776155700000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626007979"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":59,"alternative-id":["S0952197626007979"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114516","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A knowledge prompt augmented lightweight multimodal language assistant for biomedicine","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114516","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114516"}}