{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T15:02:25Z","timestamp":1782313345086,"version":"3.54.5"},"reference-count":94,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100000057","name":"National Institute of General Medical Sciences of the National Institutes of Health","doi-asserted-by":"publisher","award":["P20GM104420"],"award-info":[{"award-number":["P20GM104420"]}],"id":[{"id":"10.13039\/100000057","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3591750","type":"journal-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T18:44:30Z","timestamp":1753296270000},"page":"136320-136335","source":"Crossref","is-referenced-by-count":4,"title":["Automated Skin Cancer Report Generation via a Knowledge-Distilled Vision-Language Model"],"prefix":"10.1109","volume":"13","author":[{"given":"Lawhori","family":"Chakrabarti","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Idaho, Moscow, ID, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9401-6163","authenticated-orcid":false,"given":"Boyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Idaho, Moscow, ID, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hengyi","family":"Tian","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Idaho, Moscow, ID, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3365-1291","authenticated-orcid":false,"given":"Aleksandar","family":"Vakanski","sequence":"additional","affiliation":[{"name":"Department of Nuclear Engineering and Industrial Management, University of Idaho, Idaho Falls, ID, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6098-4441","authenticated-orcid":false,"given":"Min","family":"Xian","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Idaho, Idaho Falls, ID, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Liu"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3393018"},{"key":"ref3","article-title":"Homoclinic floer homology via direct limits","author":"Hohloch","year":"2024","journal-title":"arXiv:2402.12345"},{"key":"ref4","article-title":"Parameter-efficient fine-tuning medical multimodal large language models for medical visual grounding","author":"He","year":"2024","journal-title":"arXiv:2410.23822"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.clinicalnlp-1.24"},{"key":"ref6","article-title":"MMed-RAG: Versatile multimodal RAG system for medical vision language models","author":"Xia","year":"2024","journal-title":"arXiv:2410.13085"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/bibm62325.2024.10822640"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59713-9_54"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3399\/bjgp13X667213"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3485766"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref12","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Proc. Workshop Text Summarization Branches Out","author":"Lin"},{"key":"ref13","article-title":"A comprehensive survey on evaluating large language model applications in the medical industry","author":"Huang","year":"2024","journal-title":"arXiv:2402.12696"},{"key":"ref14","article-title":"Retrieval-augmented generation for large language models: A survey","author":"Gao","year":"2024","journal-title":"arXiv:2312.10997"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-eacl.88"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6989"},{"key":"ref17","article-title":"BiomedCLIP: A multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs","author":"Zhang","year":"2023","journal-title":"arXiv:2303.00915"},{"key":"ref18","article-title":"Resource-efficient medical report generation using large language models","volume-title":"arXiv:2410.15642","author":"Hamza","year":"2024"},{"key":"ref19","article-title":"RaDialog: A large vision-language model for radiology report generation and conversational assistance","volume-title":"arXiv:2311.18681","author":"Pellegrini","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1503.02531"},{"key":"ref21","first-page":"414","article-title":"Knowledge distillation in deep learning and its applications","volume":"7","author":"Alkhulaifi","year":"2020","journal-title":"PeerJ Comput. Sci."},{"key":"ref22","article-title":"Distilling task-specific knowledge from BERT into simple neural networks","author":"Tang","year":"2019","journal-title":"arXiv:1903.12136"},{"key":"ref23","article-title":"Model compression via distillation and quantization","author":"Polino","year":"2018","journal-title":"arXiv:1802.05668"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_2"},{"key":"ref25","first-page":"7528","article-title":"Knowledge distillation by on-the-fly native ensemble","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Zhu"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01065"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00726"},{"key":"ref28","article-title":"A survey of model compression and acceleration for deep neural networks","author":"Cheng","year":"2017","journal-title":"arXiv:1710.09282"},{"key":"ref29","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"35","author":"Wei"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.507"},{"key":"ref31","article-title":"LLM-powered low-budget knowledge distillation with chain-of-thought","author":"Zhou","year":"2024","journal-title":"arXiv:2402.09064"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-75331-2"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00879-8"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/jbhi.2023.3316750"},{"key":"ref35","article-title":"Atom: Low-bit quantization for efficient and accurate LLM serving","author":"Zhao","year":"2023","journal-title":"arXiv:2310.19102"},{"key":"ref36","article-title":"EcoAssistant: Using LLM assistant more affordably and accurately","author":"Zhang","year":"2023","journal-title":"arXiv:2310.03046"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939720"},{"key":"ref38","article-title":"Deep learning models are not robust against noise in clinical text","author":"Moradi","year":"2021","journal-title":"arXiv:2108.12242"},{"key":"ref39","volume-title":"GPT-4V Fails in Generating Reliable Clinical Content","year":"2024"},{"key":"ref40","article-title":"Hallucinations in health care LLMs: A regulatory science perspective","author":"Thambisetty","year":"2024","journal-title":"arXiv:2402.03620"},{"key":"ref41","article-title":"Investigating factuality in RAG for long-form QA","author":"Wei","year":"2024","journal-title":"arXiv:2407.12216"},{"key":"ref42","article-title":"Seven failure points when engineering a RAG system","author":"Huang","year":"2024","journal-title":"arXiv:2401.05856"},{"key":"ref43","article-title":"Understanding failure of LLMs in clinical settings: Hallucinations and retrieval drift","author":"Datta","year":"2024","journal-title":"arXiv:2403.01432"},{"key":"ref44","volume-title":"Common Failure Points of LLM-RAG Systems","author":"Sahin","year":"2024"},{"key":"ref45","article-title":"Towards understanding retrieval accuracy and prompt alignment in LLMs","author":"Zhao","year":"2024","journal-title":"arXiv:2411.19463"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.soncn.2013.06.004"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2021.105037"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.clindermatol.2024.06.016"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1093\/ced\/llae112"},{"key":"ref50","volume-title":"GPT-4 Technical Report","year":"2023"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3673791.3698415"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2024.1430984"},{"key":"ref53","article-title":"Customizing general-purpose foundation models for medical report generation","volume-title":"arXiv:2306.05642","author":"Yang","year":"2023"},{"key":"ref54","article-title":"Unifying vision-and-language tasks via text generation","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","author":"Cho"},{"key":"ref55","article-title":"Qwen technical report","volume-title":"arXiv:2309.16609","author":"Bai","year":"2023"},{"key":"ref56","first-page":"353","article-title":"Med-flamingo: A multimodal medical few-shot learner","volume-title":"Proc. 3rd Mach. Learn. Health Symp. (ML4H)","volume":"225","author":"Moor"},{"key":"ref57","first-page":"28541","article-title":"LLaVA-Med: Training a large language-and-vision assistant for biomedicine in one day","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Li"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref59","article-title":"Prefix-tuning: Optimizing continuous prompts for generation","author":"Lisa Li","year":"2021","journal-title":"arXiv:2101.00190"},{"key":"ref60","article-title":"Retrieval-augmented generation for knowledge-intensive NLP tasks","volume-title":"Proc. 34th Conf. Neural Inf. Process. Syst. (NeurIPS)","author":"Lewis"},{"key":"ref61","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Jia"},{"key":"ref62","volume-title":"LlamaIndex","author":"Liu","year":"2022"},{"key":"ref63","volume-title":"LangChain","author":"Chase","year":"2022"},{"key":"ref64","volume-title":"Haystack: The end-to-end NLP Framework for Pragmatic Builders","author":"Pietsch","year":"2019"},{"key":"ref65","first-page":"81234","article-title":"Knowledge distillation and transformer based framework for automatic spine CT report generation","volume-title":"IEEE Access","volume":"13","author":"Batool","year":"2025"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/access.2025.3546131"},{"key":"ref67","first-page":"13748","article-title":"Exploring and distilling posterior and prior knowledge for radiology report generation","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Liu"},{"key":"ref68","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Radford"},{"key":"ref69","article-title":"Flamingo: A visual language model for few-shot learning","author":"Alayrac","year":"2022","journal-title":"arXiv:2204.14198"},{"key":"ref70","first-page":"21855","article-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. 40th Int. Conf. Mach. Learn. (ICML)","volume":"202","author":"Li"},{"key":"ref71","article-title":"Fine-tuning language models from human preferences","author":"Ziegler","year":"2019","journal-title":"arXiv:1909.08593"},{"key":"ref72","article-title":"Training language models to follow instructions with human feedback","author":"Ouyang","year":"2022","journal-title":"arXiv:2203.02155"},{"key":"ref73","article-title":"LoRA: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv:2106.09685"},{"key":"ref74","article-title":"MedAlpaca\u2014An open-source collection of medical conversational AI models and training data","author":"Han","year":"2023","journal-title":"arXiv:2304.08247"},{"key":"ref75","article-title":"Instruction tuning for large language models: A survey","author":"Zhang","year":"2023","journal-title":"arXiv:2308.10792"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72661-3_27"},{"key":"ref77","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2024.emnlp-main.62","article-title":"RULE: Reliable multimodal RAG for factuality in medical vision language models","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Xia"},{"key":"ref78","article-title":"Evaluation of retrieval-augmented generation: A survey","author":"Yu","year":"2024","journal-title":"arXiv:2405.07437"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.28"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611830"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1147\/jrd.2015.2393193"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i8.32936"},{"key":"ref85","article-title":"A systematic review of deep learning-based research on radiology report generation","volume-title":"arXiv:2311.14199","author":"Liu","year":"2023"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.bionlp-1.35"},{"key":"ref87","article-title":"Automatic report generation for histopathology images using pre-trained vision transformers","volume-title":"arXiv:2311.06176","author":"Sengupta","year":"2023"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101817"},{"key":"ref89","first-page":"249","article-title":"Re-evaluating the role of bleu in machine translation research","volume-title":"Proc. 11th Conf. Eur. Chapter Assoc. Comput. Linguistics (EACL)","author":"Callison-Burch"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1007\/s41019-025-00297-8"},{"key":"ref91","first-page":"2577","article-title":"On the automatic generation of medical imaging reports","volume-title":"Proc. 56th Annu. Meeting Assoc. Comput. Linguistics, Long Papers","volume":"1","author":"Jing"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.6025\/jmpt\/2018\/9\/4\/124-133"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1111\/j.1365-2230.1991.tb00329.x"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejrad.2024.111458"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11091320.pdf?arnumber=11091320","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,8]],"date-time":"2025-08-08T04:38:21Z","timestamp":1754627901000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11091320\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":94,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3591750","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}