{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T06:45:41Z","timestamp":1767077141287,"version":"3.48.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,3]]},"DOI":"10.1109\/dicta68720.2025.11302446","type":"proceedings-article","created":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T18:36:22Z","timestamp":1767033382000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["MedGemma-Critic: Fine-Tuning Medical Language Models for Domain-Specialised Text Evaluation"],"prefix":"10.1109","author":[{"given":"Sirui","family":"Liu","sequence":"first","affiliation":[{"name":"School of Computer Science, The University of Sydney,Sydney,Australia"}]},{"given":"Jinman","family":"Kim","sequence":"additional","affiliation":[{"name":"School of Computer Science, The University of Sydney,Sydney,Australia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbad493"},{"volume-title":"Medgemma technical report","year":"2025","author":"Sellergren","key":"ref2"},{"key":"ref3","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","author":"Papineni"},{"key":"ref4","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Text Summarization Branches Out","author":"Lin","year":"2004"},{"volume-title":"Detecting and evaluating medical hallucinations in large vision language models","year":"2024","author":"Chen","key":"ref5"},{"volume-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","year":"2023","author":"Zheng","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0322-0"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2018.251"},{"volume-title":"Llava-critic: Learning to evaluate multimodal models","year":"2025","author":"Xiong","key":"ref9"},{"volume-title":"Mmedpo: Aligning medical vision-language models with clinicalaware multimodal preference optimization","year":"2025","author":"Zhu","key":"ref10"},{"volume-title":"Multimedeval: A benchmark and a toolkit for evaluating medical vision-language models","year":"2024","author":"Royer","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-025-02954-4"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btz682"},{"volume-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","year":"2023","author":"Li","key":"ref14"},{"volume-title":"Med-flamingo: a multimodal medical few-shot learner","year":"2023","author":"Moor","key":"ref15"},{"volume-title":"Pmc-vqa: Visual instruction tuning for medical visual question answering","year":"2024","author":"Zhang","key":"ref16"},{"key":"ref17","first-page":"440","article-title":"XrayGPT: Chest radiographs summarization using large medical vision-language models","volume-title":"Proceedings of the 23rd Workshop on Biomedical Natural Language Processing","author":"Thawakar"},{"volume-title":"Gpt-4 technical report","year":"2024","key":"ref18"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"11286","DOI":"10.18653\/v1\/2024.findings-acl.672","article-title":"Prometheus-vision: Vision-language model as a judge for fine-grained evaluation","volume-title":"Findings of the Association for Computational Linguistics: ACL 2024","author":"Lee","year":"2024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02484"},{"journal-title":"Llava-next: Improved reasoning, ocr, and world knowledge","year":"2024","author":"Liu","key":"ref21"},{"volume-title":"Mm-vet: Evaluating large multimodal models for integrated capabilities","year":"2024","author":"Yu","key":"ref22"},{"volume-title":"Wildvision: Evaluating vision-language models in the wild with human preferences","year":"2024","author":"Lu","key":"ref23"},{"journal-title":"Meditron-70b: Scaling medical pretraining for large language models","year":"2023","author":"Chen","key":"ref24"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV51070.2023.01100","volume-title":"Sigmoid loss for language image pre-training","author":"Zhai","year":"2023"},{"volume-title":"Lora: Low-rank adaptation of large language models","year":"2021","author":"Hu","key":"ref26"},{"volume-title":"Self-rewarding language models","year":"2025","author":"Yuan","key":"ref27"},{"volume-title":"RLAIF: Scaling reinforcement learning from human feedback with AI feedback","year":"2024","author":"Lee","key":"ref28"},{"volume-title":"Direct preference optimization: Your language model is secretly a reward model","year":"2024","author":"Rafailov","key":"ref29"}],"event":{"name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","start":{"date-parts":[[2025,12,3]]},"location":"Adelaide, Australia","end":{"date-parts":[[2025,12,5]]}},"container-title":["2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11302408\/11302416\/11302446.pdf?arnumber=11302446","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T06:41:04Z","timestamp":1767076864000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11302446\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,3]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/dicta68720.2025.11302446","relation":{},"subject":[],"published":{"date-parts":[[2025,12,3]]}}}