{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T20:06:31Z","timestamp":1765310791083,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62021001"],"award-info":[{"award-number":["62021001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755135","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:30:51Z","timestamp":1761377451000},"page":"3635-3644","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["MeDKCoOp: Dual Knowledge-guided Graph Prompt Learning for Biomedical Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3833-4205","authenticated-orcid":false,"given":"Yijun","family":"Wang","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9752-258X","authenticated-orcid":false,"given":"Siying","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4290-0995","authenticated-orcid":false,"given":"Lubin","family":"Gan","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7487-4673","authenticated-orcid":false,"given":"Zheyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0384-8712","authenticated-orcid":false,"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4030-825X","authenticated-orcid":false,"given":"Zhangchi","family":"Hu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7286-2281","authenticated-orcid":false,"given":"Huyue","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1376-9628","authenticated-orcid":false,"given":"Peixi","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3638-5566","authenticated-orcid":false,"given":"Xiaoyan","family":"Sun","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, AnHui, HeFei, China and Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, AnHui, HeFei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72390-2_72"},{"key":"e_1_3_2_1_2_1","volume-title":"Translating embeddings for modeling multi-relational data. Advances in neural information processing systems","author":"Bordes Antoine","year":"2013","unstructured":"Antoine Bordes, Nicolas Usunier, Alberto Garcia-Duran, Jason Weston, and Oksana Yakhnenko. 2013. Translating embeddings for modeling multi-relational data. Advances in neural information processing systems, Vol. 26 (2013)."},{"key":"e_1_3_2_1_3_1","volume-title":"Lung and colon cancer histopathological image dataset (lc25000). arXiv preprint arXiv:1912.12142","author":"Borkowski Andrew A","year":"2019","unstructured":"Andrew A Borkowski, Marilyn M Bui, L Brannon Thomas, Catherine P Wilson, Lauren A DeLand, and Stephen M Mastorides. 2019. Lung and colon cancer histopathological image dataset (lc25000). arXiv preprint arXiv:1912.12142 (2019)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27853"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02515"},{"key":"e_1_3_2_1_6_1","volume-title":"Ultra-dp: Unifying graph pre-training with multi-task graph dual prompt. arXiv preprint arXiv:2310.14845","author":"Chen Mouxiang","year":"2023","unstructured":"Mouxiang Chen, Zemin Liu, Chenghao Liu, Jundong Li, Qiheng Mao, and Jianling Sun. 2023. Ultra-dp: Unifying graph pre-training with multi-task graph dual prompt. arXiv preprint arXiv:2310.14845 (2023)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547948"},{"key":"e_1_3_2_1_8_1","volume-title":"Dia-LLaMA: Towards large language model-driven ct report generation. arXiv preprint arXiv:2403.16386","author":"Chen Zhixuan","year":"2024","unstructured":"Zhixuan Chen, Luyang Luo, Yequan Bie, and Hao Chen. 2024. Dia-LLaMA: Towards large language model-driven ct report generation. arXiv preprint arXiv:2403.16386 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01941"},{"key":"e_1_3_2_1_10_1","volume-title":"Does clip benefit visual question answering in the medical domain as much as it does in the general domain? arXiv preprint arXiv:2112.13906","author":"Eslami Sedigheh","year":"2021","unstructured":"Sedigheh Eslami, Gerard de Melo, and Christoph Meinel. 2021. Does clip benefit visual question answering in the medical domain as much as it does in the general domain? arXiv preprint arXiv:2112.13906 (2021)."},{"key":"e_1_3_2_1_11_1","first-page":"52464","article-title":"Universal prompt tuning for graph neural networks","volume":"36","author":"Fang Taoran","year":"2023","unstructured":"Taoran Fang, Yunchao Zhang, Yang Yang, Chunping Wang, and Lei Chen. 2023. Universal prompt tuning for graph neural networks. Advances in Neural Information Processing Systems, Vol. 36 (2023), 52464-52489.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72117-5_6"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01891-x"},{"key":"e_1_3_2_1_14_1","volume-title":"Making pre-trained language models better few-shot learners. arXiv preprint arXiv:2012.15723","author":"Gao Tianyu","year":"2020","unstructured":"Tianyu Gao, Adam Fisch, and Danqi Chen. 2020. Making pre-trained language models better few-shot learners. arXiv preprint arXiv:2012.15723 (2020)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1148\/radiol.232714"},{"key":"e_1_3_2_1_16_1","volume-title":"Prompt tuning for multi-view graph contrastive learning. arXiv preprint arXiv:2310.10362","author":"Gong Chenghua","year":"2023","unstructured":"Chenghua Gong, Xiang Li, Jianxiang Yu, Cheng Yao, Jiaqi Tan, Chengcheng Yu, and Dawei Yin. 2023. Prompt tuning for multi-view graph contrastive learning. arXiv preprint arXiv:2310.10362 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599244"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01453"},{"key":"e_1_3_2_1_19_1","volume-title":"Mscpt: Few-shot whole slide image classification with multi-scale and context-focused prompt tuning","author":"Han Minghao","year":"2025","unstructured":"Minghao Han, Linhao Qu, Dingkang Yang, Xukun Zhang, Xiaoying Wang, and Lihua Zhang. 2025. Mscpt: Few-shot whole slide image classification with multi-scale and context-focused prompt tuning. IEEE Transactions on Medical Imaging (2025)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"e_1_3_2_1_22_1","volume-title":"A visual-language foundation model for pathology image analysis using medical twitter. Nature medicine","author":"Huang Zhi","year":"2023","unstructured":"Zhi Huang, Federico Bianchi, Mert Yuksekgonul, Thomas J Montine, and James Zou. 2023. A visual-language foundation model for pathology image analysis using medical twitter. Nature medicine, Vol. 29, 9 (2023), 2307-2316."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-15634-4"},{"key":"e_1_3_2_1_24_1","volume-title":"International conference on machine learning. PMLR, 4904-4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904-4916."},{"key":"e_1_3_2_1_25_1","volume-title":"Multi-class texture analysis in colorectal cancer histology. Scientific reports","author":"Kather Jakob Nikolas","year":"2016","unstructured":"Jakob Nikolas Kather, Cleo-Aron Weis, Francesco Bianconi, Susanne M Melchers, Lothar R Schad, Timo Gaiser, Alexander Marx, and Frank Gerrit Z\u00f6llner. 2016. Multi-class texture analysis in colorectal cancer histology. Scientific reports, Vol. 6, 1 (2016), 1-11."},{"key":"e_1_3_2_1_26_1","volume-title":"Huiying Liang, Sally L Baxter, Alex McKeown, Ge Yang, Xiaokang Wu, Fangbing Yan, et al.","author":"Kermany Daniel S","year":"2018","unstructured":"Daniel S Kermany, Michael Goldbaum, Wenjia Cai, Carolina CS Valentim, Huiying Liang, Sally L Baxter, Alex McKeown, Ge Yang, Xiaokang Wu, Fangbing Yan, et al., 2018. Identifying medical diagnoses and treatable diseases by image-based deep learning. cell, Vol. 172, 5 (2018), 1122-1131."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"e_1_3_2_1_28_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CBMS.2013.6627771"},{"key":"e_1_3_2_1_30_1","volume-title":"BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models. arXiv preprint arXiv:2411.15232","author":"Koleilat Taha","year":"2024","unstructured":"Taha Koleilat, Hojat Asgariandehkordi, Hassan Rivaz, and Yiming Xiao. 2024. BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models. arXiv preprint arXiv:2411.15232 (2024)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1148\/radiol.232715"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i5.32482"},{"key":"e_1_3_2_1_33_1","volume-title":"The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691","author":"Lester Brian","year":"2021","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)."},{"key":"e_1_3_2_1_34_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023a. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_35_1","first-page":"13448","article-title":"Graphadapter: Tuning vision-language models with dual knowledge graph","volume":"36","author":"Li Xin","year":"2023","unstructured":"Xin Li, Dongze Lian, Zhihe Lu, Jiawang Bai, Zhibo Chen, and Xinchao Wang. 2023b. Graphadapter: Tuning vision-language models with dual knowledge graph. Advances in Neural Information Processing Systems, Vol. 36 (2023), 13448-13466.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2025.3575853"},{"key":"e_1_3_2_1_37_1","volume-title":"Healthgpt: A medical large vision-language model for unifying comprehension and generation via heterogeneous knowledge adaptation. arXiv preprint arXiv:2502.09838","author":"Lin Tianwei","year":"2025","unstructured":"Tianwei Lin, Wenqiao Zhang, Sijing Li, Yuqian Yuan, Binhe Yu, Haoyuan Li, Wanggui He, Hao Jiang, Mengze Li, Xiaohui Song, et al., 2025. Healthgpt: A medical large vision-language model for unifying comprehension and generation via heterogeneous knowledge adaptation. arXiv preprint arXiv:2502.09838 (2025)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01454"},{"key":"e_1_3_2_1_39_1","volume-title":"Git-mol: A multi-modal large language model for molecular science with graph, image, and text. Computers in biology and medicine","author":"Liu Pengfei","year":"2024","unstructured":"Pengfei Liu, Yiming Ren, Jun Tao, and Zhixiang Ren. 2024. Git-mol: A multi-modal large language model for molecular science with graph, image, and text. Computers in biology and medicine, Vol. 171 (2024), 108073."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583386"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-44824-z"},{"key":"e_1_3_2_1_42_1","volume-title":"Segment Anything in Medical Images and Videos: Benchmark and Deployment. arXiv preprint arXiv:2408.03322","author":"Ma Jun","year":"2024","unstructured":"Jun Ma, Sumin Kim, Feifei Li, Mohammed Baharoon, Reza Askereh, Hongwei Lyu, and Bo Wang. 2024b. Segment Anything in Medical Images and Videos: Benchmark and Deployment. arXiv preprint arXiv:2408.03322 (2024)."},{"key":"e_1_3_2_1_43_1","first-page":"27922","article-title":"Lvm-med: Learning large-scale self-supervised vision models for medical imaging via second-order graph matching","volume":"36","author":"Nguyen Duy MH","year":"2023","unstructured":"Duy MH Nguyen, Hoang Nguyen, Nghiem Diep, Tan Ngoc Pham, Tri Cao, Binh Nguyen, Paul Swoboda, Nhat Ho, Shadi Albarqouni, Pengtao Xie, et al., 2023. Lvm-med: Learning large-scale self-supervised vision models for medical imaging via second-order graph matching. Advances in Neural Information Processing Systems, Vol. 36 (2023), 27922-27950.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_44_1","volume-title":"Nghiem Tuong Diep, Trung Quoc Nguyen, Nhat Ho, Jacqueline Michelle Metsch, Miriam Cindy Maurer, Daniel Sonntag, Hanibal Bohnenberger, and Anne-Christin Hauschild.","author":"Nguyen Anh-Tien","year":"2025","unstructured":"Anh-Tien Nguyen, Duy Minh Ho Nguyen, Nghiem Tuong Diep, Trung Quoc Nguyen, Nhat Ho, Jacqueline Michelle Metsch, Miriam Cindy Maurer, Daniel Sonntag, Hanibal Bohnenberger, and Anne-Christin Hauschild. 2025. MGPATH: Vision-Language Model with Multi-Granular Prompt Learning for Few-Shot WSI Classification. arXiv preprint arXiv:2502.07409 (2025)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","unstructured":"Msoud Nickparvar. 2021. Brain Tumor MRI Dataset. doi:10.34740\/KAGGLE\/DSV\/2645886","DOI":"10.34740\/KAGGLE\/DSV\/2645886"},{"key":"e_1_3_2_1_46_1","volume-title":"AMVLM: Alignment-Multiplicity Aware Vision-Language Model for Semi-Supervised Medical Image Segmentation","author":"Pan Qingtao","year":"2025","unstructured":"Qingtao Pan, Zhengrong Li, Wenhao Qiao, Jingjiao Lou, Qing Yang, Guang Yang, and Bing Ji. 2025. AMVLM: Alignment-Multiplicity Aware Vision-Language Model for Semi-Supervised Medical Image Segmentation. IEEE Transactions on Medical Imaging (2025)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103225"},{"key":"e_1_3_2_1_48_1","volume-title":"Tan-Huong Pham, and Truong-Son Hy.","author":"Pham Tan-Hanh","year":"2025","unstructured":"Tan-Hanh Pham, Chris Ngo, Trong-Duong Bui, Minh Luu Quang, Tan-Huong Pham, and Truong-Son Hy. 2025. SilVar-Med: A Speech-Driven Visual Language Model for Explainable Abnormality Detection in Medical Imaging. arXiv preprint arXiv:2504.10642 (2025)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3083187.3083212"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.3390\/data3030025"},{"key":"e_1_3_2_1_51_1","volume-title":"Medical image understanding with pretrained vision language models: A comprehensive study. arXiv preprint arXiv:2209.15517","author":"Qin Ziyuan","year":"2022","unstructured":"Ziyuan Qin, Huahui Yi, Qicheng Lao, and Kang Li. 2022. Medical image understanding with pretrained vision language models: A comprehensive study. arXiv preprint arXiv:2209.15517 (2022)."},{"key":"e_1_3_2_1_52_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_53_1","unstructured":"Zhongyi Shui Jianpeng Zhang Weiwei Cao Sinuo Wang Ruizhe Guo Le Lu Lin Yang Xianghua Ye Tingbo Liang Qi Zhang et al. 2025. Large-scale and Fine-grained Vision-language Pre-training for Enhanced CT Image Understanding. arXiv preprint arXiv:2501.14548 (2025)."},{"key":"e_1_3_2_1_54_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_55_1","volume-title":"Amith Khandakar, Tawsifur Rahman, Yazan Qiblawey, Uzair Khurshid, Serkan Kiranyaz, Nabil Ibtehaz, M Sohel Rahman, Somaya Al-Maadeed, et al.","author":"Tahir Anas M","year":"2021","unstructured":"Anas M Tahir, Muhammad EH Chowdhury, Amith Khandakar, Tawsifur Rahman, Yazan Qiblawey, Uzair Khurshid, Serkan Kiranyaz, Nabil Ibtehaz, M Sohel Rahman, Somaya Al-Maadeed, et al., 2021. COVID-19 infection localization and severity grading from chest X-ray images. Computers in biology and medicine, Vol. 139 (2021), 105002."},{"key":"e_1_3_2_1_56_1","volume-title":"Self-supervised image-text pre-training with mixed data in chest x-rays. arXiv preprint arXiv:2103.16022","author":"Wang Xiaosong","year":"2021","unstructured":"Xiaosong Wang, Ziyue Xu, Leo Tam, Dong Yang, and Daguang Xu. 2021. Self-supervised image-text pre-training with mixed data in chest x-rays. arXiv preprint arXiv:2103.16022 (2021)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01917-4"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02206"},{"key":"e_1_3_2_1_59_1","volume-title":"Hierarchical relational learning for few-shot knowledge graph completion. arXiv preprint arXiv:2209.01205","author":"Wu Han","year":"2022","unstructured":"Han Wu, Jie Yin, Bala Rajaratnam, and Jianyuan Guo. 2022. Hierarchical relational learning for few-shot knowledge graph completion. arXiv preprint arXiv:2209.01205 (2022)."},{"key":"e_1_3_2_1_60_1","volume-title":"One-shot relational learning for knowledge graphs. arXiv preprint arXiv:1808.09040","author":"Xiong Wenhan","year":"2018","unstructured":"Wenhan Xiong, Mo Yu, Shiyu Chang, Xiaoxiao Guo, and William Yang Wang. 2018. One-shot relational learning for knowledge graphs. arXiv preprint arXiv:1808.09040 (2018)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.3390\/bioengineering11101034"},{"key":"e_1_3_2_1_62_1","unstructured":"An Yan Yu Wang Yiwu Zhong Zexue He Petros Karypis Zihan Wang Chengyu Dong Amilcare Gentili Chun-Nan Hsu Jingbo Shang et al. 2023. Robust and interpretable medical image classifiers via concept bottleneck models. arXiv preprint arXiv:2310.03182 (2023)."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00653"},{"key":"e_1_3_2_1_64_1","volume-title":"P3Net: Progressive and Periodic Perturbation for Semi-Supervised Medical Image Segmentation. arXiv preprint arXiv:2505.15861","author":"Yao Zhenyan","year":"2025","unstructured":"Zhenyan Yao, Miao Zhang, Lanhu Wu, Yongri Piao, Feng Tian, Weibing Sun, and Huchuan Lu. 2025. P3Net: Progressive and Periodic Perturbation for Semi-Supervised Medical Image Segmentation. arXiv preprint arXiv:2505.15861 (2025)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01049"},{"key":"e_1_3_2_1_66_1","unstructured":"Sheng Zhang Yanbo Xu Naoto Usuyama Hanwen Xu Jaspreet Bagga Robert Tinn Sam Preston Rajesh Rao Mu Wei Naveen Valluri et al. 2023. Biomedclip: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs. arXiv preprint arXiv:2303.00915 (2023)."},{"key":"e_1_3_2_1_67_1","first-page":"2","article-title":"Contrastive learning of medical visual representations from paired images and text. In Machine learning for healthcare conference","author":"Zhang Yuhao","year":"2022","unstructured":"Yuhao Zhang, Hang Jiang, Yasuhide Miura, Christopher D Manning, and Curtis P Langlotz. 2022. Contrastive learning of medical visual representations from paired images and text. In Machine learning for healthcare conference. PMLR, 2-25.","journal-title":"PMLR"},{"key":"e_1_3_2_1_68_1","unstructured":"Zihao Zhao Yuxiao Liu Han Wu Mei Wang Yonghao Li Sheng Wang Lin Teng Disheng Liu Zhiming Cui Qian Wang et al. 2023. Clip in medical imaging: A comprehensive survey. arXiv preprint arXiv:2312.07353 (2023)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00425-9"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_72_1","volume-title":"MMedPO: Aligning Medical Vision-Language Models with Clinical-Aware Multimodal Preference Optimization. arXiv preprint arXiv:2412.06141","author":"Zhu Kangyu","year":"2024","unstructured":"Kangyu Zhu, Peng Xia, Yun Li, Hongtu Zhu, Sheng Wang, and Huaxiu Yao. 2024. MMedPO: Aligning Medical Vision-Language Models with Clinical-Aware Multimodal Preference Optimization. arXiv preprint arXiv:2412.06141 (2024)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755135","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T20:02:53Z","timestamp":1765310573000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755135"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":72,"alternative-id":["10.1145\/3746027.3755135","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755135","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}