{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T02:45:37Z","timestamp":1778294737070,"version":"3.51.4"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106222"],"award-info":[{"award-number":["62106222"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Zhejiang Province, China","award":["LZ23F020008"],"award-info":[{"award-number":["LZ23F020008"]}]},{"name":"Zhejiang University-Angelalign Inc. R&D Center for Intelligent Healthcare"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Top. Comput. 
Intell."],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1109\/tetci.2023.3311333","type":"journal-article","created":{"date-parts":[[2023,9,20]],"date-time":"2023-09-20T18:01:57Z","timestamp":1695232917000},"page":"2816-2826","source":"Crossref","is-referenced-by-count":39,"title":["Parameter-Efficient Transfer Learning for Medical Visual Question Answering"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1764-0322","authenticated-orcid":false,"given":"Jiaxiang","family":"Liu","sequence":"first","affiliation":[{"name":"Stomatology Hospital, School of Stomatology, Zhejiang University School of Medicine, Zhejiang Provincial Clinical Research Center for Oral Diseases, Key Laboratory of Oral Biomedical Research of Zhejiang Province, Cancer Center of Zhejiang University, Engineering Research Center of Oral Biomaterials and Devices of Zhejiang Province, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianxiang","family":"Hu","sequence":"additional","affiliation":[{"name":"Zhejiang University - University of Illinois at Urbana Champaign Institute, Zhejiang University, Haining, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Zhang","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Feng","sequence":"additional","affiliation":[{"name":"Angelalign Technology inc., Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6685-2017","authenticated-orcid":false,"given":"Jin","family":"Hao","sequence":"additional","affiliation":[{"name":"ChohoTech Inc., Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junhui","family":"Lv","sequence":"additional","affiliation":[{"name":"Sir Run Run Shaw Hospital, College of Medicine, Zhejiang University, Hangzhou, 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7816-502X","authenticated-orcid":false,"given":"Zuozhu","family":"Liu","sequence":"additional","affiliation":[{"name":"Stomatology Hospital, School of Stomatology, Zhejiang University School of Medicine, Zhejiang Provincial Clinical Research Center for Oral Diseases, Key Laboratory of Oral Biomedical Research of Zhejiang Province, Cancer Center of Zhejiang University, Engineering Research Center of Oral Biomaterials and Devices of Zhejiang Province, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547948"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3460426.3463584"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2980024"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI48211.2021.9434063"},{"key":"ref5","first-page":"951","article-title":"Plug-and-play VQA: Zero-shot VQA by conjoining large pretrained models with zero training","volume-title":"Proc. Findings Assoc. Comput. Linguistics: EMNLP","author":"Tiong","year":"2022"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.302"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.142"},{"key":"ref8","first-page":"1571","article-title":"Bilinear attention networks","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Kim","year":"2018"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32251-9_57"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413761"},{"key":"ref11","article-title":"Does clip benefit visual question answering in the medical domain as much as it does in the general domain","author":"Eslami","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.421"},{"key":"ref13","article-title":"CLIP-TD: Clip targeted distillation for vision-language tasks","author":"Wang","year":"2022"},{"key":"ref14","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2021"},{"key":"ref15","first-page":"12888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li","year":"2022"},{"key":"ref16","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Brown","year":"2020"},{"key":"ref17","article-title":"Hierarchical text-conditional image generation with clip latents","author":"Ramesh","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01139"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00836"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01364-6_20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI48211.2021.9434010"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350993"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref26","article-title":"NLM at ImageCLEF 2018 visual question answering in the medical domain.","author":"Abacha","year":"2018","journal-title":"CLEF (Working Notes)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-21735-7_7"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d16-1044"},{"key":"ref29","article-title":"Deep multimodal learning for medical visual question answering","volume-title":"CLEF (Working Notes)","author":"Shi","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.202"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2018.251"},{"key":"ref32","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Finn","year":"2017"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87240-3_7"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87196-3_20"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"ref38","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jia","year":"2021"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01891-x"},{"key":"ref40","first-page":"2022","article-title":"SVL-Adapter: Self-supervised adapter for vision-language pretrained models","volume-title":"Proc. Brit. Mach. Vis. Conf.","author":"Pantazis"},{"key":"ref41","article-title":"How much can clip benefit vision-and-language tasks?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Shen","year":"2022"},{"key":"ref42","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. NAACL-HLT","author":"Kenton","year":"2019"},{"issue":"8","key":"ref43","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00847"},{"key":"ref45","first-page":"2790","article-title":"Parameter-efficient transfer learning for NLP","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Houlsby","year":"2019"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.47"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref48","first-page":"1022","article-title":"Compacter: Efficient low-rank hypercomplex adapter layers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Karimi Mahabadi","year":"2021"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.378"},{"key":"ref50","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hu","year":"2022"},{"key":"ref51","article-title":"Learning multiple visual domains with residual adapters","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Rebuffi","year":"2017"},{"key":"ref52","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy","year":"2021"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.514"},{"key":"ref55","article-title":"Regularizing neural networks by penalizing confident output distributions","author":"Pereyra","year":"2017"},{"key":"ref56","article-title":"When does label smoothing help","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Müller","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00516"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00045"}],"container-title":["IEEE Transactions on Emerging Topics in Computational Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7433297\/10607834\/10256025.pdf?arnumber=10256025","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T18:55:27Z","timestamp":1732647327000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10256025\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8]]},"references-count":59,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tetci.2023.3311333","relation":{},"ISSN":["2471-285X"],"issn-type":[{"value":"2471-285X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8]]}}}