{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:07:12Z","timestamp":1761898032029},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icme57554.2024.10687572","type":"proceedings-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T17:24:16Z","timestamp":1727717056000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["MALIP: Improving Few-Shot Image Classification with Multimodal Fusion Enhancement"],"prefix":"10.1109","author":[{"given":"Kaifen","family":"Cai","sequence":"first","affiliation":[{"name":"Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China"}]},{"given":"Kaiyu","family":"Song","sequence":"additional","affiliation":[{"name":"Sun Yat-Sen University,School of Artificial Intelligence,Guangzhou,China"}]},{"given":"Yan","family":"Pan","sequence":"additional","affiliation":[{"name":"Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China"}]},{"given":"Hanjiang","family":"Lai","sequence":"additional","affiliation":[{"name":"Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Prototypical networks for few-shot learning","author":"Snell","year":"2017","journal-title":"NeurIPS"},{"journal-title":"Caltech-ucsd birds 200","year":"2010","author":"Welinder","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58526-6_45"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/762"},{"key":"ref5","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021","journal-title":"ICML"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01891-x"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_29"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00880"},{"key":"ref10","article-title":"A simple cache model for image recognition","author":"Orhan","year":"2018","journal-title":"NeurIPS"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"journal-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref12"},{"key":"ref13","article-title":"Parameter-efficient transfer learning for nlp","author":"Houlsby","year":"2019","journal-title":"ICML"},{"journal-title":"Auto-encoding variational bayes","year":"2013","author":"Kingma","key":"ref14"},{"key":"ref15","article-title":"Improving zero-shot generalization for clip with synthesized prompts","author":"Wang","year":"2023","journal-title":"ICCV"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"ref18","article-title":"Learning generative visual models from few training examples: An incremental bayesian approach tested on 101 object categories","author":"Fei-Fei","year":"2004","journal-title":"CVPRW"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248092"},{"journal-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild","year":"2012","author":"Soomro","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2918242"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.461"},{"journal-title":"Fine-grained visual classification of aircraft","year":"2013","author":"Maji","key":"ref26"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2024,7,15]]},"location":"Niagara Falls, ON, Canada","end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10685847\/10687354\/10687572.pdf?arnumber=10687572","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T06:14:35Z","timestamp":1727763275000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10687572\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/icme57554.2024.10687572","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}