{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:09:44Z","timestamp":1765544984405,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Science and Technology Project of State Grid Corporation of China","award":["5700-202341285A-1-1-ZN"],"award-info":[{"award-number":["5700-202341285A-1-1-ZN"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679963","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"4258-4262","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Meta-Prompt Tuning Vision-Language Model for Multi-Label Few-Shot Image Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8373-9366","authenticated-orcid":false,"given":"Feng","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Computer Science, Peking University &amp; Institute of Computational Social Science, Peking University (Qingdao), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4069-6423","authenticated-orcid":false,"given":"Wei","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University &amp; Institute of Computational Social Science, Peking University (Qingdao), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5431-4744","authenticated-orcid":false,"given":"Fei","family":"Ding","sequence":"additional","affiliation":[{"name":"School of Intelligence Science and Technology, Peking University &amp; Institute for Artificial Intelligence, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7395-5567","authenticated-orcid":false,"given":"Tengjiao","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University &amp; Institute of Computational Social Science, Peking University (Qingdao), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3787-2107","authenticated-orcid":false,"given":"Dawei","family":"Lu","sequence":"additional","affiliation":[{"name":"State Grid Information and Telecommunication Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9832-1125","authenticated-orcid":false,"given":"Jiabin","family":"Zheng","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University &amp; Institute of Computational Social Science, Peking University (Qingdao), Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"volume-title":"LaSO: Label-Set Operations Networks for Multi-Label Few-Shot Learning. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Alfassy Amit","key":"e_1_3_2_1_1_1","unstructured":"Amit Alfassy, Leonid Karlinsky, Amit Aides, Joseph Shtok, Sivan Harary, Rog\u00e9rio Schmidt Feris, Raja Giryes, and Alexander M. Bronstein. 2019. LaSO: Label-Set Operations Networks for Multi-Label Few-Shot Learning. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_2_1","volume-title":"Visual Instruction Tuning with Polite Flamingo. In AAAI Conference on Artificial Intelligence (AAAI). 17745--17753","author":"Chen Delong","year":"2024","unstructured":"Delong Chen, Jianfeng Liu, Wenliang Dai, and Baoyuan Wang. 2024. Visual Instruction Tuning with Polite Flamingo. In AAAI Conference on Artificial Intelligence (AAAI). 17745--17753."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3025814"},{"key":"e_1_3_2_1_4_1","volume-title":"Multi-Label Image Recognition With Graph Convolutional Networks. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Chen Zhao-Min","year":"2019","unstructured":"Zhao-Min Chen, Xiu-Shen Wei, Peng Wang, and Yanwen Guo. 2019. Multi-Label Image Recognition With Graph Convolutional Networks. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_5_1","volume-title":"Exploring Structured Semantic Prior for Multi Label Recognition with Incomplete Labels. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Ding Zixuan","year":"2023","unstructured":"Zixuan Ding, Ao Wang, Hui Chen, Qiang Zhang, Pengzhang Liu, Yongjun Bao, Weipeng Yan, and Jungong Han. 2023. Exploring Structured Semantic Prior for Multi Label Recognition with Incomplete Labels. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. IEEE, 3398--3407."},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_7_1","volume-title":"Christopher K. I. Williams, John M. Winn, and Andrew Zisserman.","author":"Everingham Mark","year":"2010","unstructured":"Mark Everingham, Luc Van Gool, Christopher K. I. Williams, John M. Winn, and Andrew Zisserman. 2010. The Pascal Visual Object Classes (VOC) Challenge. Int. J. Comput. Vis. (2010)."},{"key":"e_1_3_2_1_8_1","volume-title":"Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks. In International Conference on Machine Learning (ICML).","author":"Finn Chelsea","year":"2017","unstructured":"Chelsea Finn, Pieter Abbeel, and Sergey Levine. 2017. Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3088605"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00275"},{"key":"e_1_3_2_1_11_1","volume-title":"Deep Residual Learning for Image Recognition. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_12_1","volume-title":"CLIPScore: A Reference-free Evaluation Metric for Image Captioning. In Conference on Empirical Methods in Natural Language Processing (EMNLP). 7514--7528","author":"Hessel Jack","year":"2021","unstructured":"Jack Hessel, Ari Holtzman, Maxwell Forbes, Ronan Le Bras, and Yejin Choi. 2021. CLIPScore: A Reference-free Evaluation Metric for Image Captioning. In Conference on Empirical Methods in Natural Language Processing (EMNLP). 7514--7528."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3346405"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (ICML)","volume":"139","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc V. Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision. In Proceedings of the 38th International Conference on Machine Learning (ICML), Vol. 139. 4904--4916."},{"key":"e_1_3_2_1_15_1","volume-title":"International Conference on Machine Learning (ICML).","author":"Jiang Weisen","year":"2023","unstructured":"Weisen Jiang, Yu Zhang, and James Kwok. 2023. Effective Structured Prompting by Meta-Learning and Representative Verbalizer. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_16_1","volume-title":"MaPLe: Multi-modal Prompt Learning. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 19113--19122","author":"Khattak Muhammad Uzair","year":"2023","unstructured":"Muhammad Uzair Khattak, Hanoona Abdul Rasheed, Muhammad Maaz, Salman H. Khan, and Fahad Shahbaz Khan. 2023. MaPLe: Multi-modal Prompt Learning. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 19113--19122."},{"volume-title":"International Conference on Machine Learning (ICML).","author":"Li Junnan","key":"e_1_3_2_1_17_1","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven C. H. Hoi. 2023. BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_18_1","unstructured":"Tsung-Yi Lin Michael Maire Serge J. Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C. Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In ECCV."},{"key":"e_1_3_2_1_19_1","volume-title":"Visual Instruction Tuning. In Annual Conference on Neural Information Processing Systems 'NeurIPS).","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual Instruction Tuning. In Annual Conference on Neural Information Processing Systems 'NeurIPS)."},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Liu Yanbin","year":"2019","unstructured":"Yanbin Liu, Juho Lee, Minseop Park, Saehoon Kim, Eunho Yang, Sung Ju Hwang, and Yi Yang. 2019. Learning to Propagate Labels: Transductive Propagation Network for Few-Shot Learning. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_21_1","volume-title":"Meta Networks. In International Conference on Machine Learning (ICML).","author":"Munkhdalai Tsendsuren","year":"2017","unstructured":"Tsendsuren Munkhdalai and Hong Yu. 2017. Meta Networks. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_22_1","volume-title":"The Neglected Tails of Vision-Language Models. CoRR","author":"Parashar Shubham","year":"2024","unstructured":"Shubham Parashar, Zhiqiu Lin, Tian Liu, Xiangjue Dong, Yanan Li, Deva Ramanan, James Caverlee, and Shu Kong. 2024. The Neglected Tails of Vision-Language Models. CoRR, Vol. abs\/2401.12425 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML)","volume":"139","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML), Vol. 139. 8748--8763."},{"key":"e_1_3_2_1_24_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML).","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_25_1","volume-title":"Asymmetric Loss For Multi-Label Classification. In IEEE\/CVF International Conference on Computer Vision (ICCV).","author":"Ridnik Tal","year":"2021","unstructured":"Tal Ridnik, Emanuel Ben Baruch, Nadav Zamir, Asaf Noy, Itamar Friedman, Matan Protter, and Lihi Zelnik-Manor. 2021. Asymmetric Loss For Multi-Label Classification. In IEEE\/CVF International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_1_26_1","volume-title":"Embedding Propagation: Smoother Manifold for Few-Shot Classification. In ECCV.","author":"Rodr\u00edguez Pau","year":"2020","unstructured":"Pau Rodr\u00edguez, Issam H. Laradji, Alexandre Drouin, and Alexandre Lacoste. 2020. Embedding Propagation: Smoother Manifold for Few-Shot Classification. In ECCV."},{"key":"e_1_3_2_1_27_1","volume-title":"LoGoPrompt: Synthetic Text Images Can Be Good Visual Prompts for Vision-Language Models. In IEEE\/CVF International Conference on Computer Vision (ICCV). 2920--2929","author":"Shi Cheng","year":"2023","unstructured":"Cheng Shi and Sibei Yang. 2023. LoGoPrompt: Synthetic Text Images Can Be Good Visual Prompts for Vision-Language Models. In IEEE\/CVF International Conference on Computer Vision (ICCV). 2920--2929."},{"volume-title":"Prototypical Networks for Few-shot Learning. In Annual Conference on Neural Information Processing Systems (NeurIPS).","author":"Snell Jake","key":"e_1_3_2_1_28_1","unstructured":"Jake Snell, Kevin Swersky, and Richard S. Zemel. 2017. Prototypical Networks for Few-shot Learning. In Annual Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_29_1","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS).","author":"Sun Ximeng","year":"2022","unstructured":"Ximeng Sun, Ping Hu, and Kate Saenko. 2022. DualCoOp: Fast Adaptation to Multi-Label Recognition with Limited Annotations. In Annual Conference on Neural Information Processing Systems (NeurIPS)."},{"volume-title":"IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Sung Flood","key":"e_1_3_2_1_30_1","unstructured":"Flood Sung, Yongxin Yang, Li Zhang, Tao Xiang, Philip H. S. Torr, and Timothy M. Hospedales. 2018. Learning to Compare: Relation Network for Few-Shot Learning. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_31_1","volume-title":"Pretraining Concept Frequency Determines Multimodal Model Performance. CoRR","author":"Udandarao Vishaal","year":"2024","unstructured":"Vishaal Udandarao, Ameya Prabhu, Adhiraj Ghosh, Yash Sharma, Philip H. S. Torr, Adel Bibi, Samuel Albanie, and Matthias Bethge. 2024. No \"Zero-Shot\" Without Exponential Data: Pretraining Concept Frequency Determines Multimodal Model Performance. CoRR, Vol. abs\/2404.04125 (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"Visualizing data using t-SNE. JMLR","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. JMLR (2008)."},{"key":"e_1_3_2_1_33_1","volume-title":"Hierarchical Prompt Learning for Compositional Zero-Shot Recognition. In International Joint Conference on Artificial Intelligence (IJCAI). 1470--1478","author":"Wang Henan","year":"2023","unstructured":"Henan Wang, Muli Yang, Kun Wei, and Cheng Deng. 2023. Hierarchical Prompt Learning for Compositional Zero-Shot Recognition. In International Joint Conference on Artificial Intelligence (IJCAI). 1470--1478."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00024"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26263"},{"key":"e_1_3_2_1_36_1","volume-title":"Inferring Prototypes for Multi-Label Few-Shot Image Classification with Word Vector Guided Attention. In AAAI Conference on Artificial Intelligence (AAAI).","author":"Yan Kun","year":"2022","unstructured":"Kun Yan, Chenbin Zhang, Jun Hou, Ping Wang, Zied Bouraoui, Shoaib Jameel, and Steven Schockaert. 2022. Inferring Prototypes for Multi-Label Few-Shot Image Classification with Word Vector Guided Attention. In AAAI Conference on Artificial Intelligence (AAAI)."},{"volume-title":"Orderless Recurrent Models for Multi-Label Classification. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Yazici Vacit Oguz","key":"e_1_3_2_1_37_1","unstructured":"Vacit Oguz Yazici, Abel Gonzalez-Garcia, Arnau Ramisa, Bartlomiej Twardowski, and Joost van de Weijer. 2020. Orderless Recurrent Models for Multi-Label Classification. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"volume-title":"Orderless Recurrent Models for Multi-Label Classification. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Yazici Vacit Oguz","key":"e_1_3_2_1_38_1","unstructured":"Vacit Oguz Yazici, Abel Gonzalez-Garcia, Arnau Ramisa, Bartlomiej Twardowski, and Joost van de Weijer. 2020. Orderless Recurrent Models for Multi-Label Classification. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_39_1","volume-title":"Bayesian Model-Agnostic Meta-Learning. In Annual Conference on Neural Information Processing Systems (NeurIPS). 7343--7353","author":"Yoon Jaesik","year":"2018","unstructured":"Jaesik Yoon, Taesup Kim, Ousmane Dia, Sungwoong Kim, Yoshua Bengio, and Sungjin Ahn. 2018. Bayesian Model-Agnostic Meta-Learning. In Annual Conference on Neural Information Processing Systems (NeurIPS). 7343--7353."},{"key":"e_1_3_2_1_40_1","volume-title":"Cross-Modality Attention with Semantic Graph Embedding for Multi-Label Classification. In AAAI Conference on Artificial Intelligence (AAAI).","author":"You Renchun","year":"2020","unstructured":"Renchun You, Zhiyao Guo, Lei Cui, Xiang Long, Yingze Bao, and Shilei Wen. 2020. Cross-Modality Attention with Semantic Graph Embedding for Multi-Label Classification. In AAAI Conference on Artificial Intelligence (AAAI)."},{"key":"e_1_3_2_1_41_1","volume-title":"Transformer-based Dual Relation Graph for Multi-label Image Recognition. In IEEE\/CVF International Conference on Computer Vision (ICCV). 163--172","author":"Zhao Jiawei","year":"2021","unstructured":"Jiawei Zhao, Ke Yan, Yifan Zhao, Xiaowei Guo, Feiyue Huang, and Jia Li. 2021. Transformer-based Dual Relation Graph for Multi-label Image Recognition. In IEEE\/CVF International Conference on Computer Vision (ICCV). 163--172."},{"key":"e_1_3_2_1_42_1","volume-title":"Conditional Prompt Learning for Vision-Language Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 16795--16804","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2022. Conditional Prompt Learning for Vision-Language Models. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 16795--16804."},{"key":"e_1_3_2_1_43_1","volume-title":"Chen Change Loy, and Ziwei Liu","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2022. Learning to Prompt for Vision-Language Models. Int. J. Comput. Vis. (2022)."},{"key":"e_1_3_2_1_44_1","volume-title":"MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models. CoRR","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models. CoRR, Vol. abs\/2304.10592 (2023)."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Boise ID USA","acronym":"CIKM '24"},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679963","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679963","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:09Z","timestamp":1750294689000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679963"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":44,"alternative-id":["10.1145\/3627673.3679963","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679963","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}