{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:56:00Z","timestamp":1765310160137,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFB3312900"],"award-info":[{"award-number":["2024YFB3312900"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Provincial Natural Science Foundation of China","award":["LD25F020001"],"award-info":[{"award-number":["LD25F020001"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["226-2025-00057"],"award-info":[{"award-number":["226-2025-00057"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hong Kong SAR RGC Early Career Scheme","award":["26208924"],"award-info":[{"award-number":["26208924"]}]},{"name":"National Natural Science Foundation of China Young Scholar Fund","award":["62402408"],"award-info":[{"award-number":["62402408"]}]},{"name":"HKUST Sports Science and Technology Research Grant","award":["SSTRG24EG04"],"award-info":[{"award-number":["SSTRG24EG04"]}]},{"name":"AI Chip Center for Emerging Smart Systems"},{"name":"InnoHK initiative of the Innovation and Technology Commission of the Hong Kong Special Administrative Region Government"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755208","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:38Z","timestamp":1761377198000},"page":"3827-3836","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Compositional Zero-shot Learning via Progressive Language-based Observations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5678-4487","authenticated-orcid":false,"given":"Lin","family":"Li","sequence":"first","affiliation":[{"name":"AI Chip Center for Emerging Smart Systems, Hong Kong, China and The Hong Kong University of Science and Technology, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9227-007X","authenticated-orcid":false,"given":"Guikun","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1091-7994","authenticated-orcid":false,"given":"Zhen","family":"Wang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6142-9914","authenticated-orcid":false,"given":"Jun","family":"Xiao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6148-9709","authenticated-orcid":false,"given":"Long","family":"Chen","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"23716","volume-title":"NIPS","volume":"35","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al., 2022. Flamingo: a visual language model for few-shot learning. In NIPS, Vol. 35. 23716-23736."},{"key":"e_1_3_2_1_2_1","first-page":"4645","article-title":"On leveraging variational graph embeddings for open world compositional zero-shot learning","author":"Anwaar Muhammad Umer","year":"2022","unstructured":"Muhammad Umer Anwaar, Zhihui Pan, and Martin Kleinsteuber. 2022. On leveraging variational graph embeddings for open world compositional zero-shot learning. In ACM MM. 4645-4654.","journal-title":"ACM MM."},{"key":"e_1_3_2_1_3_1","volume-title":"European Conference on Computer Vision. Springer, 107-123","author":"Bao Wentao","year":"2024","unstructured":"Wentao Bao, Lichang Chen, Heng Huang, and Yu Kong. 2024. Prompting language-informed distribution for compositional zero-shot learning. In European Conference on Computer Vision. Springer, 107-123."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_2_1_5_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al., 2020. Language models are few-shot learners. In NIPS, Vol. 33. 1877-1901.","journal-title":"NIPS"},{"key":"e_1_3_2_1_6_1","first-page":"15315","article-title":"Learning Attention as Disentangler for Compositional Zero-shot Learning","author":"Hao Shaozhe","year":"2023","unstructured":"Shaozhe Hao, Kai Han, and Kwan-Yee K Wong. 2023. Learning Attention as Disentangler for Compositional Zero-shot Learning. In CVPR. 15315-15324.","journal-title":"CVPR."},{"key":"e_1_3_2_1_7_1","first-page":"2790","article-title":"Parameter-efficient transfer learning for NLP","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. In ICML. PMLR, 2790-2799.","journal-title":"ICML. PMLR"},{"key":"e_1_3_2_1_8_1","unstructured":"Edward J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2022b. LoRA: Low-Rank Adaptation of Large Language Models. In ICLR."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25168"},{"key":"e_1_3_2_1_10_1","volume-title":"Promptcap: Prompt-guided task-aware image captioning. arXiv preprint arXiv:2211.09699","author":"Hu Yushi","year":"2022","unstructured":"Yushi Hu, Hang Hua, Zhengyuan Yang, Weijia Shi, Noah A Smith, and Jiebo Luo. 2022a. Promptcap: Prompt-guided task-aware image captioning. arXiv preprint arXiv:2211.09699 (2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"Troika: Multi-Path Cross-Modal Traction for Compositional Zero-Shot Learning. arXiv preprint arXiv:2303.15230","author":"Huang Siteng","year":"2023","unstructured":"Siteng Huang, Biao Gong, Yutong Feng, Yiliang Lv, and Donglin Wang. 2023. Troika: Multi-Path Cross-Modal Traction for Compositional Zero-Shot Learning. arXiv preprint arXiv:2303.15230 (2023)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i11.29164"},{"key":"e_1_3_2_1_13_1","first-page":"1383","article-title":"Discovering states and transformations in image collections","author":"Isola Phillip","year":"2015","unstructured":"Phillip Isola, Joseph J Lim, and Edward H Adelson. 2015. Discovering states and transformations in image collections. In CVPR. 1383-1391.","journal-title":"CVPR."},{"key":"e_1_3_2_1_14_1","volume-title":"Revealing the Proximate Long-Tail Distribution in Compositional Zero-Shot Learning. AAAI","author":"Jiang Chenyi","year":"2024","unstructured":"Chenyi Jiang and Haofeng Zhang. 2024. Revealing the Proximate Long-Tail Distribution in Compositional Zero-Shot Learning. AAAI (2024)."},{"key":"e_1_3_2_1_15_1","first-page":"9336","article-title":"Kg-sp: Knowledge guided simple primitives for open world compositional zero-shot learning","author":"Karthik Shyamgopal","year":"2022","unstructured":"Shyamgopal Karthik, Massimiliano Mancini, and Zeynep Akata. 2022. Kg-sp: Knowledge guided simple primitives for open world compositional zero-shot learning. In CVPR. 9336-9345.","journal-title":"CVPR."},{"key":"e_1_3_2_1_16_1","first-page":"3828","article-title":"Learning attention propagation for compositional zero-shot learning","author":"Zain Ali Khan Muhammad Gul","year":"2023","unstructured":"Muhammad Gul Zain Ali Khan, Muhammad Ferjad Naeem, Luc Van Gool, Alain Pagani, Didier Stricker, and Muhammad Zeshan Afzal. 2023. Learning attention propagation for compositional zero-shot learning. In WACV. 3828-3837.","journal-title":"WACV."},{"key":"e_1_3_2_1_17_1","volume-title":"Hierarchical Visual Primitive Experts for Compositional Zero-Shot Learning. ICCV","author":"Kim Hanjae","year":"2023","unstructured":"Hanjae Kim, Jiyoung Lee, Seongheon Park, and Kwanghoon Sohn. 2023. Hierarchical Visual Primitive Experts for Compositional Zero-Shot Learning. ICCV (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. Advances Neural Inf. Process. Syst","volume":"36","author":"Konkle Talia","year":"2024","unstructured":"Talia Konkle and George Alvarez. 2024. Cognitive steering in deep neural networks via long-range modulatory feedback connections. Proc. Advances Neural Inf. Process. Syst, Vol. 36 (2024)."},{"key":"e_1_3_2_1_19_1","first-page":"12888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022a. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In ICML. PMLR, 12888-12900.","journal-title":"ICML. PMLR"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611724"},{"key":"e_1_3_2_1_21_1","volume-title":"Zero-shot Visual Relation Detection via Composite Visual Cues from Large Language Models. arXiv preprint arXiv:2305.12476","author":"Li Lin","year":"2023","unstructured":"Lin Li, Jun Xiao, Guikun Chen, Jian Shao, Yueting Zhuang, and Long Chen. 2023b. Zero-shot Visual Relation Detection via Composite Visual Cues from Large Language Models. arXiv preprint arXiv:2305.12476 (2023)."},{"key":"e_1_3_2_1_22_1","first-page":"9326","article-title":"Siamese contrastive embedding network for compositional zero-shot learning","author":"Li Xiangyu","year":"2022","unstructured":"Xiangyu Li, Xu Yang, Kun Wei, Cheng Deng, and Muli Yang. 2022b. Siamese contrastive embedding network for compositional zero-shot learning. In CVPR. 9326-9335.","journal-title":"CVPR."},{"key":"e_1_3_2_1_23_1","volume-title":"Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning. arXiv preprint arXiv:2402.17251","author":"Li Yun","year":"2024","unstructured":"Yun Li, Zhe Liu, Hang Chen, and Lina Yao. 2024. Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning. arXiv preprint arXiv:2402.17251 (2024)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00171"},{"key":"e_1_3_2_1_25_1","first-page":"11316","article-title":"Symmetry and group in attribute-object compositions","author":"Li Yong-Lu","year":"2020","unstructured":"Yong-Lu Li, Yue Xu, Xiaohan Mao, and Cewu Lu. 2020. Symmetry and group in attribute-object compositions. In CVPR. 11316-11325.","journal-title":"CVPR."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02000-2"},{"key":"e_1_3_2_1_27_1","first-page":"23560","article-title":"Decomposed soft prompt guided fusion enhancing for compositional zero-shot learning","author":"Lu Xiaocheng","year":"2023","unstructured":"Xiaocheng Lu, Song Guo, Ziming Liu, and Jingcai Guo. 2023a. Decomposed soft prompt guided fusion enhancing for compositional zero-shot learning. In CVPR. 23560-23569.","journal-title":"CVPR."},{"key":"e_1_3_2_1_28_1","volume-title":"DRPT: Disentangled and Recurrent Prompt Tuning for Compositional Zero-Shot Learning. arXiv preprint arXiv:2305.01239","author":"Lu Xiaocheng","year":"2023","unstructured":"Xiaocheng Lu, Ziming Liu, Song Guo, Jingcai Guo, Fushuo Huo, Sikai Bai, and Tao Han. 2023b. DRPT: Disentangled and Recurrent Prompt Tuning for Compositional Zero-Shot Learning. arXiv preprint arXiv:2305.01239 (2023)."},{"key":"e_1_3_2_1_29_1","first-page":"5206","article-title":"Prompt distribution learning","author":"Lu Yuning","year":"2022","unstructured":"Yuning Lu, Jianzhuang Liu, Yonggang Zhang, Yajing Liu, and Xinmei Tian. 2022. Prompt distribution learning. In CVPR]. 5206-5215.","journal-title":"CVPR]."},{"key":"e_1_3_2_1_30_1","first-page":"5222","article-title":"Open world compositional zero-shot learning","author":"Mancini Massimiliano","year":"2021","unstructured":"Massimiliano Mancini, Muhammad Ferjad Naeem, Yongqin Xian, and Zeynep Akata. 2021. Open world compositional zero-shot learning. In CVPR. 5222-5230.","journal-title":"CVPR."},{"key":"e_1_3_2_1_31_1","volume-title":"Yongqin Xian, and Zeynep Akata.","author":"Mancini Massimiliano","year":"2022","unstructured":"Massimiliano Mancini, Muhammad Ferjad Naeem, Yongqin Xian, and Zeynep Akata. 2022. Learning graph embeddings for open world compositional zero-shot learning. TPAMI (2022)."},{"key":"e_1_3_2_1_32_1","unstructured":"Sachit Menon and Carl Vondrick. 2023. Visual classification via description from large language models. In ICLR."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01895-7"},{"key":"e_1_3_2_1_34_1","first-page":"1792","article-title":"From red wine to red tomato: Composition with context","author":"Misra Ishan","year":"2017","unstructured":"Ishan Misra, Abhinav Gupta, and Martial Hebert. 2017. From red wine to red tomato: Composition with context. In CVPR. 1792-1801.","journal-title":"CVPR."},{"key":"e_1_3_2_1_35_1","first-page":"953","article-title":"Learning graph embeddings for compositional zero-shot learning","author":"Naeem Muhammad Ferjad","year":"2021","unstructured":"Muhammad Ferjad Naeem, Yongqin Xian, Federico Tombari, and Zeynep Akata. 2021. Learning graph embeddings for compositional zero-shot learning. In CVPR. 953-962.","journal-title":"CVPR."},{"key":"e_1_3_2_1_36_1","volume-title":"Learning to compose soft prompts for compositional zero-shot learning. ICLR","author":"Nayak Nihal V","year":"2023","unstructured":"Nihal V Nayak, Peilin Yu, and Stephen H Bach. 2023. Learning to compose soft prompts for compositional zero-shot learning. ICLR (2023)."},{"key":"e_1_3_2_1_37_1","first-page":"26342","article-title":"Chils: Zero-shot image classification with hierarchical label sets","author":"Novack Zachary","year":"2023","unstructured":"Zachary Novack, Julian McAuley, Zachary Chase Lipton, and Saurabh Garg. 2023. Chils: Zero-shot image classification with hierarchical label sets. In ICML. PMLR, 26342-26362.","journal-title":"ICML. PMLR"},{"volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","key":"e_1_3_2_1_38_1","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In NIPS, Vol. 32."},{"key":"e_1_3_2_1_39_1","first-page":"3593","article-title":"Task-driven modular networks for zero-shot compositional learning","author":"Purushwalkam Senthil","year":"2019","unstructured":"Senthil Purushwalkam, Maximilian Nickel, Abhinav Gupta, and Marc'Aurelio Ranzato. 2019. Task-driven modular networks for zero-shot compositional learning. In ICCV. 3593-3602.","journal-title":"ICCV."},{"key":"e_1_3_2_1_40_1","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In ICML. PMLR, 8748-8763.","journal-title":"ICML. PMLR"},{"key":"e_1_3_2_1_41_1","volume-title":"Counterfactual co-occurring learning for bias mitigation in weakly-supervised object localization","author":"Shao Feifei","year":"2025","unstructured":"Feifei Shao, Yawei Luo, Lei Chen, Ping Liu, Wei Yang, Yi Yang, and Jun Xiao. 2025. Counterfactual co-occurring learning for bias mitigation in weakly-supervised object localization. IEEE Trans. Multimedia (2025)."},{"key":"e_1_3_2_1_42_1","volume-title":"Son Duy Dao, and Jianfei Cai","author":"Shi Hengcan","year":"2024","unstructured":"Hengcan Shi, Son Duy Dao, and Jianfei Cai. 2024a. LLMFormer: Large Language Model for Open-Vocabulary Semantic Segmentation. Int. J. Comput. Vis. (2024), 1-18."},{"key":"e_1_3_2_1_43_1","volume-title":"Int. J. Comput. Vis.","author":"Shi Hanrong","year":"2024","unstructured":"Hanrong Shi, Lin Li, Jun Xiao, Yueting Zhuang, and Long Chen. 2024b. From Easy to Hard: Learning Curricular Shape-aware Features for Robust Panoptic Scene Graph Generation. Int. J. Comput. Vis. (2024), 1-20."},{"key":"e_1_3_2_1_44_1","unstructured":"Rishabh Singh. 2022. Rethinking Open World Compositional Zero Shot Learning: methods features and the road ahead. Master's thesis. ETH Zurich Computer Vision Laboratory."},{"key":"e_1_3_2_1_45_1","first-page":"200","volume-title":"NIPS","volume":"34","author":"Tsimpoukelli Maria","year":"2021","unstructured":"Maria Tsimpoukelli, Jacob L Menick, Serkan Cabi, SM Eslami, Oriol Vinyals, and Felix Hill. 2021. Multimodal few-shot learning with frozen language models. In NIPS, Vol. 34. 200-212."},{"key":"e_1_3_2_1_46_1","first-page":"3930","article-title":"Domain-Adaptive Semantic Segmentation Emerges From Vision-Language Supervised Domain-Debiased Self-Training","author":"Wang Huayu","year":"2024","unstructured":"Huayu Wang, Zekun Jiang, Lingxi Xie, Dongsheng Jiang, Wei Shen, and Qi Tian. 2024. Domain-Adaptive Semantic Segmentation Emerges From Vision-Language Supervised Domain-Debiased Self-Training. In ICASSP. IEEE, 3930-3934.","journal-title":"ICASSP. IEEE"},{"key":"e_1_3_2_1_47_1","first-page":"11197","article-title":"Learning Conditional Attributes for Compositional Zero-Shot Learning","author":"Wang Qingsheng","year":"2023","unstructured":"Qingsheng Wang, Lingqiao Liu, Chenchen Jing, Hao Chen, Guoqiang Liang, Peng Wang, and Chunhua Shen. 2023. Learning Conditional Attributes for Compositional Zero-Shot Learning. In CVPR. 11197-11206.","journal-title":"CVPR."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02179-4"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3025661"},{"key":"e_1_3_2_1_50_1","volume-title":"GIPCOL: Graph-Injected Soft Prompting for Compositional Zero-Shot Learning. WACV","author":"Xu Guangyue","year":"2023","unstructured":"Guangyue Xu, Joyce Chai, and Parisa Kordjamshidi. 2023. GIPCOL: Graph-Injected Soft Prompting for Compositional Zero-Shot Learning. WACV (2023)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Yue Yang Artemis Panagopoulou Shenghao Zhou Daniel Jin Chris Callison-Burch and Mark Yatskar. 2023. Language in a bottle: Language model guided concept bottlenecks for interpretable image classification. In CVPR.","DOI":"10.1109\/CVPR52729.2023.01839"},{"key":"e_1_3_2_1_52_1","first-page":"192","article-title":"Fine-grained visual comparisons with local learning","author":"Yu Aron","year":"2014","unstructured":"Aron Yu and Kristen Grauman. 2014. Fine-grained visual comparisons with local learning. In CVPR. 192-199.","journal-title":"CVPR."},{"key":"e_1_3_2_1_53_1","volume-title":"Int. J. Comput. Vis.","author":"Zang Yuhang","year":"2024","unstructured":"Yuhang Zang, Wei Li, Jun Han, Kaiyang Zhou, and Chen Change Loy. 2024. Contextual object detection with multimodal large language models. Int. J. Comput. Vis. (2024), 1-19."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"Renrui Zhang Xiangfei Hu Bohao Li Siyuan Huang Hanqiu Deng Yu Qiao Peng Gao and Hongsheng Li. 2023. Prompt generate then cache: Cascade of foundation models makes strong few-shot learners. In CVPR.","DOI":"10.1109\/CVPR52729.2023.01460"},{"volume-title":"Learning invariant visual representations for compositional zero-shot learning","author":"Zhang Tian","key":"e_1_3_2_1_55_1","unstructured":"Tian Zhang, Kongming Liang, Ruoyi Du, Xian Sun, Zhanyu Ma, and Jun Guo. 2022. Learning invariant visual representations for compositional zero-shot learning. In ECCV. Springer, 339-355."},{"key":"e_1_3_2_1_56_1","first-page":"1721","article-title":"CAILA","author":"Zheng Zhaoheng","year":"2024","unstructured":"Zhaoheng Zheng, Haidong Zhu, and Ram Nevatia. 2024. CAILA: Concept-Aware Intra-Layer Adapters for Compositional Zero-Shot Learning. In WACV. 1721-1731.","journal-title":"Concept-Aware Intra-Layer Adapters for Compositional Zero-Shot Learning. In WACV."},{"key":"e_1_3_2_1_57_1","first-page":"16816","article-title":"Conditional prompt learning for vision-language models","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2022a. Conditional prompt learning for vision-language models. In CVPR. 16816-16825.","journal-title":"CVPR."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25496"},{"key":"e_1_3_2_1_60_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023a. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02036-4"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755208","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:52:38Z","timestamp":1765309958000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755208"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":61,"alternative-id":["10.1145\/3746027.3755208","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755208","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}