{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:32:35Z","timestamp":1778081555397,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":75,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681387","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"4302-4311","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Calibrating Prompt from History for Continual Vision-Language Retrieval and Grounding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3564-1628","authenticated-orcid":false,"given":"Tao","family":"Jin","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9306-0179","authenticated-orcid":false,"given":"Weicai","family":"Yan","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0974-9834","authenticated-orcid":false,"given":"Ye","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8693-7142","authenticated-orcid":false,"given":"Sihang","family":"Cai","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0403-7855","authenticated-orcid":false,"given":"Qifan","family":"Shuai","sequence":"additional","affiliation":[{"name":"Southeast University Cheng Xian College, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6121-0384","authenticated-orcid":false,"given":"Zhou","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Afra Feyza Aky\u00fcrek Ekin Aky\u00fcrek Derry Wijaya and Jacob Andreas. 2022. Subspace Regularizers for Few-Shot Class Incremental Learning. In ICLR."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Rahaf Aljundi Francesca Babiloni Mohamed Elhoseiny Marcus Rohrbach and Tinne Tuytelaars. 2018. Memory aware synapses: Learning what (not) to forget. In ECCV.","DOI":"10.1007\/978-3-030-01219-9_9"},{"key":"e_1_3_2_1_3_1","volume-title":"Visual Prompting: Modifying Pixel Space to Adapt Pre-trained Models. arXiv preprint arXiv:2203.17274","author":"Bahng Hyojin","year":"2022","unstructured":"Hyojin Bahng, Ali Jahanian, Swami Sankaranarayanan, and Phillip Isola. 2022. Visual Prompting: Modifying Pixel Space to Adapt Pre-trained Models. arXiv preprint arXiv:2203.17274 (2022)."},{"key":"e_1_3_2_1_4_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_5_1","unstructured":"Hyuntak Cha Jaeho Lee and Jinwoo Shin. 2021. Co2l: Contrastive continual learning. In ICCV."},{"key":"e_1_3_2_1_6_1","volume-title":"Efficient lifelong learning with a-gem. arXiv preprint arXiv:1812.00420","author":"Chaudhry Arslan","year":"2018","unstructured":"Arslan Chaudhry, Marc'Aurelio Ranzato, Marcus Rohrbach, and Mohamed Elhoseiny. 2018. Efficient lifelong learning with a-gem. arXiv preprint arXiv:1812.00420 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Philip HS Torr, and Marc'Aurelio Ranzato.","author":"Chaudhry Arslan","year":"2019","unstructured":"Arslan Chaudhry, Marcus Rohrbach, Mohamed Elhoseiny, Thalaiyasingam Ajanthan, Puneet Kumar Dokania, Philip HS Torr, and Marc'Aurelio Ranzato. 2019. Continual Learning with Tiny Episodic Memories. arXiv preprint arXiv:1902.10486 (2019)."},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. PMLR, 1691--1703","author":"Chen Mark","year":"2020","unstructured":"Mark Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, and Ilya Sutskever. 2020. Generative pretraining from pixels. In International conference on machine learning. PMLR, 1691--1703."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01442"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547943"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i10.29054"},{"key":"e_1_3_2_1_12_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_13_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_14_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Arthur Douillard Alexandre Ram\u00e9 Guillaume Couairon and Matthieu Cord. 2022. DyTox: Transformers for Continual Learning with DYnamic TOken eXpansion. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00907"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01637"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681347"},{"key":"e_1_3_2_1_18_1","volume-title":"Multimodal Prompt Learning with Missing Modalities for Sentiment Analysis and Emotion Recognition. arXiv preprint arXiv:2407.05374","author":"Guo Zirun","year":"2024","unstructured":"Zirun Guo, Tao Jin, and Zhou Zhao. 2024. Multimodal Prompt Learning with Missing Modalities for Sentiment Analysis and Emotion Recognition. arXiv preprint arXiv:2407.05374 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_20_1","unstructured":"Wenpeng Hu Zhou Lin Bing Liu Chongyang Tao Zhengwei Tao Jinwen Ma Dongyan Zhao and Rui Yan. 2018. Overcoming catastrophic forgetting for continual learning via model adaptation. In ICLR."},{"key":"e_1_3_2_1_21_1","volume-title":"Unsupervised Prompt Learning for Vision-Language Models. arXiv preprint arXiv:2204.03649","author":"Huang Tony","year":"2022","unstructured":"Tony Huang, Jack Chu, and Fangyun Wei. 2022. Unsupervised Prompt Learning for Vision-Language Models. arXiv preprint arXiv:2204.03649 (2022)."},{"key":"e_1_3_2_1_22_1","volume-title":"Pixel-bert: Aligning image pixels with text by deep multi-modal transformers. arXiv preprint arXiv:2004.00849","author":"Huang Zhicheng","year":"2020","unstructured":"Zhicheng Huang, Zhaoyang Zeng, Bei Liu, Dongmei Fu, and Jianlong Fu. 2020. Pixel-bert: Aligning image pixels with text by deep multi-modal transformers. arXiv preprint arXiv:2004.00849 (2020)."},{"key":"e_1_3_2_1_23_1","unstructured":"Ching-Yi Hung Cheng-Hao Tu Cheng-En Wu Chien-Hung Chen Yi-Ming Chan and Chu-Song Chen. 2019. Compacting picking and growing for unforgetting continual learning. In NeurIPS."},{"key":"e_1_3_2_1_24_1","volume-title":"Visual Prompt Tuning. arXiv preprint arXiv:2203.12119","author":"Jia Menglin","year":"2022","unstructured":"Menglin Jia, Luming Tang, Bor-Chun Chen, Claire Cardie, Serge Belongie, Bharath Hariharan, and Ser-Nam Lim. 2022. Visual Prompt Tuning. arXiv preprint arXiv:2203.12119 (2022)."},{"key":"e_1_3_2_1_25_1","volume-title":"Low-rank hoca: Efficient high-order cross-modal attention for video captioning. arXiv preprint arXiv:1911.00212","author":"Jin Tao","year":"2019","unstructured":"Tao Jin, Siyu Huang, Yingming Li, and Zhongfei Zhang. 2019. Low-rank hoca: Efficient high-order cross-modal attention for video captioning. arXiv preprint arXiv:1911.00212 (2019)."},{"key":"e_1_3_2_1_26_1","volume-title":"Continual learning of a mixed sequence of similar and dissimilar tasks. Advances in neural information processing systems","author":"Ke Zixuan","year":"2020","unstructured":"Zixuan Ke, Bing Liu, and Xingchang Huang. 2020. Continual learning of a mixed sequence of similar and dissimilar tasks. Advances in neural information processing systems, Vol. 33 (2020), 18493--18504."},{"key":"e_1_3_2_1_27_1","unstructured":"Ronald Kemker and Christopher Kanan. 2018. FearNet: Brain-Inspired Model for Incremental Learning. In ICLR."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_2_1_30_1","unstructured":"Sang-Woo Lee Jin-Hwa Kim Jaehyun Jun Jung-Woo Ha and Byoung-Tak Zhang. 2017. Overcoming catastrophic forgetting by incremental moment matching. In NeurIPS."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3264524"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"e_1_3_2_1_34_1","volume-title":"Referring transformer: A one-step approach to multi-task visual grounding. Advances in neural information processing systems","author":"Li Muchen","year":"2021","unstructured":"Muchen Li and Leonid Sigal. 2021. Referring transformer: A one-step approach to multi-task visual grounding. Advances in neural information processing systems, Vol. 34 (2021), 19652--19664."},{"key":"e_1_3_2_1_35_1","volume-title":"Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","author":"Li Xiang Lisa","year":"2021","unstructured":"Xiang Lisa Li and Percy Liang. 2021. Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)."},{"key":"e_1_3_2_1_36_1","volume-title":"Learning without forgetting","author":"Li Zhizhong","year":"2017","unstructured":"Zhizhong Li and Derek Hoiem. 2017. Learning without forgetting. IEEE transactions on pattern analysis and machine intelligence, Vol. 40, 12 (2017), 2935--2947."},{"key":"e_1_3_2_1_37_1","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In ECCV."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.836"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01402"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.504"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_43_1","unstructured":"David Lopez-Paz and Marc'Aurelio Ranzato. 2017. Gradient episodic memory for continual learning. In NeurIPS."},{"key":"e_1_3_2_1_44_1","volume-title":"Sgdr: Stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983","author":"Loshchilov Ilya","year":"2016","unstructured":"Ilya Loshchilov and Frank Hutter. 2016. Sgdr: Stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983 (2016)."},{"key":"e_1_3_2_1_45_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In Advances in Neural Information Processing Systems. 13--23.","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In Advances in Neural Information Processing Systems. 13--23."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"e_1_3_2_1_47_1","volume-title":"Towards calibrated robust fine-tuning of vision-language models. arXiv preprint arXiv:2311.01723","author":"Oh Changdae","year":"2023","unstructured":"Changdae Oh, Mijoo Kim, Hyesu Lim, Junhyeok Park, Euiseog Jeong, Zhi-Qi Cheng, and Kyungwoo Song. 2023. Towards calibrated robust fine-tuning of vision-language models. arXiv preprint arXiv:2311.01723 (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"Imagebert: Cross-modal pre-training with large-scale weak-supervised image-text data. arXiv preprint arXiv:2001.07966","author":"Qi Di","year":"2020","unstructured":"Di Qi, Lin Su, Jia Song, Edward Cui, Taroon Bharti, and Arun Sacheti. 2020. Imagebert: Cross-modal pre-training with large-scale weak-supervised image-text data. arXiv preprint arXiv:2001.07966 (2020)."},{"key":"e_1_3_2_1_49_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In ICML."},{"key":"e_1_3_2_1_50_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_51_1","volume-title":"Progressive prompts: Continual learning for language models. arXiv preprint arXiv:2301.12314","author":"Razdaibiedina Anastasia","year":"2023","unstructured":"Anastasia Razdaibiedina, Yuning Mao, Rui Hou, Madian Khabsa, Mike Lewis, and Amjad Almahairi. 2023. Progressive prompts: Continual learning for language models. arXiv preprint arXiv:2301.12314 (2023)."},{"key":"e_1_3_2_1_52_1","unstructured":"Sylvestre-Alvise Rebuffi Alexander Kolesnikov Georg Sperl and Christoph H Lampert. 2017. icarl: Incremental classifier and representation learning. In CVPR."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1080\/09540099550039318"},{"key":"e_1_3_2_1_54_1","volume-title":"Jaehong Kim, and Jiwon Kim.","author":"Shin Hanul","year":"2017","unstructured":"Hanul Shin, Jung Kwon Lee, Jaehong Kim, and Jiwon Kim. 2017. Continual learning with deep generative replay. arXiv preprint arXiv:1705.08690 (2017)."},{"key":"e_1_3_2_1_55_1","volume-title":"Vl-bert: Pre-training of generic visual-linguistic representations. arXiv preprint arXiv:1908.08530","author":"Su Weijie","year":"2019","unstructured":"Weijie Su, Xizhou Zhu, Yue Cao, Bin Li, Lewei Lu, Furu Wei, and Jifeng Dai. 2019. Vl-bert: Pre-training of generic visual-linguistic representations. arXiv preprint arXiv:1908.08530 (2019)."},{"key":"e_1_3_2_1_56_1","volume-title":"Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490","author":"Tan Hao","year":"2019","unstructured":"Hao Tan and Mohit Bansal. 2019. Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490 (2019)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Gido M van de Ven Zhe Li and Andreas S Tolias. 2021. Class-Incremental Learning with Generative Classifiers. In CVPR.","DOI":"10.1109\/CVPRW53098.2021.00400"},{"key":"e_1_3_2_1_58_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_59_1","volume-title":"Prompt-Singer: Controllable Singing-Voice-Synthesis with Natural Language Prompt. arXiv preprint arXiv:2403.11780","author":"Wang Yongqi","year":"2024","unstructured":"Yongqi Wang, Ruofan Hu, Rongjie Huang, Zhiqing Hong, Ruiqi Li, Wenrui Liu, Fuming You, Tao Jin, and Zhou Zhao. 2024. Prompt-Singer: Controllable Singing-Voice-Synthesis with Natural Language Prompt. arXiv preprint arXiv:2403.11780 (2024)."},{"key":"e_1_3_2_1_60_1","volume-title":"S-Prompts Learning with Pre-trained Transformers: An Occam's Razor for Domain Incremental Learning. arXiv preprint arXiv:2207.12819","author":"Wang Yabin","year":"2022","unstructured":"Yabin Wang, Zhiwu Huang, and Xiaopeng Hong. 2022. S-Prompts Learning with Pre-trained Transformers: An Occam's Razor for Domain Incremental Learning. arXiv preprint arXiv:2207.12819 (2022)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.621"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Zifeng Wang Zizhao Zhang Sayna Ebrahimi Ruoxi Sun Han Zhang Chen-Yu Lee Xiaoqi Ren Guolong Su Vincent Perot Jennifer Dy et al. 2022. DualPrompt: Complementary Prompting for Rehearsal-free Continual Learning. arXiv preprint arXiv:2204.04799 (2022).","DOI":"10.1007\/978-3-031-19809-0_36"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Zifeng Wang Zizhao Zhang Chen-Yu Lee Han Zhang Ruoxi Sun Xiaoqi Ren Guolong Su Vincent Perot Jennifer Dy and Tomas Pfister. 2022. Learning to Prompt for Continual Learning. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00024"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"crossref","unstructured":"Yue Wu Yinpeng Chen Lijuan Wang Yuancheng Ye Zicheng Liu Yandong Guo and Yun Fu. 2019. Large scale incremental learning. In CVPR.","DOI":"10.1109\/CVPR.2019.00046"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681264"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681386"},{"key":"e_1_3_2_1_68_1","volume-title":"Lifelong learning with dynamically expandable networks. arXiv preprint arXiv:1708.01547","author":"Yoon Jaehong","year":"2017","unstructured":"Jaehong Yoon, Eunho Yang, Jeongtae Lee, and Sung Ju Hwang. 2017. Lifelong learning with dynamically expandable networks. arXiv preprint arXiv:1708.01547 (2017)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02191"},{"key":"e_1_3_2_1_70_1","volume-title":"Proceedings, Part II 14","author":"Yu Licheng","year":"2016","unstructured":"Licheng Yu, Patrick Poirson, Shan Yang, Alexander C Berg, and Tamara L Berg. 2016. Modeling context in referring expressions. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part II 14. Springer, 69--85."},{"key":"e_1_3_2_1_71_1","unstructured":"Friedemann Zenke Ben Poole and Surya Ganguli. 2017. Continual learning through synaptic intelligence. In ICML."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01752"},{"key":"e_1_3_2_1_73_1","volume-title":"Chen Change Loy, and Ziwei Liu","author":"Zhou Kaiyang","year":"2021","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2021. Learning to prompt for vision-language models. arXiv preprint arXiv:2109.01134 (2021)."},{"key":"e_1_3_2_1_74_1","volume-title":"Chen Change Loy, and Ziwei Liu","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2022. Conditional Prompt Learning for Vision-Language Models. In CVPR."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7005"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681387","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681387","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:44Z","timestamp":1750295864000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681387"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":75,"alternative-id":["10.1145\/3664647.3681387","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681387","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}