{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:40:02Z","timestamp":1755862802822,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T00:00:00Z","timestamp":1706832000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Science and Technology Innovation Major Project of Ningbo","award":["2022Z082)"],"award-info":[{"award-number":["2022Z082)"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,2]]},"DOI":"10.1145\/3651671.3651674","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T18:55:50Z","timestamp":1717786550000},"page":"517-524","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-modal Contextual Prompt Learning for Multi-label Classification with Partial Labels"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6027-1091","authenticated-orcid":false,"given":"Rui","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Zhejiang Sci-Tech University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2003-0728","authenticated-orcid":false,"given":"Zhengxin","family":"Pan","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9618-8965","authenticated-orcid":false,"given":"Fangyu","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Advanced Technology, Xi'an-Jiaotong Liverpool University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8045-9932","authenticated-orcid":false,"given":"Yifan","family":"Lv","sequence":"additional","affiliation":[{"name":"School of Computing and Data Engineering, NingboTech University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5762-5763","authenticated-orcid":false,"given":"Bailing","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computing and Data Engineering, NingboTech University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-00563-8_22"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3025814"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19910"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12281"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00061"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00532"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00099"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_9_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00074"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_2_1_12_1","volume-title":"Clip-adapter: Better vision-language models with feature adapters. arXiv preprint arXiv:2110.04544","author":"Gao Peng","year":"2021","unstructured":"Peng Gao, Shijie Geng, Renrui Zhang, Teli Ma, Rongyao Fang, Yongfeng Zhang, Hongsheng Li, and Yu Qiao. 2021. Clip-adapter: Better vision-language models with feature adapters. arXiv preprint arXiv:2110.04544 (2021)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735\u20131780."},{"key":"e_1_3_2_1_15_1","volume-title":"Unsupervised prompt learning for vision-language models. arXiv preprint arXiv:2204.03649","author":"Huang Tony","year":"2022","unstructured":"Tony Huang, Jack Chu, and Fangyun Wei. 2022. Unsupervised prompt learning for vision-language models. arXiv preprint arXiv:2204.03649 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00944"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00880"},{"key":"e_1_3_2_1_18_1","volume-title":"International conference on machine learning. PMLR, 4904\u20134916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904\u20134916."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings, Part VII 14","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Laurens Van Der\u00a0Maaten, Allan Jabri, and Nicolas Vasilache. 2016. Learning visual features from large weakly supervised data. In Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VII 14. Springer, 67\u201384."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01376"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMIT.2010.34"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.443"},{"key":"e_1_3_2_1_24_1","volume-title":"On the optimality of classifier chain for multi-label classification. Advances in Neural Information Processing Systems 28","author":"Liu Weiwei","year":"2015","unstructured":"Weiwei Liu and Ivor Tsang. 2015. On the optimality of classifier chain for multi-label classification. Advances in Neural Information Processing Systems 28 (2015)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00514"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_12"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17123"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20105"},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00015"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_42"},{"key":"e_1_3_2_1_34_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_35_1","first-page":"30569","article-title":"Dualcoop: Fast adaptation to multi-label recognition with limited annotations","volume":"35","author":"Sun Ximeng","year":"2022","unstructured":"Ximeng Sun, Ping Hu, and Kate Saenko. 2022. Dualcoop: Fast adaptation to multi-label recognition with limited annotations. Advances in Neural Information Processing Systems 35 (2022), 30569\u201330582.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.4018\/jdwm.2007070101"},{"key":"e_1_3_2_1_37_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.251"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6909"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.58"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.58"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings, Part XXI 16","author":"Ye Jin","year":"2020","unstructured":"Jin Ye, Junjun He, Xiaojiang Peng, Wenhao Wu, and Yu Qiao. 2020. Attention-driven dynamic graph convolutional network for multi-label image recognition. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXI 16. Springer, 649\u2013665."},{"key":"e_1_3_2_1_43_1","volume-title":"Instance-weighted central similarity for multi-label image retrieval. arXiv preprint arXiv:2108.05274","author":"Zhang Zhiwei","year":"2021","unstructured":"Zhiwei Zhang and Hanyu Peng. 2021. Instance-weighted central similarity for multi-label image retrieval. arXiv preprint arXiv:2108.05274 (2021)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/WI-IAT.2014.110"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"}],"event":{"name":"ICMLC 2024: 2024 16th International Conference on Machine Learning and Computing","acronym":"ICMLC 2024","location":"Shenzhen China"},"container-title":["Proceedings of the 2024 16th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651674","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3651671.3651674","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T11:20:45Z","timestamp":1755861645000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3651671.3651674"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,2]]},"references-count":46,"alternative-id":["10.1145\/3651671.3651674","10.1145\/3651671"],"URL":"https:\/\/doi.org\/10.1145\/3651671.3651674","relation":{},"subject":[],"published":{"date-parts":[[2024,2,2]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}