{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:03:40Z","timestamp":1778256220507,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2023CDJYGRH-YB18"],"award-info":[{"award-number":["2023CDJYGRH-YB18"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176030"],"award-info":[{"award-number":["62176030"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681171","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"2146-2155","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Category-Prompt Refined Feature Learning for Long-Tailed Multi-Label Image Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0691-2666","authenticated-orcid":false,"given":"Jiexuan","family":"Yan","sequence":"first","affiliation":[{"name":"School of Big Data &amp; Software Engineering, Chongqing University, Chongqing, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5610-0826","authenticated-orcid":false,"given":"Sheng","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Big Data &amp; Software Engineering, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9713-4416","authenticated-orcid":false,"given":"NanKun","family":"Mu","sequence":"additional","affiliation":[{"name":"College of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3926-7901","authenticated-orcid":false,"given":"Luwen","family":"Huangfu","sequence":"additional","affiliation":[{"name":"Fowler College of Business, San Diego State University, San Diego, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8678-2271","authenticated-orcid":false,"given":"Bo","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00130"},{"key":"e_1_3_2_2_2_1","volume-title":"A systematic study of the class imbalance problem in convolutional neural networks. Neural networks","author":"Buda Mateusz","year":"2018","unstructured":"Mateusz Buda, Atsuto Maki, and Maciej A Mazurowski. 2018. A systematic study of the class imbalance problem in convolutional neural networks. Neural networks, Vol. 106 (2018), 249--259."},{"key":"e_1_3_2_2_3_1","volume-title":"International conference on machine learning. PMLR, 872--881","author":"Byrd Jonathon","year":"2019","unstructured":"Jonathon Byrd and Zachary Lipton. 2019. What is the effect of importance weighting in deep learning?. In International conference on machine learning. PMLR, 872--881."},{"key":"e_1_3_2_2_4_1","volume-title":"Learning imbalanced datasets with label-distribution-aware margin loss. Advances in neural information processing systems","author":"Cao Kaidi","year":"2019","unstructured":"Kaidi Cao, Colin Wei, Adrien Gaidon, Nikos Arechiga, and Tengyu Ma. 2019. Learning imbalanced datasets with label-distribution-aware margin loss. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_5_1","volume-title":"Class-aware Learning for Imbalanced Multi-Label Classification. In 2023 IEEE 5th International Conference on Civil Aviation Safety and Information Technology (ICCASIT). IEEE, 903--907","author":"Chen Jiayao","year":"2023","unstructured":"Jiayao Chen and Shaoyuan Li. 2023. Class-aware Learning for Imbalanced Multi-Label Classification. In 2023 IEEE 5th International Conference on Civil Aviation Safety and Information Technology (ICCASIT). IEEE, 903--907."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12230"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00061"},{"key":"e_1_3_2_2_8_1","unstructured":"Zhaomin Chen Quan Cui Xiaoqin Zhang Ruoxi Deng Chaoqun Xia and Shijian Lu. [n. d.]. Towards Gradient Equalization and Feature Diversification for Long-Tailed Multi-Label Image Recognition. Available at SSRN 4518263 ( [n. d.])."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00532"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9860016"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00949"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_13_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Dong Bowen","year":"2022","unstructured":"Bowen Dong, Pan Zhou, Shuicheng Yan, and Wangmeng Zuo. 2022. LPT: long-tailed prompt tuning for image classification. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_2_14_1","volume-title":"Luc Van Gool, Christopher KI Williams, John Winn, and Andrew Zisserman.","author":"Everingham Mark","year":"2015","unstructured":"Mark Everingham, SM Ali Eslami, Luc Van Gool, Christopher KI Williams, John Winn, and Andrew Zisserman. 2015. The pascal visual object classes challenge: A retrospective. International journal of computer vision, Vol. 111 (2015), 98--136."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3088605"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01484"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00275"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2008.239"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25159"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.580"},{"key":"e_1_3_2_2_22_1","volume-title":"International Conference on Learning Representations.","author":"Kang Bingyi","year":"2020","unstructured":"Bingyi Kang, Yu Li, Sa Xie, Zehuan Yuan, and Jiashi Feng. 2020. Exploring balanced feature spaces for representation learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_23_1","volume-title":"Decoupling representation and classifier for long-tailed recognition. arXiv preprint arXiv:1910.09217","author":"Kang Bingyi","year":"2019","unstructured":"Bingyi Kang, Saining Xie, Marcus Rohrbach, Zhicheng Yan, Albert Gordo, Jiashi Feng, and Yannis Kalantidis. 2019. Decoupling representation and classifier for long-tailed recognition. arXiv preprint arXiv:1910.09217 (2019)."},{"key":"e_1_3_2_2_24_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_25_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_2_26_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems, Vol. 25 (2012)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01621"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/SMC53992.2023.10394132"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25244"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_2_31_1","volume-title":"Proceedings, Part V 13","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part V 13. Springer, 740--755."},{"key":"e_1_3_2_2_32_1","volume-title":"Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834","author":"Liu Shilong","year":"2021","unstructured":"Shilong Liu, Lei Zhang, Xiao Yang, Hang Su, and Jun Zhu. 2021. Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834 (2021)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00264"},{"key":"e_1_3_2_2_35_1","volume-title":"A simple long-tailed recognition baseline via vision-language model. arXiv preprint arXiv:2111.14745","author":"Ma Teli","year":"2021","unstructured":"Teli Ma, Shijie Geng, Mengmeng Wang, Jing Shao, Jiasen Lu, Hongsheng Li, Peng Gao, and Yu Qiao. 2021. A simple long-tailed recognition baseline via vision-language model. arXiv preprint arXiv:2111.14745 (2021)."},{"key":"e_1_3_2_2_36_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17098"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_2_39_1","volume-title":"Vt-clip: Enhancing vision-language models with visual-guided texts. arXiv preprint arXiv:2112.02399","author":"Qiu Longtian","year":"2021","unstructured":"Longtian Qiu, Renrui Zhang, Ziyu Guo, Ziyao Zeng, Zilu Guo, Yafeng Li, and Guangnan Zhang. 2021. Vt-clip: Enhancing vision-language models with visual-guided texts. arXiv preprint arXiv:2112.02399 (2021)."},{"key":"e_1_3_2_2_40_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00015"},{"key":"e_1_3_2_2_42_1","volume-title":"Proceedings, Part VII 14","author":"Shen Li","year":"2016","unstructured":"Li Shen, Zhouchen Lin, and Qingming Huang. 2016. Relay backpropagation for effective learning of deep convolutional neural networks. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part VII 14. Springer, 467--482."},{"key":"e_1_3_2_2_43_1","volume-title":"Parameter-Efficient Long-Tailed Recognition. arXiv preprint arXiv:2309.10019","author":"Shi Jiang-Xin","year":"2023","unstructured":"Jiang-Xin Shi, Tong Wei, Zhi Zhou, Xin-Yan Han, Jie-Jing Shao, and Yu-Feng Li. 2023. Parameter-Efficient Long-Tailed Recognition. arXiv preprint arXiv:2309.10019 (2023)."},{"key":"e_1_3_2_2_44_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_2_45_1","volume-title":"Adaptively Weighted Copy-Decoupling Resampling Strategy for Long-Tailed Multi-label Classification. In 2023 IEEE 6th International Conference on Pattern Recognition and Artificial Intelligence (PRAI)","author":"Song Pengpeng","unstructured":"Pengpeng Song, Anyi Ju, Wenbin Xu, and Fei Guo. 2023. Adaptively Weighted Copy-Decoupling Resampling Strategy for Long-Tailed Multi-label Classification. In 2023 IEEE 6th International Conference on Pattern Recognition and Artificial Intelligence (PRAI). IEEE, 437--442."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19806-9_5"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.4018\/jdwm.2007070101"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.251"},{"key":"e_1_3_2_2_50_1","volume-title":"Learning to model the tail. Advances in neural information processing systems","author":"Wang Yu-Xiong","year":"2017","unstructured":"Yu-Xiong Wang, Deva Ramanan, and Martial Hebert. 2017. Learning to model the tail. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_51_1","volume-title":"Proceedings, Part IV 16","author":"Wu Tong","year":"2020","unstructured":"Tong Wu, Qingqiu Huang, Ziwei Liu, Yu Wang, and Dahua Lin. 2020. Distribution-balanced loss for multi-label classification in long-tailed datasets. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part IV 16. Springer, 162--178."},{"key":"e_1_3_2_2_52_1","volume-title":"LMPT: Prompt Tuning with Class-Specific Embedding Loss for Long-tailed Multi-Label Visual Recognition. arXiv preprint arXiv:2305.04536","author":"Xia Peng","year":"2023","unstructured":"Peng Xia, Di Xu, Lie Ju, Ming Hu, Jun Chen, and Zongyuan Ge. 2023. LMPT: Prompt Tuning with Class-Specific Embedding Loss for Long-tailed Multi-Label Visual Recognition. arXiv preprint arXiv:2305.04536 (2023)."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00137"},{"key":"e_1_3_2_2_54_1","volume-title":"Test-agnostic long-tailed recognition by test-time aggregating diverse experts with self-supervision. arXiv preprint arXiv:2107.09249","author":"Zhang Yifan","year":"2021","unstructured":"Yifan Zhang, Bryan Hooi, Lanqing Hong, and Jiashi Feng. 2021. Test-agnostic long-tailed recognition by test-time aggregating diverse experts with self-supervision. arXiv preprint arXiv:2107.09249, Vol. 2, 5 (2021), 6."},{"key":"e_1_3_2_2_55_1","volume-title":"Deep long-tailed learning: A survey","author":"Zhang Yifan","year":"2023","unstructured":"Yifan Zhang, Bingyi Kang, Bryan Hooi, Shuicheng Yan, and Jiashi Feng. 2023. Deep long-tailed learning: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023)."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00974"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00440"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681171","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681171","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681171"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":59,"alternative-id":["10.1145\/3664647.3681171","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681171","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}