{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:34Z","timestamp":1765339474571,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754878","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"3270-3279","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Space and Semantics: Object-Purified Representation Learning for Multi-Label Image Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5300-0683","authenticated-orcid":false,"given":"Haifeng","family":"Zhao","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Anhui University, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6655-9401","authenticated-orcid":false,"given":"Shuo","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Anhui University, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8681-0765","authenticated-orcid":false,"given":"Leilei","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Anhui University, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3699-7703","authenticated-orcid":false,"given":"Yufei","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Anhui University, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3860-5139","authenticated-orcid":false,"given":"Lei","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, Jiangsu, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0164-7944","authenticated-orcid":false,"given":"Dengdi","family":"Sun","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Anhui University, Hefei, Anhui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12230"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00061"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3148867"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00113"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00532"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3063496"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Tat-Seng Chua Jinhui Tang Richang Hong Haojie Li Zhiping Luo and Yan-Tao Zheng. 2009. NUS-WIDE: A Real-World Web Image Database from National University of Singapore. Santorini Greece.","DOI":"10.1145\/1646396.1646452"},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Cordonnier Jean-Baptiste","year":"2020","unstructured":"Jean-Baptiste Cordonnier, Andreas Loukas, and Martin Jaggi. 2020. On the Relationship between Self-Attention and Convolutional Layers. In International Conference on Learning Representations (ICLR)."},{"volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW).","author":"Cubuk Ekin D.","key":"e_1_3_2_1_9_1","unstructured":"Ekin D. Cubuk, Barret Zoph, Jonathon Shlens, and Quoc V. Le. 2020. Randaugment: Practical automated data augmentation with a reduced search space. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126605"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3171095"},{"key":"e_1_3_2_1_12_1","volume-title":"Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552","author":"DeVries Terrance","year":"2017","unstructured":"Terrance DeVries and Graham W Taylor. 2017. Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552 (2017)."},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3088605"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_17_1","volume-title":"Deep Convolutional Ranking for Multilabel Image Annotation. In International Conference on Learning Representations (ICLR).","author":"Ioffe Sergey","year":"2013","unstructured":"Sergey Ioffe, Alexander Toshev, Yangqing Jia, Thomas Leung, and Yunchao Gong. 2013. Deep Convolutional Ranking for Multilabel Image Annotation. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3181490"},{"volume-title":"Semi-Supervised Classification with Graph Convolutional Networks. In International Conference on Learning Representations (ICLR).","author":"Thomas","key":"e_1_3_2_1_19_1","unstructured":"Thomas N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Ranjay Krishna Yuke Zhu Oliver Groth Justin Johnson Kenji Hata Joshua Kravitz Stephanie Chen Yannis Kalantidis Li-Jia Li David A Shamma et al. 2017. Visual genome: Connecting language and vision using crowdsourced dense image annotations. Vol. 123 (2017) 32-73.","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01621"},{"key":"e_1_3_2_1_22_1","first-page":"1","article-title":"Multi-label Image Classification with A Probabilistic Label Enhancement Model","volume":"1","author":"Li Xin","year":"2014","unstructured":"Xin Li, Feipeng Zhao, and Yuhong Guo. 2014. Multi-label Image Classification with A Probabilistic Label Enhancement Model. In UAI, Vol. 1. 1-10.","journal-title":"UAI"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3240195"},{"key":"e_1_3_2_1_24_1","volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV). Springer, 740-755","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollar, and Larry Zitnick. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision (ECCV). Springer, 740-755."},{"key":"e_1_3_2_1_25_1","volume-title":"Causality Compensated Attention for Contextual Biased Visual Recognition. In International Conference on Learning Representations (ICLR).","author":"Liu Ruyang","year":"2023","unstructured":"Ruyang Liu, Jingjia Huang, Thomas H. Li, and Ge Li. 2023. Causality Compensated Attention for Contextual Biased Visual Recognition. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_26_1","volume-title":"Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834","author":"Liu Shilong","year":"2021","unstructured":"Shilong Liu, Lei Zhang, Xiao Yang, Hang Su, and Jun Zhu. 2021b. Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834 (2021)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3119334"},{"key":"e_1_3_2_1_28_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations (ICLR).","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_29_1","volume-title":"Semantic-Aware Dual Contrastive Learning for Multi-label Image Classification. In 26th European Conference on Artificial Intelligence. 1656-1663","author":"Ma Leilei","year":"2023","unstructured":"Leilei Ma, Dengdi Sun, Lei Wang, Haifeng Zhao, and Bin Luo. 2023. Semantic-Aware Dual Contrastive Learning for Multi-label Image Classification. In 26th European Conference on Artificial Intelligence. 1656-1663."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680815"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00015"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00012"},{"key":"e_1_3_2_1_33_1","volume-title":"Super-convergence: Very fast training of neural networks using large learning rates. In Artificial intelligence and machine learning for multi-domain operations applications","author":"Smith Leslie N","year":"2019","unstructured":"Leslie N Smith and Nicholay Topin. 2019. Super-convergence: Very fast training of neural networks using large learning rates. In Artificial intelligence and machine learning for multi-domain operations applications, Vol. 11006. SPIE, 369-386."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s44267-024-00038-x"},{"key":"e_1_3_2_1_35_1","first-page":"24261","article-title":"Mlp-mixer: An all-mlp architecture for vision","volume":"34","author":"Tolstikhin Ilya O","year":"2021","unstructured":"Ilya O Tolstikhin, Neil Houlsby, Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Thomas Unterthiner, Jessica Yung, Andreas Steiner, Daniel Keysers, Jakob Uszkoreit, et al., 2021. Mlp-mixer: An all-mlp architecture for vision. Advances in Neural Information Processing Systems (NeurIPS), Vol. 34 (2021), 24261-24272.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_2_1_36_1","first-page":"6000","article-title":"Attention is all you need","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in Neural Information Processing Systems (NeurIPS), Vol. 30 (2017), 6000-6010.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.251"},{"key":"e_1_3_2_1_38_1","volume-title":"SpliceMix: A Cross-scale and Semantic Blending Augmentation Strategy for Multi-label Image Classification","author":"Wang Lei","year":"2025","unstructured":"Lei Wang, Yibing Zhan, Leilei Ma, Dapeng Tao, Liang Ding, and Chen Gong. 2025. SpliceMix: A Cross-scale and Semantic Blending Augmentation Strategy for Multi-label Image Classification. IEEE Transactions on Multimedia (2025), 1-15."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6909"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.58"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414046"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3268997"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3288205"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/163"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00358"},{"key":"e_1_3_2_1_46_1","first-page":"18430","article-title":"Label-Aware Global Consistency for Multi-Label Learning with Single Positive Labels","volume":"35","author":"Xie Ming-Kun","year":"2022","unstructured":"Ming-Kun Xie, Jiahao Xiao, and Sheng-Jun Huang. 2022. Label-Aware Global Consistency for Multi-Label Learning with Single Positive Labels. Advances in Neural Information Processing Systems (NeurIPS), Vol. 35 (2022), 18430-18441.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML). 54576-54589","author":"Xie Ming-Kun","year":"2024","unstructured":"Ming-Kun Xie, Jia-Hao Xiao, Pei Peng, Gang Niu, Masashi Sugiyama, and Sheng-Jun Huang. 2024. Counterfactual Reasoning for Multi-Label Image Classification via Patching-Based Training. In Proceedings of the International Conference on Machine Learning (ICML). 54576-54589."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/208"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 13440-13449","author":"Yazici Vacit Oguz","key":"e_1_3_2_1_49_1","unstructured":"Vacit Oguz Yazici, Abel Gonzalez-Garcia, Arnau Ramisa, Bartlomiej Twardowski, and Joost van de Weijer. 2020. Orderless recurrent models for multi-label classification. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 13440-13449."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_39"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3266161"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.39"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00023"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3277279"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550278"},{"key":"e_1_3_2_1_56_1","article-title":"Double Attention Based on Graph Attention Network for Image Multi-Label Classification","volume":"19","author":"Zhou Wei","year":"2023","unstructured":"Wei Zhou, Zhiwu Xia, Peng Dou, Tao Su, and Haifeng Hu. 2023b. Double Attention Based on Graph Attention Network for Image Multi-Label Classification. ACM Transactions on Multimedia Computing Communications and Applications, Vol. 19, 1, Article 18 (2023).","journal-title":"ACM Transactions on Multimedia Computing Communications and Applications"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3284812"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.219"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00025"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3001583"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548343"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3408256"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00142"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754878","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:00:37Z","timestamp":1765339237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754878"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":63,"alternative-id":["10.1145\/3746027.3754878","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754878","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}