{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:20:25Z","timestamp":1765308025445,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755024","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"3242-3250","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Contrastive Lie Algebra Learning for Ultra-Fine-Grained Visual Categorization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6186-0520","authenticated-orcid":false,"given":"Xiaohan","family":"Yu","sequence":"first","affiliation":[{"name":"Macquarie University, Sydney, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5890-0165","authenticated-orcid":false,"given":"Zicheng","family":"Pan","sequence":"additional","affiliation":[{"name":"Griffith University, Brisbane, QLD, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5252-658X","authenticated-orcid":false,"given":"Yang","family":"Zhao","sequence":"additional","affiliation":[{"name":"La Trobe University, Bundoora, VIC, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1449-5046","authenticated-orcid":false,"given":"Qin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shenzhen University, Shenzhen City, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5382-5351","authenticated-orcid":false,"given":"Yongsheng","family":"Gao","sequence":"additional","affiliation":[{"name":"Griffith University, Brisbane, QLD, Australia"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"VICReg: Variance-Invariance-Covariance Regularization for Self-Supervised Learning. In International Conference on Learning Representations.","author":"Bardes Adrien","year":"2022","unstructured":"Adrien Bardes, Jean Ponce, and Yann LeCun. 2022a. VICReg: Variance-Invariance-Covariance Regularization for Self-Supervised Learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_2_1","unstructured":"Adrien Bardes Jean Ponce and Yann Lecun. 2022b. VICReg: Variance-Invariance-Covariance Regularization For Self-Supervised Learning. In ICLR."},{"volume-title":"Food-101-mining discriminative components with random forests","author":"Bossard Lukas","key":"e_1_3_2_2_3_1","unstructured":"Lukas Bossard, Matthieu Guillaumin, and Luc Van Gool. 2014. Food-101-mining discriminative components with random forests. In ECCV. Springer, 446-461."},{"key":"e_1_3_2_2_4_1","first-page":"16664","article-title":"Adaptformer: Adapting vision transformers for scalable visual recognition","volume":"35","author":"Chen Shoufa","year":"2022","unstructured":"Shoufa Chen, Chongjian Ge, Zhan Tong, Jiangliu Wang, Yibing Song, Jue Wang, and Ping Luo. 2022. Adaptformer: Adapting vision transformers for scalable visual recognition. NeurIPS, Vol. 35 (2022), 16664-16678.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_5_1","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020b. A simple framework for contrastive learning of visual representations. In ICML. 1597-1607.","journal-title":"ICML."},{"key":"e_1_3_2_2_6_1","volume-title":"Improved baselines with momentum contrastive learning. arXiv preprint arXiv:2003.04297","author":"Chen Xinlei","year":"2020","unstructured":"Xinlei Chen, Haoqi Fan, Ross Girshick, and Kaiming He. 2020a. Improved baselines with momentum contrastive learning. arXiv preprint arXiv:2003.04297 (2020)."},{"key":"e_1_3_2_2_7_1","first-page":"5157","article-title":"Destruction and Construction Learning for Fine-grained Image Recognition","author":"Chen Yue","year":"2019","unstructured":"Yue Chen, Yalong Bai, Wei Zhang, and Tao Mei. 2019. Destruction and Construction Learning for Fine-grained Image Recognition. In CVPR. 5157-5166.","journal-title":"CVPR."},{"key":"e_1_3_2_2_8_1","first-page":"2219","article-title":"Attention-based dropout layer for weakly supervised object localization","author":"Choe Junsuk","year":"2019","unstructured":"Junsuk Choe and Hyunjung Shim. 2019. Attention-based dropout layer for weakly supervised object localization. In CVPR. 2219-2228.","journal-title":"CVPR."},{"key":"e_1_3_2_2_9_1","first-page":"2921","article-title":"Kernel pooling for convolutional neural networks","author":"Cui Yin","year":"2017","unstructured":"Yin Cui, Feng Zhou, Jiang Wang, Xiao Liu, Yuanqing Lin, and Serge Belongie. 2017. Kernel pooling for convolutional neural networks. In CVPR. 2921-2930.","journal-title":"CVPR."},{"key":"e_1_3_2_2_10_1","first-page":"5042","article-title":"Similarity metric for curved shapes in Euclidean space","author":"Demisse Girum G","year":"2016","unstructured":"Girum G Demisse, Djamila Aouada, and Bjorn Ottersten. 2016. Similarity metric for curved shapes in Euclidean space. In CVPR. 5042-5050.","journal-title":"CVPR."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2711607"},{"key":"e_1_3_2_2_12_1","volume-title":"Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552","author":"DeVries Terrance","year":"2017","unstructured":"Terrance DeVries and Graham W Taylor. 2017. Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552 (2017)."},{"key":"e_1_3_2_2_13_1","first-page":"1422","article-title":"Unsupervised visual representation learning by context prediction","author":"Doersch Carl","year":"2015","unstructured":"Carl Doersch, Abhinav Gupta, and Alexei A Efros. 2015. Unsupervised visual representation learning by context prediction. In ICCV. 1422-1430.","journal-title":"ICCV."},{"key":"e_1_3_2_2_14_1","volume-title":"Words: Transformers for Image Recognition at Scale. In ICLR.","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al., 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR."},{"key":"e_1_3_2_2_15_1","volume-title":"International Conference on Machine Learning. PMLR, 3165-3176","author":"Finzi Marc","year":"2020","unstructured":"Marc Finzi, Samuel Stanton, Pavel Izmailov, and Andrew Gordon Wilson. 2020. Generalizing convolutional neural networks for equivariance to lie groups on arbitrary continuous data. In International Conference on Machine Learning. PMLR, 3165-3176."},{"key":"e_1_3_2_2_16_1","first-page":"317","article-title":"Compact bilinear pooling","author":"Gao Yang","year":"2016","unstructured":"Yang Gao, Oscar Beijbom, Ning Zhang, and Trevor Darrell. 2016. Compact bilinear pooling. In CVPR. 317-326.","journal-title":"CVPR."},{"key":"e_1_3_2_2_17_1","unstructured":"Spyros Gidaris Praveer Singh and Nikos Komodakis. 2018. Unsupervised Representation Learning by Predicting Image Rotations. In ICLR."},{"key":"e_1_3_2_2_18_1","first-page":"21271","article-title":"Bootstrap your own latent-a new approach to self-supervised learning","volume":"33","author":"Grill Jean-Bastien","year":"2020","unstructured":"Jean-Bastien Grill, Florian Strub, Florent Altch\u00e9, Corentin Tallec, Pierre Richemond, Elena Buchatskaya, Carl Doersch, Bernardo Avila Pires, Zhaohan Guo, Mohammad Gheshlaghi Azar, et al., 2020. Bootstrap your own latent-a new approach to self-supervised learning. In NeurIPS, Vol. 33. 21271-21284.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19967"},{"key":"e_1_3_2_2_20_1","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770-778.","journal-title":"CVPR."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3148814"},{"key":"e_1_3_2_2_22_1","first-page":"81549","article-title":"On the comparison between multi-modal and single-modal contrastive learning","volume":"37","author":"Huang Wei","year":"2024","unstructured":"Wei Huang, Andi Han, Yongqiang Chen, Yuan Cao, Zhiqiang Xu, and Taiji Suzuki. 2024. On the comparison between multi-modal and single-modal contrastive learning. Advances in Neural Information Processing Systems, Vol. 37 (2024), 81549-81605.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_23_1","first-page":"8662","article-title":"Interpretable and Accurate Fine-grained Recognition via Region Grouping","author":"Huang Zixuan","year":"2020","unstructured":"Zixuan Huang and Yin Li. 2020. Interpretable and Accurate Fine-grained Recognition via Region Grouping. In CVPR. 8662-8672.","journal-title":"CVPR."},{"volume-title":"Visual prompt tuning","author":"Jia Menglin","key":"e_1_3_2_2_24_1","unstructured":"Menglin Jia, Luming Tang, Bor-Chun Chen, Claire Cardie, Serge Belongie, Bharath Hariharan, and Ser-Nam Lim. 2022. Visual prompt tuning. In ECCV. Springer, 709-727."},{"key":"e_1_3_2_2_25_1","first-page":"554","article-title":"3d Object Representations for Fine-Grained Categorization","author":"Krause Jonathan","year":"2013","unstructured":"Jonathan Krause, Michael Stark, Jia Deng, and Li Fei-Fei. 2013. 3d Object Representations for Fine-Grained Categorization. In CVPRW. 554-561.","journal-title":"CVPRW."},{"key":"e_1_3_2_2_26_1","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In NeurIPS. 1097-1105.","journal-title":"NeurIPS."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2013.06.012"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00471"},{"key":"e_1_3_2_2_29_1","first-page":"947","article-title":"Towards faster training of global covariance pooling networks by iterative matrix square root normalization","author":"Li Peihua","year":"2018","unstructured":"Peihua Li, Jiangtao Xie, Qilong Wang, and Zilin Gao. 2018. Towards faster training of global covariance pooling networks by iterative matrix square root normalization. In CVPR. 947-955.","journal-title":"CVPR."},{"key":"e_1_3_2_2_30_1","unstructured":"Tsung-Yu Lin and Subhransu Maji. 2017. Improved bilinear pooling with cnns. In BMVC."},{"key":"e_1_3_2_2_31_1","first-page":"1449","article-title":"Bilinear cnn models for fine-grained visual recognition","author":"Lin Tsung-Yu","year":"2015","unstructured":"Tsung-Yu Lin, Aruni RoyChowdhury, and Subhransu Maji. 2015. Bilinear cnn models for fine-grained visual recognition. In ICCV. 1449-1457.","journal-title":"ICCV."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.41"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01674"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2461466.2461489"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-005-3222-z"},{"key":"e_1_3_2_2_36_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. In ICLR."},{"key":"e_1_3_2_2_37_1","first-page":"3544","article-title":"Hide-and-seek: Forcing a network to be meticulous for weakly-supervised object and action localization","author":"Singh Krishna Kumar","year":"2017","unstructured":"Krishna Kumar Singh and Yong Jae Lee. 2017. Hide-and-seek: Forcing a network to be meticulous for weakly-supervised object and action localization. In ICCV. 3544-3553.","journal-title":"ICCV."},{"key":"e_1_3_2_2_38_1","volume-title":"The Plant Pathology 2020 challenge dataset to classify foliar disease of apples. arXiv preprint arXiv:2004.11958","author":"Thapa Ranjita","year":"2020","unstructured":"Ranjita Thapa, Noah Snavely, Serge Belongie, and Awais Khan. 2020. The Plant Pathology 2020 challenge dataset to classify foliar disease of apples. arXiv preprint arXiv:2004.11958 (2020)."},{"key":"e_1_3_2_2_39_1","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In ICLR. 10347-10357.","journal-title":"ICLR."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00100"},{"key":"e_1_3_2_2_41_1","first-page":"420","article-title":"Learning to navigate for fine-grained classification","author":"Yang Ze","year":"2018","unstructured":"Ze Yang, Tiange Luo, Dong Wang, Zhiqiang Hu, Jun Gao, and Liwei Wang. 2018. Learning to navigate for fine-grained classification. In ECCV. 420-435.","journal-title":"ECCV."},{"key":"e_1_3_2_2_42_1","volume-title":"Self-Supervised Lie Algebra Representation Learning via Optimal Canonical Metric","author":"Yu Xiaohan","year":"2024","unstructured":"Xiaohan Yu, Zicheng Pan, Yang Zhao, and Yongsheng Gao. 2024. Self-Supervised Lie Algebra Representation Learning via Optimal Canonical Metric. IEEE Transactions on Neural Networks and Learning Systems (2024)."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109131"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108691"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108067"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6968"},{"key":"e_1_3_2_2_47_1","first-page":"10285","article-title":"Benchmark platform for ultra-fine-grained visual categorization beyond human performance","author":"Yu Xiaohan","year":"2021","unstructured":"Xiaohan Yu, Yang Zhao, Yongsheng Gao, Xiaohui Yuan, and Shengwu Xiong. 2021b. Benchmark platform for ultra-fine-grained visual categorization beyond human performance. In ICCV. 10285-10295.","journal-title":"ICCV."},{"key":"e_1_3_2_2_48_1","first-page":"6023","article-title":"Cutmix: Regularization strategy to train strong classifiers with localizable features","author":"Yun Sangdoo","year":"2019","unstructured":"Sangdoo Yun, Dongyoon Han, Seong Joon Oh, Sanghyuk Chun, Junsuk Choe, and Youngjoon Yoo. 2019. Cutmix: Regularization strategy to train strong classifiers with localizable features. In ICCV. 6023-6032.","journal-title":"ICCV."},{"key":"e_1_3_2_2_49_1","first-page":"2921","article-title":"Learning deep features for discriminative localization","author":"Zhou Bolei","year":"2016","unstructured":"Bolei Zhou, Aditya Khosla, Agata Lapedriza, Aude Oliva, and Antonio Torralba. 2016. Learning deep features for discriminative localization. In CVPR. 2921-2929.","journal-title":"CVPR."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755024","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:16:03Z","timestamp":1765307763000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755024"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":49,"alternative-id":["10.1145\/3746027.3755024","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755024","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}