{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T08:57:15Z","timestamp":1780477035963,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"TaiShan Scholars Program","award":["tsqn202211289"],"award-info":[{"award-number":["tsqn202211289"]}]},{"name":"Excellent Youth Scholars Program of Shandong Province","award":["2022HWYQ-048"],"award-info":[{"award-number":["2022HWYQ-048"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62006141"],"award-info":[{"award-number":["62006141"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R\\&D Program of China","award":["2021YFC3300203"],"award-info":[{"award-number":["2021YFC3300203"]}]},{"name":"Oversea Innovation Team Project of the 20 Regulations for New Universities funding program of Jinan","award":["2021GXRC073"],"award-info":[{"award-number":["2021GXRC073"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612511","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:26:54Z","timestamp":1698391614000},"page":"2964-2972","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Class-level Structural Relation Modeling and Smoothing for Visual Representation Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8422-4032","authenticated-orcid":false,"given":"Zitan","family":"Chen","sequence":"first","affiliation":[{"name":"Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3433-7413","authenticated-orcid":false,"given":"Zhuang","family":"Qi","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9704-9633","authenticated-orcid":false,"given":"Xiao","family":"Cao","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6638-2361","authenticated-orcid":false,"given":"Xiangxian","family":"Li","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7290-5659","authenticated-orcid":false,"given":"Xiangxu","family":"Meng","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0273-5946","authenticated-orcid":false,"given":"Lei","family":"Meng","sequence":"additional","affiliation":[{"name":"Shandong University &amp; Shandong Research Institute of Industrial Technology, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"CUDA: Curriculum of Data Augmentation for Long-tailed Recognition. arXiv preprint arXiv:2302.05499","author":"Ahn Sumyeong","year":"2023","unstructured":"Sumyeong Ahn, Jongwoo Ko, et al. 2023. CUDA: Curriculum of Data Augmentation for Long-tailed Recognition. arXiv preprint arXiv:2302.05499 (2023)."},{"key":"e_1_3_2_1_2_1","unstructured":"Mathilde Caron et al. 2020. Unsupervised learning of visual features by contrasting cluster assignments. NeurIPS (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Jingjing Chen and Chong-Wah Ngo. 2016. Deep-based ingredient recognition for cooking recipe retrieval. In ACM MM.","DOI":"10.1145\/2964284.2964315"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Zhao-Min Chen Xiu-Shen Wei et al. 2019. Multi-Label Image Recognition with Joint Class-Aware Map Disentangling and Label Correlation Embedding. In IEEE.","DOI":"10.1109\/ICME.2019.00113"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Gong Cheng Pujian Lai et al. 2023. Class attention network for image recognition. Science China Information Sciences (2023).","DOI":"10.1007\/s11432-021-3493-7"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Tat-Seng Chua Jinhui Tang et al. 2009. Nus-wide: a real-world web image database from national university of singapore. In ACM MM.","DOI":"10.1145\/1646396.1646452"},{"key":"e_1_3_2_1_7_1","volume-title":"Dual Encoding for Video Retrieval by Text. TPAMI","author":"Dong Jianfeng","year":"2021","unstructured":"Jianfeng Dong, Xirong Li, Chaoxi Xu, Xun Yang, Gang Yang, Xun Wang, and Meng Wang. 2021. Dual Encoding for Video Retrieval by Text. TPAMI (2021)."},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_9_1","volume-title":"Few-shot learning with graph neural networks. arXiv preprint arXiv:1711.04043","author":"Garcia Victor","year":"2017","unstructured":"Victor Garcia and Joan Bruna. 2017. Few-shot learning with graph neural networks. arXiv preprint arXiv:1711.04043 (2017)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Golnaz Ghiasi Yin Cui et al. 2021. Simple copy-paste is a strong data augmentation method for instance segmentation. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00294"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3265645"},{"key":"e_1_3_2_1_12_1","unstructured":"Kaiming He Haoqi Fan Yuxin Wu Saining Xie and Ross Girshick. 2020. Momentum contrast for unsupervised visual representation learning. In CVPR."},{"key":"e_1_3_2_1_13_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR."},{"key":"e_1_3_2_1_14_1","unstructured":"Ruibing Hou Hong Chang et al. 2019. Cross attention network for few-shot classification. NeurIPS (2019)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Zhi Hou Xiaojiang Peng et al. 2020. Visual compositional learning for human-object interaction detection. In ECCV.","DOI":"10.1007\/978-3-030-58555-6_35"},{"key":"e_1_3_2_1_16_1","volume-title":"Batchformer: Learning to explore sample relationships for robust representation learning. In CVPR.","author":"Hou Zhi","year":"2022","unstructured":"Zhi Hou, Baosheng Yu, and Dacheng Tao. 2022. Batchformer: Learning to explore sample relationships for robust representation learning. In CVPR."},{"key":"e_1_3_2_1_17_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Yann LeCun L\u00e9on Bottou et al. 1998. Gradient-based learning applied to document recognition. IEEE (1998).","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_19_1","unstructured":"Xiangxian Li Haokai Ma Lei Meng and Xiangxu Meng. 2021. Comparative study of adversarial training methods for long-tailed classification. In ADVM."},{"key":"e_1_3_2_1_20_1","volume-title":"Dse-net: Artistic font image synthesis via disentangled style encoding. In ICME.","author":"Li Xiang","year":"2022","unstructured":"Xiang Li, Lei Wu, Xu Chen, Lei Meng, and Xiangxu Meng. 2022. Dse-net: Artistic font image synthesis via disentangled style encoding. In ICME."},{"key":"e_1_3_2_1_21_1","volume-title":"2023 a. Compositional Zero-Shot Artistic Font Synthesis. IJCAI","author":"Li Xiang","year":"2023","unstructured":"Xiang Li, Lei Wu, Changshuo Wang, Lei Meng, and Xiangxu Meng. 2023 a. Compositional Zero-Shot Artistic Font Synthesis. IJCAI (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"2023 b. Cross-modal Learning Using Privileged Information for Long-tailed Image Classification. CVM","author":"Li Xiangxian","year":"2023","unstructured":"Xiangxian Li, Yuze Zheng, Haokai Ma, Zhuang Qi, Xiangxu Meng, and Lei Meng. 2023 b. Cross-modal Learning Using Privileged Information for Long-tailed Image Classification. CVM (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"2023 b. Incomplete Multi-View Multi-Label Learning via Label-Guided Masked View-and Category-Aware Transformers. arXiv preprint arXiv:2303.07180","author":"Liu Chengliang","year":"2023","unstructured":"Chengliang Liu, Jie Wen, et al. 2023 b. Incomplete Multi-View Multi-Label Learning via Label-Guided Masked View-and Category-Aware Transformers. arXiv preprint arXiv:2303.07180 (2023)."},{"key":"e_1_3_2_1_24_1","unstructured":"Jinxing Liu Junjin Xiao Haokai Ma Xiangxian Li Zhuang Qi Xiangxu Meng and Lei Meng. 2022. Prompt Learning with Cross-Modal Feature Alignment for Visual Domain Adaptation. In CAAI."},{"key":"e_1_3_2_1_25_1","volume-title":"2023 a. Cross-Training with Prototypical Distillation for improving the generalization of Federated Learning. ICME","author":"Liu Tianhan","year":"2023","unstructured":"Tianhan Liu, Zhuang Qi, Zitan Chen, Xiangxu Meng, and Lei Meng. 2023 a. Cross-Training with Prototypical Distillation for improving the generalization of Federated Learning. ICME (2023)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467276"},{"key":"e_1_3_2_1_27_1","unstructured":"Haokai Ma Xiangxian Li Lei Meng and Xiangxu Meng. 2021. Comparative study of adversarial training methods for cold-start recommendation. In ADVM."},{"key":"e_1_3_2_1_28_1","volume-title":"2023 a. Cross-Modal Content Inference and Feature Enrichment for Cold-Start Recommendation. IJCNN","author":"Ma Haokai","year":"2023","unstructured":"Haokai Ma, Zhuang Qi, Xinxin Dong, Xiangxian Li, Yuze Zheng, and Xiangxu Mengand Lei Meng. 2023 a. Cross-Modal Content Inference and Feature Enrichment for Cold-Start Recommendation. IJCNN (2023)."},{"key":"e_1_3_2_1_29_1","unstructured":"Haokai Ma Ruobing Xie Lei Meng Xin Chen Xu Zhang Leyu Lin and Jie Zhou. 2023 b. Exploring False Hard Negative Sample in Cross-Domain Recommendation. In Recsys."},{"key":"e_1_3_2_1_30_1","volume-title":"2023 c. Triple Sequence Learning for Cross-domain Recommendation. arXiv preprint arXiv:2304.05027","author":"Ma Haokai","year":"2023","unstructured":"Haokai Ma, Ruobing Xie, Lei Meng, Xin Chen, Xu Zhang, Leyu Lin, and Jie Zhou. 2023 c. Triple Sequence Learning for Cross-domain Recommendation. arXiv preprint arXiv:2304.05027 (2023)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Lei Meng Long Chen Xun Yang Dacheng Tao Hanwang Zhang Chunyan Miao and Tat-Seng Chua. 2019. Learning using privileged information for food recognition. In ACM MM.","DOI":"10.1145\/3343031.3350870"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413598"},{"key":"e_1_3_2_1_33_1","volume-title":"Adaptive scaling of cluster boundaries for large-scale social media data clustering. TNNLS","author":"Meng Lei","year":"2015","unstructured":"Lei Meng, Ah-Hwee Tan, and Donald C Wunsch. 2015. Adaptive scaling of cluster boundaries for large-scale social media data clustering. TNNLS (2015)."},{"key":"e_1_3_2_1_34_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Xuran Pan Chunjiang Ge et al. 2022. On the integration of self-attention and convolution. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00089"},{"key":"e_1_3_2_1_36_1","unstructured":"Zhuang Qi Yuqing Wang Zitan Chen Ran Wang Xiangxu Meng and Lei Meng. 2022. Clustering-based Curriculum Construction for Sample-Balanced Federated Learning. In CAAI."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Pau Rodr\u00edguez Issam Laradji et al. 2020. Embedding propagation: Smoother manifold for few-shot classification. In ECCV.","DOI":"10.1007\/978-3-030-58574-7_8"},{"key":"e_1_3_2_1_38_1","unstructured":"Franco Scarselli Marco Gori et al. 2008. The graph neural network model. IEEE transactions on neural networks (2008)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3323873.3325056"},{"key":"e_1_3_2_1_40_1","unstructured":"Shiv Shankar Vihari Piratla et al. 2018. Generalizing across domains via cross-gradient training. arXiv preprint arXiv:1804.10745 (2018)."},{"key":"e_1_3_2_1_41_1","unstructured":"Weilin Sun Xiangxian Li Manyi Li Yuqing Wang Yuze Zheng Xiangxu Meng and Lei Meng. 2022. Sequential Fusion of Multi-view Video Frames for 3D Scene Generation. In CAAI."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Hossein Talebi and Peyman Milanfar. 2021. Learning to resize images for computer vision tasks. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00055"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Jinhui Tang Xiangbo Shu et al. 2016a. Generalized deep transfer networks for knowledge propagation in heterogeneous domains.","DOI":"10.1145\/2998574"},{"key":"e_1_3_2_1_44_1","unstructured":"Jinhui Tang Xiangbo Shu et al. 2016b. Tri-clustered tensor completion for social-aware image tag refinement. TPAMI (2016)."},{"key":"e_1_3_2_1_45_1","unstructured":"Petar Velivc kovi? Guillem Cucurull et al. [n. d.]. Graph Attention Networks. In ICLR."},{"key":"e_1_3_2_1_46_1","unstructured":"Chu Wang Babak Samari et al. [n. d.]. Affinity graph supervision for visual recognition. In CVPR."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Yuqing Wang Xiangxian Li Haokai Ma Zhuang Qi Xiangxu Meng and Lei Meng. 2022a. Causal Inference with Sample Balancing for Out-of-Distribution Detection in Visual Classification. In CAAI.","DOI":"10.1007\/978-3-031-20497-5_47"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Yuqing Wang Xiangxian Li Zhuang Qi Jingyu Li Xuelong Li Xiangxu Meng and Lei Meng. 2022b. Meta-causal feature learning for out-of-distribution generalization. In ECCV.","DOI":"10.1007\/978-3-031-25075-0_36"},{"key":"e_1_3_2_1_49_1","volume-title":"Multi-channel Attentive Weighting of Visual Frames for Multimodal Video Classification. IJCNN","author":"Wang Yuqing","year":"2023","unstructured":"Yuqing Wang, Zhuang Qi, Xiangxian Li, Jinxing Liu, Xiangxu Meng, and Lei Meng. 2023. Multi-channel Attentive Weighting of Visual Frames for Multimodal Video Classification. IJCNN (2023)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Yue Xu Yong-Lu Li Jiefeng Li and Cewu Lu. 2022b. Constructing Balance from Imbalance for Long-Tailed Image Recognition. In ECCV.","DOI":"10.1007\/978-3-031-20044-1_3"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Yue Xu Yong-Lu Li et al. 2022a. Constructing balance from imbalance for long-tailed image recognition. In ECCV.","DOI":"10.1007\/978-3-031-20044-1_3"},{"key":"e_1_3_2_1_52_1","volume-title":"Tree-Augmented Cross-Modal Encoding for Complex-Query Video Retrieval. SIGIR","author":"Yang Xun","year":"2020","unstructured":"Xun Yang, Jianfeng Dong, Yixin Cao, Xun Wang, Meng Wang, and Tat-Seng Chua. 2020a. Tree-Augmented Cross-Modal Encoding for Complex-Query Video Retrieval. SIGIR (2020)."},{"key":"e_1_3_2_1_53_1","volume-title":"Deconfounded Video Moment Retrieval with Causal Intervention. SIGIR","author":"Yang Xun","year":"2021","unstructured":"Xun Yang, Fuli Feng, Wei Ji, Meng Wang, and Tat-Seng Chua. 2021. Deconfounded Video Moment Retrieval with Causal Intervention. SIGIR (2021)."},{"key":"e_1_3_2_1_54_1","volume-title":"Weakly-Supervised Video Object Grounding by Exploring Spatio-Temporal Contexts. ACM MM","author":"Yang Xun","year":"2020","unstructured":"Xun Yang, Xueliang Liu, Meng Jian, Xinjian Gao, and Meng Wang. 2020b. Weakly-Supervised Video Object Grounding by Exploring Spatio-Temporal Contexts. ACM MM (2020)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Zhixiong Yang Junwen Pan et al. 2022. ProCo: Prototype-Aware Contrastive Learning for Long-Tailed Medical Image Classification. In MICCAI.","DOI":"10.1007\/978-3-031-16452-1_17"},{"key":"e_1_3_2_1_56_1","unstructured":"Hongyi Zhang Moustapha Cisse et al. 2017. mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412 (2017)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Zhen Zhao Luping Zhou et al. 2022. LaSSL: Label-Guided Self-Training for Semi-supervised Learning. In AAAI.","DOI":"10.1609\/aaai.v36i8.20907"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Yu Zheng Jiahui Zhan et al. 2023. Curricular contrastive regularization for physics-aware single image dehazing. arXiv preprint arXiv:2303.14218 (2023).","DOI":"10.1109\/CVPR52729.2023.00560"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Zhun Zhong Liang Zheng et al. 2020a. Random erasing data augmentation. In AAAI.","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Zhun Zhong Liang Zheng et al. 2020b. Random erasing data augmentation. In AAAI.","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"e_1_3_2_1_61_1","unstructured":"Jianggang Zhu Zheng Wang et al. 2022. Balanced Contrastive Learning for Long-Tailed Visual Recognition. In CVPR."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612511","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612511","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:54:19Z","timestamp":1755820459000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612511"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":61,"alternative-id":["10.1145\/3581783.3612511","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612511","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}