{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T16:52:03Z","timestamp":1781542323767,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62472165"],"award-info":[{"award-number":["62472165"]}]},{"name":"Hunan Provincial Natural Science Foundation General Project","award":["2025JJ50380"],"award-info":[{"award-number":["2025JJ50380"]}]},{"name":"Hunan Provincial Natural Science Foundation Youth Project","award":["2025JJ60420"],"award-info":[{"award-number":["2025JJ60420"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810726","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1055-1063","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["C$^2$MOE: Consistency and Complementarity-guided Mixture of Experts for Incomplete Multimodal Emotion Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3270-6238","authenticated-orcid":false,"given":"Yuntao","family":"Shou","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9787-2002","authenticated-orcid":false,"given":"Tao","family":"Meng","sequence":"additional","affiliation":[{"name":"Central South University of Forestry and Technology, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1487-7877","authenticated-orcid":false,"given":"Wei","family":"Ai","sequence":"additional","affiliation":[{"name":"Central South University of Forestry and Technology, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-4048","authenticated-orcid":false,"given":"Keqin","family":"Li","sequence":"additional","affiliation":[{"name":"State University of New York, New York, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","first-page":"1247","volume-title":"International conference on machine learning","author":"Andrew Galen","year":"2013","unstructured":"Galen Andrew, Raman Arora, Jeff Bilmes, and Karen Livescu. 2013. Deep canonical correlation analysis. In International conference on machine learning. PMLR, 1247\u20131255."},{"key":"e_1_3_3_1_3_2","first-page":"1597","volume-title":"International conference on machine learning","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597\u20131607."},{"key":"e_1_3_3_1_4_2","unstructured":"Yijing Dai Yingjian Li Jinxing Li and Guangming Lu. 2025. Hierarchical Multi-Criteria Representation Fusion for Robust Incomplete Multimodal Sentiment Analysis. IEEE Transactions on Affective Computing (2025)."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Cheng Deng Zhaojia Chen Xianglong Liu Xinbo Gao and Dacheng Tao. 2018. Triplet-based deep hashing network for cross-modal retrieval. IEEE Transactions on Image Processing 27 8 (2018) 3893\u20133903.","DOI":"10.1109\/TIP.2018.2821921"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Yuanyue Deng Jintang Bian Shisong Wu Jianhuang Lai and Xiaohua Xie. 2025. Multiplex graph aggregation and feature refinement for unsupervised incomplete multimodal emotion recognition. Information Fusion 114 (2025) 102711.","DOI":"10.1016\/j.inffus.2024.102711"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Yi Ding Neethu Robinson Chengxuan Tong Qiuhao Zeng and Cuntai Guan. 2023. LGGNet: Learning from local-global-graph representations for brain\u2013computer interface. IEEE Transactions on Neural Networks and Learning Systems (2023).","DOI":"10.1109\/TNNLS.2023.3236635"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Shengcai Duan Le Wu Aiping Liu and Xun Chen. 2023. Alignment-enhanced interactive fusion model for complete and incomplete multimodal hand gesture recognition. IEEE Transactions on Neural Systems and Rehabilitation Engineering 31 (2023) 4661\u20134671.","DOI":"10.1109\/TNSRE.2023.3335101"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.100"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_3_1_11_2","volume-title":"International Conference on Learning Representations","author":"He Pengcheng","unstructured":"Pengcheng He, Xiaodong Liu, Jianfeng Gao, and Weizhu Chen. [n. d.]. DEBERTA: DECODING-ENHANCED BERT WITH DISENTANGLED ATTENTION. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME57554.2024.10687673"},{"key":"e_1_3_3_1_13_2","unstructured":"Thomas\u00a0N Kipf and Max Welling. 2016. Variational Graph Auto-Encoders. NIPS Workshop on Bayesian Deep Learning (2016)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i17.33984"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Mingcheng Li Dingkang Yang Yang Liu Shunli Wang Jiawei Chen Shuaibing Wang Jinjie Wei Yue Jiang Qingyao Xu Xiaolu Hou et\u00a0al. 2024. Toward robust incomplete multimodal sentiment analysis via hierarchical representation learning. Advances in Neural Information Processing Systems 37 (2024) 28515\u201328536.","DOI":"10.52202\/079017-0895"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01184"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17037"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00641"},{"key":"e_1_3_3_1_19_2","unstructured":"Zheng Lian Lan Chen Licai Sun Bin Liu and Jianhua Tao. 2023. GCNet: Graph completion network for incomplete multimodal learning in conversation. IEEE Transactions on pattern analysis and machine intelligence 45 7 (2023) 8419\u20138432."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00804"},{"key":"e_1_3_3_1_21_2","unstructured":"Yijie Lin Yuanbiao Gou Xiaotian Liu Jinfeng Bai Jiancheng Lv and Xi Peng. 2022. Dual contrastive prediction for incomplete multi-view representation learning. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 4 (2022) 4447\u20134461."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01102"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Rui Liu Haolin Zuo Zheng Lian Bj\u00f6rn\u00a0W Schuller and Haizhou Li. 2024. Contrastive learning based modality-invariant feature acquisition for robust multimodal emotion recognition with missing modalities. IEEE Transactions on Affective Computing 15 4 (2024) 1856\u20131873.","DOI":"10.1109\/TAFFC.2024.3378570"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00258"},{"key":"e_1_3_3_1_25_2","unstructured":"Hui Ma Jian Wang Hongfei Lin Bo Zhang Yijia Zhang and Bo Xu. 2023. A Transformer-Based Model With Self-Distillation for Multimodal Emotion Recognition in Conversations. IEEE Transactions on Multimedia (2023) 1\u201313."},{"key":"e_1_3_3_1_26_2","unstructured":"Aaron van\u00a0den Oord Yazhe Li and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. CoRR (2018)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016892"},{"key":"e_1_3_3_1_28_2","first-page":"8821","volume-title":"International Conference on Machine Learning","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International Conference on Machine Learning. Pmlr, 8821\u20138831."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Yuntao Shou Tao Meng Wei Ai Fangze Fu Nan Yin and Keqin Li. 2026. A comprehensive survey on multi-modal conversational emotion recognition with deep learning. ACM Transactions on Information Systems 44 2 (2026) 1\u201348.","DOI":"10.1145\/3786343"},{"key":"e_1_3_3_1_31_2","unstructured":"Yuntao Shou Tao Meng Wei Ai and Keqin Li. 2025. Multimodal large language models meet multimodal emotion recognition and reasoning: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2509.24322 (2025)."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2025\/688"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.528"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"e_1_3_3_1_36_2","first-page":"1083","volume-title":"International conference on machine learning","author":"Wang Weiran","year":"2015","unstructured":"Weiran Wang, Raman Arora, Karen Livescu, and Jeff Bilmes. 2015. On deep multi-view representation learning. In International conference on machine learning. PMLR, 1083\u20131092."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02013"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"crossref","unstructured":"Yuanzhi Wang Yong Li and Zhen Cui. 2023. Incomplete multimodality-diffused emotion recognition. Advances in Neural Information Processing Systems 36 (2023) 17117\u201317128.","DOI":"10.52202\/075280-0748"},{"key":"e_1_3_3_1_39_2","unstructured":"Yuanzhi Wang Yong Li and Zhen Cui. 2024. Incomplete multimodality-diffused emotion recognition. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681683"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Amir Zadeh Rowan Zellers Eli Pincus and Louis-Philippe Morency. 2016. Multimodal sentiment intensity analysis in videos: Facial gestures and verbal messages. IEEE Intelligent Systems 31 6 (2016) 82\u201388.","DOI":"10.1109\/MIS.2016.94"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1208"},{"key":"e_1_3_3_1_43_2","unstructured":"Changqing Zhang Yajie Cui Zongbo Han Joey\u00a0Tianyi Zhou Huazhu Fu and Qinghua Hu. 2020. Deep partial multi-view learning. IEEE transactions on pattern analysis and machine intelligence 44 5 (2020) 2402\u20132415."},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Haoyu Zhang Wenbin Wang and Tianshu Yu. 2024. Towards robust multimodal sentiment analysis with incomplete data. Advances in Neural Information Processing Systems 37 (2024) 55943\u201355974.","DOI":"10.52202\/079017-1779"},{"key":"e_1_3_3_1_45_2","unstructured":"Yunhua Zhang Hazel Doughty and Cees Snoek. 2024. Learning unseen modality interaction. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.203"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"crossref","unstructured":"Zengqun Zhao Qingshan Liu and Shanmin Wang. 2021. Learning deep global multi-scale and local attention features for facial expression recognition in the wild. IEEE Transactions on Image Processing 30 (2021) 6544\u20136556.","DOI":"10.1109\/TIP.2021.3093397"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1075"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:53:01Z","timestamp":1781538781000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810726"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":47,"alternative-id":["10.1145\/3805622.3810726","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810726","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}