{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T17:27:28Z","timestamp":1781198848499,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","funder":[{"name":"the National Natural Science Foundation of China","award":["U22B2061"],"award-info":[{"award-number":["U22B2061"]}]},{"name":"the Natural Science Foundation of Sichuan, China","award":["2024NSFSC0496"],"award-info":[{"award-number":["2024NSFSC0496"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754758","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:27:39Z","timestamp":1761377259000},"page":"955-964","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["DRKF: Decoupled Representations with Knowledge Fusion for Multimodal Emotion Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-3913-9333","authenticated-orcid":false,"given":"Peiyuan","family":"Jiang","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5342-9896","authenticated-orcid":false,"given":"Yao","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2573-9544","authenticated-orcid":false,"given":"Qiao","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1928-2933","authenticated-orcid":false,"given":"Zongshun","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0226-4525","authenticated-orcid":false,"given":"Jiaye","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3726-4786","authenticated-orcid":false,"given":"Lu","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4191-6565","authenticated-orcid":false,"given":"Daibing","family":"Yao","sequence":"additional","affiliation":[{"name":"Yizhou Prison, Sichuan Province, Chengdu, Sichuan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2023.3250266"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","unstructured":"Jingwen Hu Yuchen Liu Jinming Zhao and Qin Jin. 2021. MMGCN: Multimodal Fusion via Deep Graph Convolution Network for Emotion Recognition in Conversation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) Chengqing Zong Fei Xia Wenjie Li and Roberto Navigli (Eds.). Association for Computational Linguistics Online 5666-5675. doi:10.18653\/v1\/2021.acl-long.440","DOI":"10.18653\/v1\/2021.acl-long.440"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, Virtual Event, 8748-8763. https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Simon Jenni Alexander Black and John Collomosse. 2023. Audio-Visual Contrastive Learning with Temporal Self-Supervision. In Proceedings of the Thirty-Seventh AAAI Conference on Artificial Intelligence and Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence and Thirteenth Symposium on Educational Advances in Artificial Intelligence (AAAI'23\/IAAI'23\/EAAI'23). AAAI Press Washington DC USA Article 898 9 pages. doi:10.1609\/aaai.v37i7.25967","DOI":"10.1609\/aaai.v37i7.25967"},{"key":"e_1_3_2_1_6_1","volume-title":"VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio and Text. In Advances in Neural Information Processing Systems","author":"Akbari Hassan","year":"2021","unstructured":"Hassan Akbari, Liangzhe Yuan, Rui Qian, Wei-Hong Chuang, Shih-Fu Chang, Yin Cui, and Boqing Gong. 2021. VATT: Transformers for Multimodal Self-Supervised Learning from Raw Video, Audio and Text. In Advances in Neural Information Processing Systems, M. Ranzato, A. Beygelzimer, Y. Dauphin, P.S. Liang, and J. Wortman Vaughan (Eds.), Vol. 34. Curran Associates, Inc., Red Hook, NY, USA, 24206-24221. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/cb3213ada48302953cb0f166464ab356-Paper.pdf"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 32nd International Conference on Algorithmic Learning Theory (Proceedings of Machine Learning Research","volume":"1206","author":"Tosh Christopher","year":"2021","unstructured":"Christopher Tosh, Akshay Krishnamurthy, and Daniel Hsu. 2021. Contrastive learning, multi-view redundancy, and linear models. In Proceedings of the 32nd International Conference on Algorithmic Learning Theory (Proceedings of Machine Learning Research, Vol. 132), Vitaly Feldman, Katrina Ligett, and Sivan Sabato (Eds.). PMLR, Virtual Event, 1179-1206. https:\/\/proceedings.mlr.press\/v132\/tosh21a.html"},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Learning Representations (ICLR). OpenReview, Virtual Event, 10 pages. https:\/\/openreview.net\/forum?id=-bdp_8Itjwp","author":"Hubert Tsai Yao-Hung","year":"2021","unstructured":"Yao-Hung Hubert Tsai, Yue Wu, Ruslan Salakhutdinov, and Louis-Philippe Morency. 2021. Self-supervised Learning from a Multi-view Perspective. In International Conference on Learning Representations (ICLR). OpenReview, Virtual Event, 10 pages. https:\/\/openreview.net\/forum?id=-bdp_8Itjwp"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547754"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20895"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447667"},{"key":"e_1_3_2_1_12_1","first-page":"9120","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING","author":"Du Xulong","year":"2024","unstructured":"Xulong Du, Xingnan Zhang, Dandan Wang, Yingying Xu, Zhiyuan Wu, Shiqing Zhang, Xiaoming Zhao, Jun Yu, and Liangliang Lou. 2024. Integrating Representation Subspace Mapping with Unimodal Auxiliary Loss for Attention-based Multimodal Emotion Recognition. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024). European Language Resources Association (ELRA), Torino, Italy, 9120-9130."},{"key":"e_1_3_2_1_13_1","volume-title":"Beyond Redundancy: Information-aware Unsupervised Multiplex Graph Structure Learning. In Advances in Neural Information Processing Systems. Curran Associates","author":"Shen Zhixiang","year":"2024","unstructured":"Zhixiang Shen, Shuo Wang, and Zhao Kang. 2024. Beyond Redundancy: Information-aware Unsupervised Multiplex Graph Structure Learning. In Advances in Neural Information Processing Systems. Curran Associates, Inc., Red Hook, NY, USA, 10 pages."},{"key":"e_1_3_2_1_14_1","first-page":"32971","volume-title":"Levine (Eds.)","volume":"36","author":"Liang Paul Pu","year":"2023","unstructured":"Paul Pu Liang, Zihao Deng, Martin Q. Ma, James Y. Zou, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2023. Factorized Contrastive Learning: Going Beyond Multi-view Redundancy. In Advances in Neural Information Processing Systems, A. Oh, T. Naumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine (Eds.), Vol. 36. Curran Associates, Inc., Red Hook, NY, USA, 32971-32998. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/6818dcc65fdf3cbd4b05770fb957803e-Paper-Conference.pdf"},{"key":"e_1_3_2_1_15_1","first-page":"9912","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Caron Mathilde","year":"2020","unstructured":"Mathilde Caron, Ishan Misra, Julien Mairal, Priya Goyal, Piotr Bojanowski, and Armand Joulin. 2020. Unsupervised Learning of Visual Features by Contrasting Cluster Assignments. In Advances in Neural Information Processing Systems, Vol. 33. Curran Associates, Inc., Red Hook, NY, USA, 9912-9924. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/f1748d6b0fd9c3c0b67a5f43a715af2b-Paper.pdf"},{"key":"e_1_3_2_1_16_1","first-page":"15509","volume-title":"Garnett (Eds.)","volume":"32","author":"Bachman Philip","year":"2019","unstructured":"Philip Bachman, R Devon Hjelm, and William Buchwalter. 2019. Learning Representations by Maximizing Mutual Information Across Views. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d' Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc., Red Hook, NY, USA, 15509-15520. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/ddf354219aac374f1d40b7e760ee5bb7-Paper.pdf"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.416"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2024.3498443"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.824"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00740"},{"key":"e_1_3_2_1_22_1","volume-title":"Representation learning: A review and new perspectives","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio, Aaron Courville, and Pascal Vincent. 2013. Representation learning: A review and new perspectives. IEEE transactions on pattern analysis and machine intelligence, Vol. 35, 8 (2013), 1798-1828."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/SCCC.2018.8705251"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-019-04248-z"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i11.26582"},{"key":"e_1_3_2_1_27_1","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arXiv:1907.11692 [cs.CL] https:\/\/arxiv.org\/abs\/1907.11692"},{"key":"e_1_3_2_1_28_1","first-page":"12449","volume-title":"Lin (Eds.)","volume":"33","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., Red Hook, NY, USA, 12449-12460. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/92d1e1eb1cd6f9fba3227870bb6d7f07-Paper.pdf"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102216"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747631"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00393"},{"key":"e_1_3_2_1_33_1","first-page":"7354","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Chung Yu-An","year":"2018","unstructured":"Yu-An Chung, Wei-Hung Weng, Schrasing Tong, and James Glass. 2018. Unsupervised Cross-Modal Alignment of Speech and Text Embedding Spaces. In Advances in Neural Information Processing Systems, Vol. 31. Curran Associates, Inc., Red Hook, NY, USA, 7354-7365. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2018\/file\/f3ce96dfe0061d0e6e105b0b70e5aafb-Paper.pdf"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"540","author":"Belghazi Mohamed Ishmael","year":"2018","unstructured":"Mohamed Ishmael Belghazi, Aristide Baratin, Sai Rajeshwar, Sherjil Ozair, Yoshua Bengio, Aaron Courville, and Devon Hjelm. 2018. Mutual Information Neural Estimation. In Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 80), Jennifer Dy and Andreas Krause (Eds.). PMLR, Stockholm, Sweden, 531-540. https:\/\/proceedings.mlr.press\/v80\/belghazi18a.html"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning (ICML'20)","author":"Cheng Pengyu","year":"2020","unstructured":"Pengyu Cheng, Weituo Hao, Shuyang Dai, Jiachang Liu, Zhe Gan, and Lawrence Carin. 2020. CLUB: A Contrastive Log-Ratio Upper Bound of Mutual Information. In Proceedings of the 37th International Conference on Machine Learning (ICML'20). JMLR.org, Virtual Event, Article 166, 10 pages."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","unstructured":"Devamanyu Hazarika Soujanya Poria Rada Mihalcea Erik Cambria and Roger Zimmermann. 2018. ICON: Interactive Conversational Memory Network for Multimodal Emotion Detection. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing Ellen Riloff David Chiang Julia Hockenmaier and Jun'ichi Tsujii (Eds.). Association for Computational Linguistics Brussels Belgium 2594-2604. doi:10.18653\/v1\/D18-1280","DOI":"10.18653\/v1\/D18-1280"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3243"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.26599\/TST.2022.9010038"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683483"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2067"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-14600-0"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122946"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-523"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3247822"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448316"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746598"},{"key":"e_1_3_2_1_48_1","first-page":"1","volume-title":"Knowledge-aware Bayesian Co-attention for Multimodal Emotion Recognition. In ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE","author":"Zhao Zihan","year":"2023","unstructured":"Zihan Zhao, Yu Wang, and Yanfeng Wang. 2023. Knowledge-aware Bayesian Co-attention for Multimodal Emotion Recognition. In ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, Rhodes Island, Greece, 1-5."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-022-05001-5"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.391"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.10.009"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413816"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111969"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747859"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.3390\/s21144913"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3141237"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106764"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414286"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747723"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","unstructured":"Cheng Peng Ke Chen Lidan Shou and Gang Chen. 2024. CARAT: Contrastive Feature Reconstruction and Aggregation for Multi-Modal Multi-Label Emotion Recognition. In Proceedings of the Thirty-Eighth AAAI Conference on Artificial Intelligence and Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence and Fourteenth Symposium on Educational Advances in Artificial Intelligence (AAAI'24\/IAAI'24\/EAAI'24). AAAI Press Vancouver BC Canada Article 1626 9 pages. doi:10.1609\/aaai.v38i13.29374","DOI":"10.1609\/aaai.v38i13.29374"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754758","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:01:13Z","timestamp":1765342873000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754758"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":62,"alternative-id":["10.1145\/3746027.3754758","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754758","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}