{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:54:04Z","timestamp":1781538844696,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Shanghai Municipal Education Commission Artificial Intelligence Plan","award":["Z2024-119"],"award-info":[{"award-number":["Z2024-119"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810760","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"710-718","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["From Confrontion to Balance: A Kronecker-Constrained Spectral Entropy Joint Optimization Framework for Multimodal Sentiment Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8044-5912","authenticated-orcid":false,"given":"Feifei","family":"Xu","sequence":"first","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, -\u9009\u62e9-, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1275-3454","authenticated-orcid":false,"given":"Puzhe","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5956-9143","authenticated-orcid":false,"given":"Dongyang","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2451-6609","authenticated-orcid":false,"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9994-1382","authenticated-orcid":false,"given":"Luobing","family":"Huang","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6928-4141","authenticated-orcid":false,"given":"Wenjing","family":"Zhu","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9259-6345","authenticated-orcid":false,"given":"Zirui","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8740-2374","authenticated-orcid":false,"given":"Yu","family":"Xie","sequence":"additional","affiliation":[{"name":"Shanghai University of Electric Power, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Shaden Alshammari John Hershey Axel Feldmann William\u00a0T Freeman and Mark Hamilton. 2025. I-con: A unifying framework for representation learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.16929 (2025)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74628-7_27"},{"key":"e_1_3_3_1_4_2","unstructured":"Tadas Baltru\u0161aitis Chaitanya Ahuja and Louis-Philippe Morency. 2019. Multimodal Machine Learning: A Survey and Taxonomy. IEEE TPAMI (2019)."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Tadas Baltru\u0161aitis Chaitanya Ahuja and Louis-Philippe Morency. 2019. Multimodal Machine Learning: A Survey and Taxonomy. IEEE Transactions on Pattern Analysis and Machine Intelligence 41 2 (2019) 423\u2013443.","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136801"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Yi Ding Alex Rich Mason Wang Noah Stier Matthew Turk Pradeep Sen and Tobias H\u00f6llerer. 2021. Sparse Fusion for Multimodal Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2111.11992 (2021).","DOI":"10.31219\/osf.io\/f7jvn"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Cunhang Fan Kang Zhu Jianhua Tao Guofeng Yi Jun Xue and Zhao Lv. 2024. Multi-level contrastive learning: Hierarchical alleviation of heterogeneity in multimodal sentiment analysis. IEEE Transactions on Affective Computing 16 1 (2024) 207\u2013222.","DOI":"10.1109\/TAFFC.2024.3423671"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680949"},{"key":"e_1_3_3_1_10_2","first-page":"10929","volume-title":"ICML","author":"Garrido Quentin","year":"2023","unstructured":"Quentin Garrido, Randall Balestriero, Laurent Najman, and Yann Lecun. 2023. Rankme: Assessing the downstream performance of pretrained self-supervised representations by their rank. In ICML. PMLR, 10929\u201310974."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Luis Gonzalo\u00a0Sanchez Giraldo Murali Rao and Jose\u00a0C Principe. 2014. Measures of entropy from data using infinitely divisible kernels. IEEE Transactions on Information Theory 61 1 (2014) 535\u2013548.","DOI":"10.1109\/TIT.2014.2370058"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Wei Han Hui Chen and Soujanya Poria. 2021. Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.00412 (2021).","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"e_1_3_3_1_13_2","unstructured":"Syed Hasan. 2025. Multimodal Learning with Cross-Attention Mechanisms: A Comprehensive Survey and Novel Architecture. (2025)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_28"},{"key":"e_1_3_3_1_16_2","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Liang Weixin","year":"2022","unstructured":"Weixin Liang, Yuhui Zhang, Yongchan Kwon, Serena Yeung, and James Zou. 2022. Mind the Gap: Understanding the Modality Gap in Multi-modal Contrastive Representation Learning. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Sijie Mai Ying Zeng Shuangjia Zheng and Haifeng Hu. 2022. Hybrid contrastive learning of tri-modal representation for multimodal sentiment analysis. IEEE Transactions on Affective Computing 14 3 (2022) 2276\u20132289.","DOI":"10.1109\/TAFFC.2022.3172360"},{"key":"e_1_3_3_1_18_2","first-page":"689","volume-title":"ICML","author":"Ngiam Jiquan","year":"2011","unstructured":"Jiquan Ngiam, Aditya Khosla, Mingyu Kim, Juhan Nam, Honglak Lee, and Andrew\u00a0Y Ng. 2011. Multimodal Deep Learning. In ICML. 689\u2013696."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","unstructured":"Hai Pham Guoxin Wang Yijuan Lu Dinei A.\u00a0F. Flor\u00eancio and Cha Zhang. 2022. Understanding Long Documents with Different Position-Aware Attentions. CoRR abs\/2208.08201 (2022). arXiv:https:\/\/arXiv.org\/abs\/2208.0820110.48550\/ARXIV.2208.08201","DOI":"10.48550\/ARXIV.2208.08201"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Soujanya Poria Erik Cambria Newton Howard Guang-Bin Huang and Amir Hussain. 2016. Fusing audio visual and textual clues for sentiment analysis from multimodal content. Neurocomputing 174 (2016) 50\u201359.","DOI":"10.1016\/j.neucom.2015.01.095"},{"key":"e_1_3_3_1_21_2","unstructured":"Chengxuan Qian Shuo Xing Shawn Li Yue Zhao and Zhengzhong Tu. 2025. Decalign: Hierarchical cross-modal alignment for decoupled multimodal representation learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.11892 (2025)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"e_1_3_3_1_23_2","first-page":"606","volume-title":"EUSIPCO","author":"Roy Olivier","year":"2007","unstructured":"Olivier Roy and Martin Vetterli. 2007. The effective rank: A measure of effective dimensionality. In EUSIPCO. IEEE, 606\u2013610."},{"key":"e_1_3_3_1_24_2","first-page":"5628","volume-title":"International conference on machine learning","author":"Saunshi Nikunj","year":"2019","unstructured":"Nikunj Saunshi, Orestis Plevrakis, Sanjeev Arora, Mikhail Khodak, and Hrishikesh Khandeparkar. 2019. A theoretical analysis of contrastive unsupervised representation learning. In International conference on machine learning. PMLR, 5628\u20135637."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413876"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"e_1_3_3_1_27_2","first-page":"5998","volume-title":"NeurIPS","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In NeurIPS. 5998\u20136008. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"e_1_3_3_1_28_2","unstructured":"Shanmin Wang Chengguang Liu and Qingshan Liu. 2025. Multi-modality collaborative learning for sentiment analysis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.12424 (2025)."},{"key":"e_1_3_3_1_29_2","volume-title":"ICML","author":"Wang Tongzhou","year":"2020","unstructured":"Tongzhou Wang and Phillip Isola. 2020. Understanding Contrastive Representation Learning through Alignment and Uniformity. In ICML."},{"key":"e_1_3_3_1_30_2","first-page":"9929","volume-title":"ICML","author":"Wang Tongzhou","year":"2020","unstructured":"Tongzhou Wang and Phillip Isola. 2020. Understanding contrastive representation learning through alignment and uniformity on the hypersphere. In ICML. PMLR, 9929\u20139939."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V39I2.32152"},{"key":"e_1_3_3_1_32_2","unstructured":"Zhuojia Wu Qi Zhang Duoqian Miao Kun Yi Wei Fan and Liang Hu. 2024. Hydiscgan: A hybrid distributed cgan for audio-visual privacy preservation in multimodal sentiment analysis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.11938 (2024)."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","unstructured":"Dingkang Yang Mingcheng Li Xuecheng Wu Zhaoyu Chen Kaixun Jiang Keliang Liu Peng Zhai and Lihua Zhang. 2025. Improving Multimodal Sentiment Analysis via Modality Optimization and Dynamic Primary Modality Selection. CoRR abs\/2511.06328 (2025). arXiv:https:\/\/arXiv.org\/abs\/2511.0632810.48550\/ARXIV.2511.06328","DOI":"10.48550\/ARXIV.2511.06328"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.421"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2024.FINDINGS-NAACL.135"},{"key":"e_1_3_3_1_36_2","unstructured":"Wenzhe Yin Pan Zhou Zehao Xiao Jie Liu Shujian Yu Jan-Jakob Sonke and Efstratios Gavves. 2026. Towards Uniformity and Alignment for Multimodal Representation Learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2602.09507 (2026)."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1023"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17288"},{"key":"e_1_3_3_1_39_2","unstructured":"Shujian Yu Luis Gonzalo\u00a0Sanchez Giraldo Robert Jenssen and Jose\u00a0C Principe. 2019. Multivariate Extension of Matrix-Based R\u00e9nyi\u2019s \u03b1 -Order Entropy Functional. IEEE transactions on pattern analysis and machine intelligence 42 11 (2019) 2960\u20132966."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"e_1_3_3_1_41_2","unstructured":"Yakun Yu Mingjun Zhao Shi-ang Qi Feiran Sun Baoxun Wang Weidong Guo Xiaoli Wang Lei Yang and Di Niu. 2023. ConKI: Contrastive knowledge injection for multimodal sentiment analysis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.15796 (2023)."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P18-1208"},{"key":"e_1_3_3_1_43_2","unstructured":"Amir Zadeh Rowan Zellers Eli Pincus and Louis-Philippe Morency. 2016. Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1606.06259 (2016)."},{"key":"e_1_3_3_1_44_2","first-page":"2236","volume-title":"ACL","author":"Zadeh AmirAli\u00a0Bagher","year":"2018","unstructured":"AmirAli\u00a0Bagher Zadeh, Paul\u00a0Pu Liang, Soujanya Poria, Erik Cambria, and Louis-Philippe Morency. 2018. Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph. In ACL. 2236\u20132246."},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"crossref","unstructured":"Haoyu Zhang Yu Wang Guanghao Yin Kejun Liu Yuanyuan Liu and Tianshu Yu. 2023. Learning Language-guided Adaptive Hyper-modality Representation for Multimodal Sentiment Analysis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.05804 (2023).","DOI":"10.18653\/v1\/2023.emnlp-main.49"},{"key":"e_1_3_3_1_46_2","first-page":"4611","volume-title":"COLING","author":"Zhang Xiangmin","year":"2025","unstructured":"Xiangmin Zhang, Wei Wei, and Shihao Zou. 2025. Modal feature optimization network with prompt for multimodal sentiment analysis. In COLING. 4611\u20134621."},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681163"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681527"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:58:07Z","timestamp":1781535487000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810760"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":47,"alternative-id":["10.1145\/3805622.3810760","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810760","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}