{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T15:59:39Z","timestamp":1772553579231,"version":"3.50.1"},"reference-count":32,"publisher":"American Institute of Mathematical Sciences (AIMS)","issue":"3","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["MFC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.3934\/mfc.2024017","type":"journal-article","created":{"date-parts":[[2024,4,19]],"date-time":"2024-04-19T13:30:26Z","timestamp":1713533426000},"page":"433-447","source":"Crossref","is-referenced-by-count":1,"title":["MSA-HCL: Multimodal sentiment analysis model with hybrid contrastive learning"],"prefix":"10.3934","volume":"8","author":[{"given":"Wang","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiang","family":"Hua","sequence":"additional","affiliation":[{"name":"Hebei Key Laboratory of Machine Learning and Computational Intelligence College of Mathematics and Information Science, Hebei University, Baoding, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun-Ru","family":"Dong","sequence":"additional","affiliation":[{"name":"Hebei Key Laboratory of Machine Learning and Computational Intelligence College of Mathematics and Information Science, Hebei University, Baoding, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jia-Nan","family":"Wang","sequence":"additional","affiliation":[{"name":"Hebei Key Laboratory of Machine Learning and Computational Intelligence College of Mathematics and Information Science, Hebei University, Baoding, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hebei Key Laboratory of Machine Learning and Computational Intelligence College of Mathematics and Information Science, Hebei University, Baoding, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"2321","reference":[{"key":"key-10.3934\/mfc.2024017-1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.106743"},{"key":"key-10.3934\/mfc.2024017-2","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106114"},{"key":"key-10.3934\/mfc.2024017-3","doi-asserted-by":"crossref","unstructured":"<p>R. Cadene, H. Ben-Younes, M. Cord, et al., Murel: Multimodal relational reasoning for visual question answering, <i>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition<\/i>, Los Angeles, America, (2019), 1989-1998.<\/p>","DOI":"10.1109\/CVPR.2019.00209"},{"key":"key-10.3934\/mfc.2024017-4","unstructured":"<p>T. Chen, S. Kornblith, M. Norouzi and G. Hinton, A simple framework for contrastive learning of visual representations, <i>International Conference on Machine Learning<\/i>, Vienna, Austria, (2020), 1597-1607.<\/p>"},{"key":"key-10.3934\/mfc.2024017-5","doi-asserted-by":"crossref","unstructured":"<p>G. Degottex, J. Kane, T. Drugman, T. Raitio and S. Scherer, COVAREP-A collaborative voice analysis repository for speech technologies, <i>2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)<\/i>, Florence, Italy, (2014), 960-964.<\/p>","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"key-10.3934\/mfc.2024017-6","doi-asserted-by":"crossref","unstructured":"<p>H. Devamanyu, Z, Roger and P. Soujanya, Misa: Modality-invariant and-specific representations for multimodal sentiment analysis, <i>Proceedings of the 28th ACM International Conference on Multimedia<\/i>, online, (2020), 1122-1131.<\/p>","DOI":"10.1145\/3394171.3413678"},{"key":"key-10.3934\/mfc.2024017-7","doi-asserted-by":"crossref","unstructured":"<p>Z. Fu, F. Liu, Q. Xu, et al., NHFNET: A non-homogeneous fusion network for multimodal sentiment analysis, <i>2022 IEEE International Conference on Multimedia and Expo (ICME)<\/i>, Taipei, China, (2022), 1-6.<\/p>","DOI":"10.1109\/ICME52920.2022.9859836"},{"key":"key-10.3934\/mfc.2024017-8","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2020.09.005"},{"key":"key-10.3934\/mfc.2024017-9","doi-asserted-by":"crossref","unstructured":"<p>W. Han, H. Chen, and S. Poria, Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis, <i>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing<\/i>, Punta Cana, Dominican Republic, (2021), 9180-9192.<\/p>","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"key-10.3934\/mfc.2024017-10","doi-asserted-by":"crossref","unstructured":"<p>J. He, H. Yanga, C. Zhang, H. Chen, Y. Xua, et al., Dynamic invariant-specific representation fusion network for multimodal sentiment analysis, <i>Computational Intelligence and Neuroscience<\/i>, (2022), 2105593.<\/p>","DOI":"10.1155\/2022\/2105593"},{"key":"key-10.3934\/mfc.2024017-11","unstructured":"<p>K. He, X. Zhang, S. Ren and J. Sun, Delving deep into rectifiers: Surpassing human-level performance on imagenet classification, <i>Proceedings of the IEEE International Conference on Computer Vision<\/i>, Santiago, Chile, (2015), 1026-1034.<\/p>"},{"key":"key-10.3934\/mfc.2024017-12","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-6684-6303-1.ch098"},{"key":"key-10.3934\/mfc.2024017-13","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2004.11362"},{"key":"key-10.3934\/mfc.2024017-14","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.11.022"},{"key":"key-10.3934\/mfc.2024017-15","doi-asserted-by":"crossref","unstructured":"<p>R. Lin and H. Hu, Multimodal contrastive learning via uni-modal coding and cross-modal prediction for multimodal sentiment analysis, <i>Findings of the Association for Computational Linguistics: EMNLP 2022<\/i>, Abu Dhabi, United Arab Emirates, (2022), 511-523.<\/p>","DOI":"10.18653\/v1\/2022.findings-emnlp.36"},{"key":"key-10.3934\/mfc.2024017-16","doi-asserted-by":"crossref","unstructured":"<p>Y. Ma, H. Peng, and E. Cambria, Targeted aspect-based sentiment analysis via embedding commonsense knowledge into an attentive LSTM, <i>Proceedings of the AAAI Conference on Artificial Intelligence<\/i>, New Orleans, America, <b>32<\/b> (2018), 5876-5883.<\/p>","DOI":"10.1609\/aaai.v32i1.12048"},{"key":"key-10.3934\/mfc.2024017-17","doi-asserted-by":"crossref","unstructured":"<p>S. Mai and H. Hu and S. Xing, Modality to modality translation: An adversarial representation learning and graph fusion network for multimodal fusion, <i>Proceedings of the AAAI Conference on Artificial Intelligence<\/i>, New York, America, <b>34<\/b> (2020), 164-172.<\/p>","DOI":"10.1609\/aaai.v34i01.5347"},{"key":"key-10.3934\/mfc.2024017-18","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.11.003"},{"key":"key-10.3934\/mfc.2024017-19","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3172360"},{"key":"key-10.3934\/mfc.2024017-20","unstructured":"<p>L. P. Morency, P. P. Liang, A. Zadeh, Z. Liu, Y. Shen and V. B. Lakshminarasimhan, Efficient low-rank multimodal fusion with modality-specific factors, <i>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics<\/i>, Melbourne, Australia, (2018), 2247-2256.<\/p>"},{"key":"key-10.3934\/mfc.2024017-21","doi-asserted-by":"crossref","unstructured":"<p>W. Rahman, M. K. Hasan, S. Lee, A. Zadeh, C. Mao, L. P. Morency and E. Hoque, Integrating multimodal information in large pretrained transformers, <i>Proceedings of the conference. Association for Computational Linguistics<\/i>, Seattle, America, (2020), 2359-2369.<\/p>","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"key-10.3934\/mfc.2024017-22","unstructured":"<p>H. Raia, C. Sumit and L. Yann, Dimensionality reduction by learning an invariant mapping, <i>2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition<\/i>, New Orleans, America, (2006), 1735-1742.<\/p>"},{"key":"key-10.3934\/mfc.2024017-23","unstructured":"<p>Y. H. H. Tsai, S. Bai, P. P. Liang, J. Z. Kolter, L. P. Morency and R. Salakhutdinov, Multimodal transformer for unaligned multimodal language sequences, <i>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics<\/i>, Florence, Italy, (2019), 6558-6569.<\/p>"},{"key":"key-10.3934\/mfc.2024017-24","doi-asserted-by":"crossref","unstructured":"<p>W. Yu, H. Xu, Z. Yuan and J. Wu, Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis, <i>Proceedings of the AAAI Conference on Artificial Intelligence<\/i>, Beijing, China, (2021), 10790-10797.<\/p>","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"key-10.3934\/mfc.2024017-25","doi-asserted-by":"crossref","unstructured":"<p>J. Yuan, M. Liberman, et al., Speaker identification on the SCOTUS corpus, <i>Journal of the\nAcoustical Society of America<\/i>, <b>123<\/b> (2008), 3878.<\/p>","DOI":"10.1121\/1.2935783"},{"key":"key-10.3934\/mfc.2024017-26","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109259"},{"key":"key-10.3934\/mfc.2024017-27","doi-asserted-by":"crossref","unstructured":"<p>Y. Wang, Y. Shen, Z. Liu, P. P. Liang, A. Zadeh and L. P. Morency, Words can shift: Dynamically adjusting word representations using nonverbal behaviors, <i>Proceedings of the AAAI Conference on Artificial Intelligence<\/i>, Hawaii, America, <b>33<\/b> (2019), 7216-7223.<\/p>","DOI":"10.1609\/aaai.v33i01.33017216"},{"key":"key-10.3934\/mfc.2024017-28","doi-asserted-by":"crossref","unstructured":"<p>A. Zadeh, M. Chen, S. Poria, E. Cambria and L. P. Morency, Tensor fusion network for multimodal sentiment analysis, <i>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing<\/i>, Copenhagen, Denmark, (2017), 1103-1114.<\/p>","DOI":"10.18653\/v1\/D17-1115"},{"key":"key-10.3934\/mfc.2024017-29","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2016.94"},{"key":"key-10.3934\/mfc.2024017-30","unstructured":"<p>A. B. Zadeh, P. P. Liang, S. Poria, E. Cambria and L. P. Morency, Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph, <i>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics<\/i>, Melbourne, Australia, (2018), 2236-2246.<\/p>"},{"key":"key-10.3934\/mfc.2024017-31","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.07.006"},{"key":"key-10.3934\/mfc.2024017-32","doi-asserted-by":"crossref","unstructured":"<p>X. Zhao, Y. Chen, W. Li, L. Gao and B. Tang, MAG+: An extended multimodal adaptation gate for multimodal sentiment analysis, <i>2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)<\/i>, Singapore, Singapore, (2022), 4753-4757.<\/p>","DOI":"10.1109\/ICASSP43922.2022.9746536"}],"container-title":["Mathematical Foundations of Computing"],"original-title":[],"deposited":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T08:17:56Z","timestamp":1740557876000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.aimsciences.org\/\/article\/doi\/10.3934\/mfc.2024017"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":32,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.3934\/mfc.2024017","relation":{},"ISSN":["2577-8838"],"issn-type":[{"value":"2577-8838","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}