{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T04:08:02Z","timestamp":1776053282766,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,8,30]],"date-time":"2025-08-30T00:00:00Z","timestamp":1756512000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,30]],"date-time":"2025-08-30T00:00:00Z","timestamp":1756512000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"The National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61602161"],"award-info":[{"award-number":["61602161"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Inf Syst"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s10844-025-00974-0","type":"journal-article","created":{"date-parts":[[2025,8,30]],"date-time":"2025-08-30T07:28:51Z","timestamp":1756538931000},"page":"2031-2055","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["$$A^{2} H^{2}$$ for multimodal emotional data analysis"],"prefix":"10.1007","volume":"63","author":[{"given":"Jun","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinyu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianfeng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuai","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiahui","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fang","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,8,30]]},"reference":[{"key":"974_CR1","doi-asserted-by":"publisher","unstructured":"Anderson, P., He, X., Buehler, C., et al. (2018). Bottom-up and top-down attention for image captioning and visual question answering. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp. 6077\u20136086. https:\/\/doi.org\/10.1109\/CVPR.2018.00636","DOI":"10.1109\/CVPR.2018.00636"},{"key":"974_CR2","unstructured":"Bahdanau, D., Cho, K., Bengio, Y. (2014). Neural machine translation by jointly learning to align and translate. arXiv:1409.0473"},{"issue":"2","key":"974_CR3","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1109\/TLA.2024.10412035","volume":"22","author":"J Bi","year":"2024","unstructured":"Bi, J., Wei, H., Zhang, G., et al. (2024). Dyfusion: Cross-attention 3d object detection with dynamic fusion. IEEE Latin America Transactions, 22(2), 106\u2013112. https:\/\/doi.org\/10.1109\/TLA.2024.10412035","journal-title":"IEEE Latin America Transactions"},{"issue":"4","key":"974_CR4","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., Bulut, M., Lee, C.-C., et al. (2008). Iemocap: Interactive emotional dyadic motion capture database. Language Resources and Evaluation, 42(4), 335\u2013359. https:\/\/doi.org\/10.1007\/s10579-008-9076-6","journal-title":"Language Resources and Evaluation"},{"key":"974_CR5","unstructured":"Chorowski, J., Bahdanau, D., Serdyuk, D., et al. (2015). Attention-based models for speech recognition. In: Proceedings of the 29th international conference on neural information processing systems - Volume 1. NIPS\u201915, pp. 577\u2013585. MIT Press. arXiv:1506.07503"},{"key":"974_CR6","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K., et al. (2019). BERT: Pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"974_CR7","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Zimmermann, R., Poria, S. (2020). Misa: Modality-invariant and -specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia. MM \u201920, pp. 1122\u20131131. Association for Computing Machinery. https:\/\/doi.org\/10.1145\/3394171.3413678","DOI":"10.1145\/3394171.3413678"},{"issue":"6","key":"974_CR8","doi-asserted-by":"publisher","first-page":"3599","DOI":"10.1007\/s00530-023-01133-7","volume":"29","author":"W Jun","year":"2023","unstructured":"Jun, W., Tianliang, Z., Jiahui, Z., et al. (2023). Hierarchical multiples self-attention mechanism for multi-modal analysis. Multimedia Systems, 29(6), 3599\u20133608. https:\/\/doi.org\/10.1007\/s00530-023-01133-7","journal-title":"Multimedia Systems"},{"key":"974_CR9","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.inffus.2022.11.022","volume":"92","author":"K Kim","year":"2023","unstructured":"Kim, K., & Park, S. (2023). Aobert: All-modalities-in-one bert for multimodal sentiment analysis. Information Fusion, 92, 37\u201345. https:\/\/doi.org\/10.1016\/j.inffus.2022.11.022","journal-title":"Information Fusion"},{"key":"974_CR10","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1007\/s10844-023-00789-x","volume":"61","author":"R Kumari","year":"2023","unstructured":"Kumari, R., Ashok, N., Agrawal, P. K., et al. (2023). Identifying multimodal misinformation leveraging novelty detection and emotion recognition: Misinformation detection. Journal of Intelligent Information System, 61, 673\u2013694. https:\/\/doi.org\/10.1007\/s10844-023-00789-x","journal-title":"Journal of Intelligent Information System"},{"key":"974_CR11","doi-asserted-by":"publisher","unstructured":"Lee, J., Tashev, I. (2015). High-level feature representation using recurrent neural network for speech emotion recognition. In: Interspeech 2015, pp. 1537\u20131540. https:\/\/doi.org\/10.21437\/Interspeech.2015-336","DOI":"10.21437\/Interspeech.2015-336"},{"key":"974_CR12","doi-asserted-by":"publisher","unstructured":"Liang, Y., Meng, F., Xu, J., et al. (2022). MSCTD: A multimodal sentiment chat translation dataset. In: Muresan, S., Nakov, P., Villavicencio, A. (eds.) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2601\u20132613. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.186","DOI":"10.18653\/v1\/2022.acl-long.186"},{"key":"974_CR13","doi-asserted-by":"publisher","first-page":"945","DOI":"10.1007\/s10844-025-00923-x","volume":"63","author":"Y Li","year":"2025","unstructured":"Li, Y., Liu, A., & Lu, Y. (2025). Multi-level language interaction transformer for multimodal sentiment analysis. Journal of Intelligent Information System, 63, 945\u2013964. https:\/\/doi.org\/10.1007\/s10844-025-00923-x","journal-title":"Journal of Intelligent Information System"},{"key":"974_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114683","volume":"173","author":"D Li","year":"2021","unstructured":"Li, D., Liu, J., Yang, Z., et al. (2021). Speech emotion recognition using recurrent neural networks with directional self-attention. Expert Systems with Applications, 173, Article 114683. https:\/\/doi.org\/10.1016\/j.eswa.2021.114683","journal-title":"Expert Systems with Applications"},{"key":"974_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.adhoc.2019.101991","volume":"96","author":"M Li","year":"2020","unstructured":"Li, M., Liu, S., & Zhang, Z. (2020). Deep tensor fusion network for multimodal ground-based cloud classification in weather station networks. Ad Hoc Networks, 96, Article 101991. https:\/\/doi.org\/10.1016\/j.adhoc.2019.101991","journal-title":"Ad Hoc Networks"},{"key":"974_CR16","doi-asserted-by":"publisher","unstructured":"Liu, Z., Lin, Y., Cao, Y., et al. (2021). Swin transformer: Hierarchical vision transformer using shifted windows. In: 2021 IEEE\/cvf international conference on computer vision (ICCV), pp. 9992\u201310002. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"974_CR17","doi-asserted-by":"publisher","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., et al. (2018). Efficient low-rank multimodal fusion with modality-specific factors. In: Gurevych, I., Miyao, Y. (eds.) Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2247\u20132256. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P18-1209","DOI":"10.18653\/v1\/P18-1209"},{"key":"974_CR18","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1016\/j.ins.2022.03.062","volume":"598","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Feng, C., Yuan, X., et al. (2022). Clip-aware expressive feature learning for video-based facial expression recognition. Information Sciences, 598, 182\u2013195. https:\/\/doi.org\/10.1016\/j.ins.2022.03.062","journal-title":"Information Sciences"},{"key":"974_CR19","doi-asserted-by":"publisher","unstructured":"Luong, T., Pham, H., Manning, C.D. (2015). Effective approaches to attention-based neural machine translation. In: M\u00e0rquez, L., Callison-Burch, C., Su, J. (eds.) Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing, pp. 1412\u20131421. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/D15-1166","DOI":"10.18653\/v1\/D15-1166"},{"key":"974_CR20","doi-asserted-by":"publisher","unstructured":"Ma, Y., Peng, H., Cambria, E. (2018). Targeted aspect-based sentiment analysis via embedding commonsense knowledge into an attentive lstm. Proceedings of the AAAI Conference on Artificial Intelligence32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.12048","DOI":"10.1609\/aaai.v32i1.12048"},{"key":"974_CR21","doi-asserted-by":"publisher","unstructured":"McIntosh, B., Duarte, K., Rawat, Y.S., et al. (2020). Visual-textual capsule routing for text-based video segmentation. In: 2020 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 9939\u20139948. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00996","DOI":"10.1109\/CVPR42600.2020.00996"},{"issue":"9","key":"974_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0273936","volume":"17","author":"J Mingyu","year":"2022","unstructured":"Mingyu, J., Jiawei, Z., & Ning, W. (2022). Afr-bert: Attention-based mechanism feature relevance fusion multimodal sentiment analysis model. PLOS ONE, 17(9), 1\u201320. https:\/\/doi.org\/10.1371\/journal.pone.0273936","journal-title":"PLOS ONE"},{"issue":"5","key":"974_CR23","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1109\/TCSVT.2020.3014889","volume":"31","author":"Y Ou","year":"2021","unstructured":"Ou, Y., Chen, Z., & Wu, F. (2021). Multimodal local-global attention network for affective video content analysis. IEEE Transactions on Circuits and Systems for Video Technology, 31(5), 1901\u20131914. https:\/\/doi.org\/10.1109\/TCSVT.2020.3014889","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"974_CR24","doi-asserted-by":"publisher","unstructured":"Poria, S., Hazarika, D., Majumder, N., et al. (2019). MELD: A multimodal multi-party dataset for emotion recognition in conversations. In: Korhonen, A., Traum, D., M\u00e0rquez, L. (eds.) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 527\u2013536. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P19-1050","DOI":"10.18653\/v1\/P19-1050"},{"key":"974_CR25","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.inffus.2017.02.003","volume":"37","author":"S Poria","year":"2017","unstructured":"Poria, S., Cambria, E., Bajpai, R., et al. (2017). A review of affective computing: From unimodal analysis to multimodal fusion. Information Fusion, 37, 98\u2013125. https:\/\/doi.org\/10.1016\/j.inffus.2017.02.003","journal-title":"Information Fusion"},{"key":"974_CR26","doi-asserted-by":"publisher","unstructured":"Rahman, W., Hasan, M.K., Lee, S., et al. (2020). Integrating multimodal information in large pretrained transformers. In: Jurafsky, D., Chai, J., Schluter, N., et al. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 2359\u20132369. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.214","DOI":"10.18653\/v1\/2020.acl-main.214"},{"issue":"15","key":"974_CR27","doi-asserted-by":"publisher","first-page":"13789","DOI":"10.1609\/aaai.v35i15.17625","volume":"35","author":"W Shen","year":"2021","unstructured":"Shen, W., Chen, J., Quan, X., et al. (2021). Dialogxl: All-in-one xlnet for multi-party conversation emotion recognition. Proceedings of the AAAI Conference on Artificial Intelligence, 35(15), 13789\u201313797. https:\/\/doi.org\/10.1609\/aaai.v35i15.17625","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"974_CR28","doi-asserted-by":"publisher","unstructured":"Tsai, Y.-H.H., Bai, S., Liang, P.P., et al. (2019). Multimodal transformer for unaligned multimodal language sequences. In: Korhonen, A., Traum, D., M\u00e0rquez, L. (eds.) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 6558\u20136569. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P19-1656","DOI":"10.18653\/v1\/P19-1656"},{"key":"974_CR29","doi-asserted-by":"publisher","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et al. (2017). Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems. NIPS\u201917, pp. 6000\u20136010. Curran Associates Inc. https:\/\/doi.org\/10.48550\/arXiv.1706.03762","DOI":"10.48550\/arXiv.1706.03762"},{"key":"974_CR30","doi-asserted-by":"publisher","unstructured":"Wang, H., Cao, J., Liu, J., et al. (2025). A method for multimodal sentiment analysis: adaptive interaction and multi-scale fusion. Journal of Intelligent Information Systems, pp. 1\u201320. https:\/\/doi.org\/10.1007\/s10844-025-00957-1","DOI":"10.1007\/s10844-025-00957-1"},{"key":"974_CR31","doi-asserted-by":"publisher","unstructured":"Wang, Y., Huang, M., Zhu, X., et al. (2016). Attention-based LSTM for aspect-level sentiment classification. In: Su, J., Duh, K., Carreras, X. (eds.) Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, pp. 606\u2013615. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/D16-1058","DOI":"10.18653\/v1\/D16-1058"},{"key":"974_CR32","doi-asserted-by":"publisher","unstructured":"Wang, H., Meghawat, A., Morency, L.-P., et al. (2017). Select-additive learning: Improving generalization in multimodal sentiment analysis. In: 2017 IEEE International Conference on Multimedia and Expo (ICME), pp. 949\u2013954. https:\/\/doi.org\/10.1109\/ICME.2017.8019301","DOI":"10.1109\/ICME.2017.8019301"},{"issue":"4","key":"974_CR33","doi-asserted-by":"publisher","first-page":"1084","DOI":"10.1109\/TMM.2019.2934824","volume":"22","author":"S Wang","year":"2020","unstructured":"Wang, S., Hao, L., & Ji, Q. (2020). Knowledge-augmented multimodal deep regression bayesian networks for emotion video tagging. IEEE Transactions on Multimedia, 22(4), 1084\u20131097. https:\/\/doi.org\/10.1109\/TMM.2019.2934824","journal-title":"IEEE Transactions on Multimedia"},{"key":"974_CR34","doi-asserted-by":"publisher","unstructured":"Wu, Z., Gong, Z., Koo, J., et al. (2024). Multimodal multi-loss fusion network for sentiment analysis. In: Duh, K., Gomez, H., Bethard, S. (eds.) Proceedings of the 2024 conference of the north american chapter of the association for computational linguistics: human language technologies (Volume 1: Long Papers), pp. 3588\u20133602. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.197","DOI":"10.18653\/v1\/2024.naacl-long.197"},{"key":"974_CR35","doi-asserted-by":"publisher","unstructured":"Wu, J., Wang, J., Jing, S., et al. (2024). Text-dominant strategy for multistage optimized modality fusion in multimodal sentiment analysis: Text-dominant strategy for multistage. Multimedia System30(6). https:\/\/doi.org\/10.1007\/s00530-024-01518-2","DOI":"10.1007\/s00530-024-01518-2"},{"key":"974_CR36","doi-asserted-by":"publisher","unstructured":"Wu, J., Zhu, T., Zhu, J., et al. (2023). A optimized bert for multimodal sentiment analysis. ACM Transactions on Multimedia Computing, Communications, and Applications19(2s). https:\/\/doi.org\/10.1145\/3566126","DOI":"10.1145\/3566126"},{"issue":"2","key":"974_CR37","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s10844-024-00879-4","volume":"63","author":"J Wu","year":"2024","unstructured":"Wu, J., Wu, J., Zheng, Y., et al. (2024). Mlgat: multi-layer graph attention networks for multimodal emotion recognition in conversations. J. Intell. Inf. Syst., 63(2), 375\u2013394. https:\/\/doi.org\/10.1007\/s10844-024-00879-4","journal-title":"J. Intell. Inf. Syst."},{"issue":"11","key":"974_CR38","doi-asserted-by":"publisher","first-page":"12978","DOI":"10.1109\/TPAMI.2022.3183612","volume":"45","author":"J Xiao","year":"2023","unstructured":"Xiao, J., Fu, X., Liu, A., et al. (2023). Image de-raining transformer. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(11), 12978\u201312995. https:\/\/doi.org\/10.1109\/TPAMI.2022.3183612","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"974_CR39","doi-asserted-by":"publisher","unstructured":"Yang, Z., Dai, Z., Yang, Y., et al. (2019). XLNet: generalized autoregressive pretraining for language understanding. Curran Associates Inc. https:\/\/doi.org\/10.48550\/arXiv.1906.08237","DOI":"10.48550\/arXiv.1906.08237"},{"key":"974_CR40","doi-asserted-by":"publisher","unstructured":"Yang, K., Xu, H., Gao, K. (2020). Cm-bert: Cross-modal bert for text-audio sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia. MM \u201920, pp. 521\u2013528. Association for Computing Machinery. https:\/\/doi.org\/10.1145\/3394171.3413690","DOI":"10.1145\/3394171.3413690"},{"key":"974_CR41","doi-asserted-by":"publisher","unstructured":"Yu, W., Xu, H., Meng, F., et al. (2020). CH-SIMS: A Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality. In: Jurafsky, D., Chai, J., Schluter, N., et al. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 3718\u20133727. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.343","DOI":"10.18653\/v1\/2020.acl-main.343"},{"issue":"12","key":"974_CR42","doi-asserted-by":"publisher","first-page":"10790","DOI":"10.1609\/aaai.v35i12.17289","volume":"35","author":"W Yu","year":"2021","unstructured":"Yu, W., Xu, H., Yuan, Z., et al. (2021). Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. Proceedings of the AAAI Conference on Artificial Intelligence, 35(12), 10790\u201310797. https:\/\/doi.org\/10.1609\/aaai.v35i12.17289","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"974_CR43","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Chen, M., Poria, S., et al. (2017). Tensor fusion network for multimodal sentiment analysis. In: Palmer, M., Hwa, R., Riedel, S. (eds.) Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 1103\u20131114. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/D17-1115","DOI":"10.18653\/v1\/D17-1115"},{"key":"974_CR44","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Liang, P.P., Mazumder, N., et al.: Memory fusion network for multi-view sequential learning. Proceedings of the AAAI Conference on Artificial Intelligence32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.12021","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"974_CR45","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Liang, P.P., Poria, S., et al. (2018). Multi-attention recurrent network for human communication comprehension. Proceedings of the AAAI Conference on Artificial Intelligence32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.12024","DOI":"10.1609\/aaai.v32i1.12024"},{"issue":"6","key":"974_CR46","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh, A., Zellers, R., Pincus, E., et al. (2016). Multimodal sentiment intensity analysis in videos: Facial gestures and verbal messages. IEEE Intelligent Systems, 31(6), 82\u201388. https:\/\/doi.org\/10.1109\/MIS.2016.94","journal-title":"IEEE Intelligent Systems"},{"key":"974_CR47","doi-asserted-by":"publisher","unstructured":"Zhang, H., Wang, Y., Yin, G., et al. (2023). Learning language-guided adaptive hyper-modality representation for multimodal sentiment analysis. In: Bouamor, H., Pino, J., Bali, K. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 756\u2013767. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.49","DOI":"10.18653\/v1\/2023.emnlp-main.49"},{"key":"974_CR48","doi-asserted-by":"publisher","unstructured":"Zhao, J., Zhang, T., Hu, J., et al. (2022) M3ED: Multi-modal multi-scene multi-label emotional dialogue database. In: Muresan, S., Nakov, P., Villavicencio, A. (eds.) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 5699\u20135710. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.391","DOI":"10.18653\/v1\/2022.acl-long.391"},{"key":"974_CR49","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1016\/j.inffus.2023.02.028","volume":"95","author":"L Zhu","year":"2023","unstructured":"Zhu, L., Zhu, Z., Zhang, C., et al. (2023). Multimodal sentiment analysis based on fusion methods: A survey. Information Fusion, 95, 306\u2013325. https:\/\/doi.org\/10.1016\/j.inffus.2023.02.028","journal-title":"Information Fusion"}],"container-title":["Journal of Intelligent Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-025-00974-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10844-025-00974-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-025-00974-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T08:11:09Z","timestamp":1761639069000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10844-025-00974-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,30]]},"references-count":49,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["974"],"URL":"https:\/\/doi.org\/10.1007\/s10844-025-00974-0","relation":{},"ISSN":["0925-9902","1573-7675"],"issn-type":[{"value":"0925-9902","type":"print"},{"value":"1573-7675","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,30]]},"assertion":[{"value":"28 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 July 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"The authors declare that they have no conflict of interest.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The authors state that this research complies with ethical standards. This research does not involve either human participants or animals.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"The paper is the authors\u2019 original work, which has not been previously published elsewhere. The paper is not currently being considered for publication elsewhere. The paper reflects the author\u2019s research and analysis truthfully and completely. The paper properly credits the meaningful contributions of co-authors and co-researchers.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}