{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T12:14:25Z","timestamp":1775132065319,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T00:00:00Z","timestamp":1772582400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T00:00:00Z","timestamp":1772582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100019025","name":"Guangdong University of Science and Technology","doi-asserted-by":"publisher","award":["GKY-2024BSQDK-13\/14"],"award-info":[{"award-number":["GKY-2024BSQDK-13\/14"]}],"id":[{"id":"10.13039\/100019025","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Language Commission","award":["YB145-122"],"award-info":[{"award-number":["YB145-122"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s00530-026-02213-0","type":"journal-article","created":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T12:51:16Z","timestamp":1772628676000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal sentiment analysis based on dual perspective modeling"],"prefix":"10.1007","volume":"32","author":[{"given":"Tuerhong","family":"Gulanbaier","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wang","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wushouer","family":"Mairidan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,3,4]]},"reference":[{"key":"2213_CR1","doi-asserted-by":"crossref","unstructured":"Zhuang, Y., Zhang, Y., Hu, Z., Zhang, X., Deng, J., Ren, F.: Glomo: global-local modal fusion for multimodal sentiment analysis. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 1800\u20131809 (2024)","DOI":"10.1145\/3664647.3681527"},{"key":"2213_CR2","unstructured":"Qian, F., Han, J., Li, J., He, Y., Zheng, T., Zheng, G.: Mutual information-based representations disentanglement for unaligned multimodal language sequences. arXiv preprint arXiv:2409.12408 (2024)"},{"issue":"13s","key":"2213_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3586075","volume":"55","author":"R Das","year":"2023","unstructured":"Das, R., Singh, T.D.: Multimodal sentiment analysis: a survey of methods, trends, and challenges. ACM Comput. Surv. 55(13s), 1\u201338 (2023)","journal-title":"ACM Comput. Surv."},{"key":"2213_CR4","doi-asserted-by":"publisher","first-page":"4121","DOI":"10.1109\/TMM.2022.3171679","volume":"25","author":"S Mai","year":"2022","unstructured":"Mai, S., Zeng, Y., Hu, H.: Multimodal information bottleneck: learning minimal sufficient unimodal and multimodal representations. IEEE Trans. Multimedia 25, 4121\u20134134 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"2213_CR5","unstructured":"Li, T., Liu, D.: Mpid: a modality-preserving and interaction-driven fusion network for multimodal sentiment analysis. In: Proceedings of the 31st International Conference on Computational Linguistics, pp. 4313\u20134322 (2025)"},{"key":"2213_CR6","unstructured":"Li, Z., Li, L.: t-hne: A text-guided hierarchical noise eliminator for multimodal sentiment analysis. In: Proceedings of the 31st International Conference on Computational Linguistics, pp. 2834\u20132844 (2025)"},{"key":"2213_CR7","doi-asserted-by":"publisher","first-page":"122454","DOI":"10.1016\/j.eswa.2023.122454","volume":"239","author":"A Ghorbanali","year":"2024","unstructured":"Ghorbanali, A., Sohrabi, M.K.: Capsule network-based deep ensemble transfer learning for multimodal sentiment analysis. Expert. Syst. Appl. 239, 122454 (2024) https:\/\/doi.org\/10.1016\/j.eswa.2023.122454","journal-title":"Expert. Syst. Appl."},{"key":"2213_CR8","doi-asserted-by":"publisher","first-page":"124236","DOI":"10.1016\/j.eswa.2024.124236","volume":"252","author":"Z Li","year":"2024","unstructured":"Li, Z., Huang, Z., Pan, Y., Yu, J., Liu, W., Chen, H., Luo, Y., Wu, D., Wang, H.: Hierarchical denoising representation disentanglement and dual-channel cross-modal-context interaction for multimodal sentiment analysis. Expert Syst. Appl. 252, 124236 (2024) https:\/\/doi.org\/10.1016\/j.eswa.2024.124236","journal-title":"Expert Syst. Appl."},{"key":"2213_CR9","doi-asserted-by":"publisher","first-page":"126274","DOI":"10.1016\/j.eswa.2024.126274","volume":"269","author":"X Li","year":"2025","unstructured":"Li, X., Zhang, H., Dong, Z., Cheng, X., Liu, Y., Zhang, X.: Learning fine-grained representation with token-level alignment for multimodal sentiment analysis. Expert Syst. Appl. 269, 126274 (2025) https:\/\/doi.org\/10.1016\/j.eswa.2024.126274","journal-title":"Expert Syst. Appl."},{"key":"2213_CR10","doi-asserted-by":"publisher","first-page":"122728","DOI":"10.1016\/j.eswa.2023.122728","volume":"242","author":"Y Zheng","year":"2024","unstructured":"Zheng, Y., Gong, J., Wen, Y., Zhang, P.: Djmf: a discriminative joint multi-task framework for multimodal sentiment analysis based on intra- and inter-task dynamics. Expert Syst. Appl. 242, 122728 (2024) https:\/\/doi.org\/10.1016\/j.eswa.2023.122728","journal-title":"Expert Syst. Appl."},{"key":"2213_CR11","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Yuan, Z., Wu, J.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 10790\u201310797 (2021)","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"2213_CR12","doi-asserted-by":"crossref","unstructured":"Nojavanasghari, B., Gopinath, D., Koushik, J., Baltru\u0161aitis, T., Morency, L.-P.: Deep multimodal fusion for persuasiveness prediction. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction, pp. 284\u2013288 (2016)","DOI":"10.1145\/2993148.2993176"},{"key":"2213_CR13","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.-P.: Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250 (2017)","DOI":"10.18653\/v1\/D17-1115"},{"key":"2213_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Zadeh, A., Morency, L.-P.: Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064 (2018)","DOI":"10.18653\/v1\/P18-1209"},{"key":"2213_CR15","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: Misa: modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1122\u20131131 (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"2213_CR16","doi-asserted-by":"crossref","unstructured":"Han, W., Chen, H., Gelbukh, A., Zadeh, A., Morency, L.-P., Poria, S.: Bi-bimodal modality fusion for correlation-controlled multimodal sentiment analysis. In: Proceedings of the 2021 International Conference on Multimodal Interaction, pp. 6\u201315 (2021)","DOI":"10.1145\/3462244.3479919"},{"key":"2213_CR17","unstructured":"Qingwen, L., Wushouer, M., Tuerhong, G.: Bi-bi-modality with bi-gated fusion in multimodal sentiment analysis, vol. 60, pp. 165\u2013172 (2024)"},{"key":"2213_CR18","doi-asserted-by":"crossref","unstructured":"Hu, G., Lin, T.-E., Zhao, Y., Lu, G., Wu, Y., Li, Y.: Unimse: towards unified multimodal sentiment analysis and emotion recognition. arXiv preprint arXiv:2211.11256 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.534"},{"key":"2213_CR19","unstructured":"Liu, Y., Ott, M., Goyal, N., Du, J., Joshi, M., Chen, D., Levy, O., Lewis, M., Zettlemoyer, L., Stoyanov, V.: Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"2213_CR20","unstructured":"Du, N., Huang, Y., Dai, A.M., Tong, S., Lepikhin, D., Xu, Y., Krikun, M., Zhou, Y., Yu, A.W., Firat, O., et al.: Glam: efficient scaling of language models with mixture-of-experts. In: International Conference on Machine Learning, pp. 5547\u20135569. PMLR (2022)"},{"issue":"6","key":"2213_CR21","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh, A., Zellers, R., Pincus, E., Morency, L.-P.: Multimodal sentiment intensity analysis in videos: facial gestures and verbal messages. IEEE Intell. Syst. 31(6), 82\u201388 (2016)","journal-title":"IEEE Intell. Syst."},{"key":"2213_CR22","doi-asserted-by":"crossref","unstructured":"Zadeh, A.B., Liang, P.P., Poria, S., Cambria, E., Morency, L.-P.: Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: long papers), pp. 2236\u20132246 (2018)","DOI":"10.18653\/v1\/P18-1208"},{"key":"2213_CR23","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Meng, F., Zhu, Y., Ma, Y., Wu, J., Zou, J., Yang, K.: Ch-sims: a Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 3718\u20133727 (2020)","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"2213_CR24","doi-asserted-by":"crossref","unstructured":"Hasan, M.K., Rahman, W., Zadeh, A., Zhong, J., Tanveer, M.I., Morency, L.-P., et al.: Ur-funny: a multimodal language dataset for understanding humor. arXiv preprint arXiv:1904.06618 (2019)","DOI":"10.18653\/v1\/D19-1211"},{"key":"2213_CR25","doi-asserted-by":"crossref","unstructured":"Yang, J., Wang, Y., Yi, R., Zhu, Y., Rehman, A., Zadeh, A., Poria, S., Morency, L.-P.: Mtag: modal-temporal attention graph for unaligned human multimodal language sequences. arXiv preprint arXiv:2010.11985 (2020)","DOI":"10.18653\/v1\/2021.naacl-main.79"},{"key":"2213_CR26","doi-asserted-by":"crossref","unstructured":"Cheng, J., Fostiropoulos, I., Boehm, B., Soleymani, M.: Multimodal phased transformer for sentiment analysis. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 2447\u20132458 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.189"},{"key":"2213_CR27","doi-asserted-by":"crossref","unstructured":"Sun, Z., Sarma, P., Sethares, W., Liang, Y.: Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 8992\u20138999 (2020)","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"2213_CR28","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.-P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol. 2019, pp. 6558 (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"2213_CR29","doi-asserted-by":"crossref","unstructured":"Han, W., Chen, H., Poria, S.: Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:2109.00412 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"2213_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105172","volume":"149","author":"M Li","year":"2024","unstructured":"Li, M., Zhu, Z., Li, K., Zhou, L., Zhao, Z., Pei, H.: Joint training strategy of unimodal and multimodal for multimodal sentiment analysis. Image Vis. Comput. 149, 105172 (2024)","journal-title":"Image Vis. Comput."},{"key":"2213_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110847","volume":"156","author":"Z Liu","year":"2024","unstructured":"Liu, Z., Cai, L., Yang, W., Liu, J.: Sentiment analysis based on text information enhancement and multimodal feature fusion. Pattern Recogn. 156, 110847 (2024)","journal-title":"Pattern Recogn."},{"key":"2213_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111136","volume":"283","author":"G Yi","year":"2024","unstructured":"Yi, G., Fan, C., Zhu, K., Lv, Z., Liang, S., Wen, Z., Pei, G., Li, T., Tao, J.: Vlp2msa: expanding vision-language pre-training to multimodal sentiment analysis. Knowl. Based Syst. 283, 111136 (2024)","journal-title":"Knowl.-Based Syst."},{"key":"2213_CR33","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1016\/j.procs.2025.08.006","volume":"266","author":"H Wang","year":"2025","unstructured":"Wang, H., Tuerhong, G., Wushouer, M., Guo, X.: Multi-modal sentiment analysis based on multi-level modal information interaction. Procedia Computer Science 266, 41\u201351 (2025)","journal-title":"Procedia Computer Science"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02213-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-026-02213-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02213-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:37:22Z","timestamp":1775129842000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-026-02213-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,4]]},"references-count":33,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["2213"],"URL":"https:\/\/doi.org\/10.1007\/s00530-026-02213-0","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,4]]},"assertion":[{"value":"7 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"154"}}