{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T17:21:32Z","timestamp":1770830492369,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61602161"],"award-info":[{"award-number":["61602161"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hubei Province Science and Technology Support Project","award":["2020BAB012"],"award-info":[{"award-number":["2020BAB012"]}]},{"name":"Hubei Provincial Science and Technology Program Project","award":["2023BCB041"],"award-info":[{"award-number":["2023BCB041"]}]},{"name":"Fundamental Research Funds for the Research Fund of Hubei University of Technology","award":["2021046, 21060, 21066"],"award-info":[{"award-number":["2021046, 21060, 21066"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s00530-024-01518-2","type":"journal-article","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T07:11:23Z","timestamp":1732173083000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Text-dominant strategy for multistage optimized modality fusion in multimodal sentiment analysis"],"prefix":"10.1007","volume":"30","author":[{"given":"Jun","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiangpeng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shilong","family":"Jing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinyu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianfeng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengfei","family":"Zhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gan","family":"Zuo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"1518_CR1","doi-asserted-by":"publisher","unstructured":"Gandhi, A., Adhvaryu, K., Poria, S., et al.: Multimodal sentiment analysis: a systematic review of history, datasets, multimodal fusion methods, applications, challenges and future directions. Inf. Fus. 91, 424\u2013444 (2023). https:\/\/doi.org\/10.1016\/j.inffus.2022.09.025. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S1566253522001634","DOI":"10.1016\/j.inffus.2022.09.025"},{"key":"1518_CR2","doi-asserted-by":"publisher","unstructured":"Wang, L., Peng, J., Zheng, C., et al.: A cross modal hierarchical fusion multimodal sentiment analysis method based on multi-task learning. Inf. Process. Manage. 61(3), 103675 (2024). https:\/\/doi.org\/10.1016\/j.ipm.2024.103675. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0306457324000359","DOI":"10.1016\/j.ipm.2024.103675"},{"key":"1518_CR3","doi-asserted-by":"publisher","unstructured":"Lai, S., Hu, X., Xu, H., et al.: Multimodal sentiment analysis: a survey. Displays 80, 102563 (2023). https:\/\/doi.org\/10.1016\/j.displa.2023.102563. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0141938223001968","DOI":"10.1016\/j.displa.2023.102563"},{"key":"1518_CR4","doi-asserted-by":"publisher","unstructured":"Soleymani, M., Garcia, D., Jou, B., et al.: A survey of multimodal sentiment analysis. Image Vis. Comput. 65, 3\u201314 (2017). https:\/\/doi.org\/10.1016\/j.imavis.2017.08.003. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0262885617301191 (multimodal Sentiment Analysis and Mining in the Wild Image and Vision Computing)","DOI":"10.1016\/j.imavis.2017.08.003"},{"key":"1518_CR5","doi-asserted-by":"publisher","first-page":"12039","DOI":"10.1109\/ACCESS.2024.3354844","volume":"12","author":"H Zhao","year":"2024","unstructured":"Zhao, H., Yang, M., Bai, X., et al.: A survey on multimodal aspect-based sentiment analysis. IEEE Access 12, 12039\u201312052 (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3354844","journal-title":"IEEE Access"},{"key":"1518_CR6","doi-asserted-by":"publisher","unstructured":"Ghorbanali, A., Sohrabi, M.K.: Capsule network-based deep ensemble transfer learning for multimodal sentiment analysis. Expert Syst. Appl. 239, 122454 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2023.122454. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0957417423029561","DOI":"10.1016\/j.eswa.2023.122454"},{"key":"1518_CR7","doi-asserted-by":"publisher","unstructured":"Das, R., Singh, T.D.: Multimodal sentiment analysis: a survey of methods, trends, and challenges. ACM Comput. Surv. 55(13s), (2023). https:\/\/doi.org\/10.1145\/3586075","DOI":"10.1145\/3586075"},{"key":"1518_CR8","doi-asserted-by":"crossref","unstructured":"Poria, S., Hazarika, D., Majumder, N., et al.: Beneath the tip of the iceberg: current challenges and new directions in sentiment analysis research. IEEE Trans. Affect. Comput. 14, 108\u2013132 (2020). https:\/\/api.semanticscholar.org\/CorpusID:218470466","DOI":"10.1109\/TAFFC.2020.3038167"},{"key":"1518_CR9","doi-asserted-by":"publisher","unstructured":"Pandey, A., Vishwakarma, D.K.: Progress, achievements, and challenges in multimodal sentiment analysis using deep learning: a survey. Appl. Soft Comput. 152, 111206 (2024). https:\/\/doi.org\/10.1016\/j.asoc.2023.111206. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S1568494623012243","DOI":"10.1016\/j.asoc.2023.111206"},{"key":"1518_CR10","doi-asserted-by":"publisher","unstructured":"Zhu, L., Zhu, Z., Zhang, C., et al.: Multimodal sentiment analysis based on fusion methods: a survey. Inf. Fus. 95, 306\u2013325 (2023). https:\/\/doi.org\/10.1016\/j.inffus.2023.02.028. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S156625352300074X","DOI":"10.1016\/j.inffus.2023.02.028"},{"key":"1518_CR11","doi-asserted-by":"publisher","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P. et\u00a0al.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Florence, Italy, pp. 6558\u20136569 (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1656, https:\/\/aclanthology.org\/P19-1656","DOI":"10.18653\/v1\/P19-1656"},{"key":"1518_CR12","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: Misa: modality-invariant and -specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia. Association for Computing Machinery, New York, NY, USA, MM \u201920, pp. 1122\u20131131 (2020). https:\/\/doi.org\/10.1145\/3394171.3413678","DOI":"10.1145\/3394171.3413678"},{"key":"1518_CR13","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Yuan, Z. et\u00a0al.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: AAAI Conference on Artificial Intelligence, (2021). https:\/\/api.semanticscholar.org\/CorpusID:231855771","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"1518_CR14","doi-asserted-by":"publisher","unstructured":"Yang, D., Huang, S., Kuang, H., et\u00a0al.: Disentangled representation learning for multimodal emotion recognition. In: Proceedings of the 30th ACM International Conference on Multimedia. Association for Computing Machinery, New York, NY, USA, MM \u201922, pp. 1642\u20131651 (2022). https:\/\/doi.org\/10.1145\/3503161.3547754","DOI":"10.1145\/3503161.3547754"},{"issue":"1","key":"1518_CR15","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1007\/s00530-023-01208-5","volume":"30","author":"Y Luo","year":"2024","unstructured":"Luo, Y., Wu, R., Liu, J., et al.: Balanced sentimental information via multimodal interaction model. Multimedia Syst. 30(1), 10 (2024). https:\/\/doi.org\/10.1007\/s00530-023-01208-5","journal-title":"Multimedia Syst."},{"key":"1518_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-18032-8","author":"X Miao","year":"2024","unstructured":"Miao, X., Zhang, X., Zhang, H.: Low-rank tensor fusion and self-supervised multi-task multimodal sentiment analysis. Multimed. Tools Appl. (2024). https:\/\/doi.org\/10.1007\/s11042-023-18032-8","journal-title":"Multimed. Tools Appl."},{"issue":"07","key":"1518_CR17","doi-asserted-by":"publisher","first-page":"8419","DOI":"10.1109\/TPAMI.2023.3234553","volume":"45","author":"Z Lian","year":"2023","unstructured":"Lian, Z., Chen, L., Sun, L., et al.: Gcnet: Graph completion network for incomplete multimodal learning in conversation. IEEE Trans. Pattern Anal. Mach. Intell. 45(07), 8419\u20138432 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3234553","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1518_CR18","doi-asserted-by":"publisher","DOI":"10.1145\/3566126","author":"J Wu","year":"2023","unstructured":"Wu, J., Zhu, T., Zhu, J., et al.: A optimized bert for multimodal sentiment analysis. ACM Trans. Multimed. Comput. Commun. Appl. (2023). https:\/\/doi.org\/10.1145\/3566126","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1518_CR19","doi-asserted-by":"publisher","unstructured":"Li, K., Lu, J., Zuo, H., et al.: Multi-source domain adaptation handling inaccurate label spaces. Neurocomputing 594, 127824 (2024). https:\/\/doi.org\/10.1016\/j.neucom.2024.127824. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0925231224005952","DOI":"10.1016\/j.neucom.2024.127824"},{"issue":"4","key":"1518_CR20","doi-asserted-by":"publisher","first-page":"2193","DOI":"10.1109\/TCYB.2023.3236008","volume":"54","author":"K Li","year":"2024","unstructured":"Li, K., Lu, J., Zuo, H., et al.: Multidomain adaptation with sample and source distillation. IEEE Trans. Cybern. 54(4), 2193\u20132205 (2024). https:\/\/doi.org\/10.1109\/TCYB.2023.3236008","journal-title":"IEEE Trans. Cybern."},{"key":"1518_CR21","doi-asserted-by":"publisher","unstructured":"Rahman, W., Hasan, M.K., Lee, S. et\u00a0al.: Integrating multimodal information in large pretrained transformers. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Online, pp. 2359\u20132369 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.214","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"1518_CR22","doi-asserted-by":"publisher","unstructured":"Guo, J., Tang, J., Dai, W. et\u00a0al.: Dynamically adjust word representations using unaligned multimodal information. In: Proceedings of the 30th ACM International Conference on Multimedia. Association for Computing Machinery, New York, NY, USA, MM \u201922, pp. 3394\u20133402 (2022). https:\/\/doi.org\/10.1145\/3503161.3548137","DOI":"10.1145\/3503161.3548137"},{"key":"1518_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110502","author":"C Huang","year":"2023","unstructured":"Huang, C., Zhang, J., Wu, X., et al.: Tefna: Text-centered fusion network with crossmodal attention for multimodal sentiment analysis. Know-Based Syst. (2023). https:\/\/doi.org\/10.1016\/j.knosys.2023.110502","journal-title":"Know-Based Syst."},{"issue":"1","key":"1518_CR24","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1109\/TAFFC.2023.3274829","volume":"15","author":"L Sun","year":"2023","unstructured":"Sun, L., Lian, Z., Liu, B., et al.: Efficient multimodal transformer with dual-level feature restoration for robust multimodal sentiment analysis. IEEE Trans. Affect. Comput. 15(1), 309\u2013325 (2023). https:\/\/doi.org\/10.1109\/TAFFC.2023.3274829","journal-title":"IEEE Trans. Affect. Comput."},{"key":"1518_CR25","doi-asserted-by":"publisher","DOI":"10.1145\/3517139","author":"A Yadav","year":"2023","unstructured":"Yadav, A., Vishwakarma, D.K.: A deep multi-level attentive network for multimodal sentiment analysis. ACM Trans. Multimed. Comput. Commun. Appl. (2023). https:\/\/doi.org\/10.1145\/3517139","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1518_CR26","doi-asserted-by":"publisher","first-page":"6868","DOI":"10.1109\/TMM.2022.3214989","volume":"25","author":"T Zhu","year":"2023","unstructured":"Zhu, T., Li, L., Yang, J., et al.: Multimodal emotion classification with multi-level semantic reasoning network. IEEE Trans. Multimed. 25, 6868\u20136880 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3214989","journal-title":"IEEE Trans. Multimed."},{"issue":"9","key":"1518_CR27","doi-asserted-by":"publisher","first-page":"10074","DOI":"10.1609\/aaai.v38i9.28871","volume":"38","author":"M Li","year":"2024","unstructured":"Li, M., Yang, D., Lei, Y., et al.: A unified self-distillation framework for multimodal sentiment analysis with uncertain missing modalities. Proc. AAAI Conf. Artif. Intell. 38(9), 10074\u201310082 (2024). https:\/\/doi.org\/10.1609\/aaai.v38i9.28871","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"1518_CR28","doi-asserted-by":"publisher","unstructured":"Zhang, H., Wang, Y., Yin, G. et\u00a0al.: Learning language-guided adaptive hyper-modality representation for multimodal sentiment analysis. In: Bouamor, H., Pino, J., Bali, K. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, Singapore, pp. 756\u2013767 (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.49","DOI":"10.18653\/v1\/2023.emnlp-main.49"},{"issue":"2","key":"1518_CR29","doi-asserted-by":"publisher","first-page":"4243","DOI":"10.3233\/JIFS-233868","volume":"46","author":"T Lu","year":"2024","unstructured":"Lu, T., Zhong, X., Zhong, L.: mswinunet: a multi-modal u-shaped Swin transformer for supervised change detection. J. Intell. Fuzzy Syst. 46(2), 4243\u20134252 (2024). https:\/\/doi.org\/10.3233\/JIFS-233868","journal-title":"J. Intell. Fuzzy Syst."},{"issue":"10","key":"1518_CR30","doi-asserted-by":"publisher","first-page":"18523","DOI":"10.3934\/mbe.2023822","volume":"20","author":"J Wu","year":"2023","unstructured":"Wu, J., Zheng, X., Wang, J., et al.: AB-GRU: an attention-based bidirectional GRU model for multimodal sentiment fusion and analysis. Math. Biosci. Eng. 20(10), 18523\u201318544 (2023)","journal-title":"Math. Biosci. Eng."},{"issue":"6","key":"1518_CR31","doi-asserted-by":"publisher","first-page":"3599","DOI":"10.1007\/s00530-023-01133-7","volume":"29","author":"W Jun","year":"2023","unstructured":"Jun, W., Tianliang, Z., Jiahui, Z., et al.: Hierarchical multiples self-attention mechanism for multi-modal analysis. Multimed. Syst. 29(6), 3599\u20133608 (2023). https:\/\/doi.org\/10.1007\/s00530-023-01133-7","journal-title":"Multimed. Syst."},{"key":"1518_CR32","unstructured":"Zadeh, A., Zellers, R., Pincus, E., et\u00a0al.: MOSI: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. (2016). CoRR abs\/1606.06259 . http:\/\/arxiv.org\/abs\/1606.06259. arXiv:1606.06259"},{"key":"1518_CR33","doi-asserted-by":"publisher","unstructured":"Bagher\u00a0Zadeh, A., Liang, P.P., Poria, S. et\u00a0al.: Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Melbourne, Australia, pp. 2236\u20132246 (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1208. https:\/\/aclanthology.org\/P18-1208","DOI":"10.18653\/v1\/P18-1208"},{"key":"1518_CR34","unstructured":"Sun, Z., Sarma, P.K., Sethares, W.A., et\u00a0al.: Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis. In: AAAI Conference on Artificial Intelligence, (2019). https:\/\/api.semanticscholar.org\/CorpusID:207930647"},{"key":"1518_CR35","unstructured":"Yang, Z., Dai, Z., Yang, Y., et\u00a0al.: Xlnet: Generalized autoregressive pretraining for language understanding. In: Wallach, H., Larochelle, H., Beygelzimer, A. et\u00a0al. (eds.), Advances in Neural Information Processing Systems, vol\u00a032. Curran Associates, Inc., (2019). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/dc6a7e655d7e5840e66733e9ee67cc69-Paper.pdf"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01518-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01518-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01518-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T09:16:54Z","timestamp":1734340614000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01518-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"references-count":35,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1518"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01518-2","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"12 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors declare that we have no conflict of interest. We promise that our studies have no ethical issues. The Official Datasets website: CMU-MOSI:  CMU-MOSEI:  The code is available on  All authors declare that we agree to submit this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"353"}}