{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T17:21:05Z","timestamp":1770830465772,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,2,17]],"date-time":"2025-02-17T00:00:00Z","timestamp":1739750400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,17]],"date-time":"2025-02-17T00:00:00Z","timestamp":1739750400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21B2027"],"award-info":[{"award-number":["U21B2027"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62366025"],"award-info":[{"award-number":["62366025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation project of Yunnan Science and Technology Department","award":["202301AT070444"],"award-info":[{"award-number":["202301AT070444"]}]},{"name":"Yunnan Key Research Projects","award":["202303AP140008"],"award-info":[{"award-number":["202303AP140008"]}]},{"name":"Yunnan Key Research Projects","award":["202203AA080004"],"award-info":[{"award-number":["202203AA080004"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s00530-025-01697-6","type":"journal-article","created":{"date-parts":[[2025,2,18]],"date-time":"2025-02-18T00:19:56Z","timestamp":1739837996000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Multi-level sentiment-aware clustering for denoising in multimodal sentiment analysis with ASR errors"],"prefix":"10.1007","volume":"31","author":[{"given":"Zixu","family":"Hu","sequence":"first","affiliation":[]},{"given":"Zhengtao","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Junjun","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,17]]},"reference":[{"key":"1697_CR1","doi-asserted-by":"crossref","unstructured":"Das, R., Singh, T.D.: Multimodal sentiment analysis: a survey of methods, trends, and challenges. ACM Comput Surv 55(13s):270:1\u2013270:38 (2023)","DOI":"10.1145\/3586075"},{"key":"1697_CR2","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Meng, F., et\u00a0al.: CH-SIMS: a Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, July 5\u201310, 2020. Association for Computational Linguistics, pp. 3718\u20133727 (2020)","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"1697_CR3","doi-asserted-by":"crossref","unstructured":"Yang, J., Yu, Y., Niu, D., et\u00a0al.: Confede: contrastive feature decomposition for multimodal sentiment analysis. In: Rogers, A., Boyd-Graber, J.L., Okazaki, N. (eds.) 
Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2023, Toronto, Canada, July 9\u201314, 2023. Association for Computational Linguistics, pp. 7617\u20137630 (2023)","DOI":"10.18653\/v1\/2023.acl-long.421"},{"key":"1697_CR4","doi-asserted-by":"publisher","first-page":"2689","DOI":"10.1109\/TASLP.2022.3192728","volume":"30","author":"Q Chen","year":"2022","unstructured":"Chen, Q., Huang, G., Wang, Y.: The weighted cross-modal attention mechanism with sentiment prediction auxiliary task for multimodal sentiment analysis. IEEE ACM Trans. Audio Speech Lang. Process. 30, 2689\u20132695 (2022)","journal-title":"IEEE ACM Trans. Audio Speech Lang. Process."},{"issue":"2","key":"1697_CR5","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltrusaitis","year":"2019","unstructured":"Baltrusaitis, T., Ahuja, C., Morency, L.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10","key":"1697_CR6","doi-asserted-by":"publisher","first-page":"12113","DOI":"10.1109\/TPAMI.2023.3275156","volume":"45","author":"P Xu","year":"2023","unstructured":"Xu, P., Zhu, X., Clifton, D.A.: Multimodal learning with transformers: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 45(10), 12113\u201312132 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1697_CR7","doi-asserted-by":"crossref","unstructured":"Dong, Z., Liu, H.: Recent advancements and challenges in multimodal sentiment analysis: a survey. In: International Conference on Machine Learning and Cybernetics, ICMLC 2023, Adelaide, Australia, July 9\u201311, 2023. IEEE, pp. 464\u2013469 (2023)","DOI":"10.1109\/ICMLC58545.2023.10327944"},{"key":"1697_CR8","doi-asserted-by":"crossref","unstructured":"Zeng, J., Liu, T., Zhou, J.: Tag-assisted multimodal sentiment analysis under uncertain missing modalities. In: SIGIR \u201922: The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, Madrid, Spain, July 11\u201315, 2022. ACM, pp. 1545\u20131554 (2022)","DOI":"10.1145\/3477495.3532064"},{"key":"1697_CR9","doi-asserted-by":"publisher","first-page":"5753","DOI":"10.1109\/TMM.2023.3338769","volume":"26","author":"R Huan","year":"2024","unstructured":"Huan, R., Zhong, G., Chen, P., et al.: Unimf: a unified multimodal framework for multimodal sentiment analysis in missing modalities and unaligned multimodal sequences. IEEE Trans. Multim. 26, 5753\u20135768 (2024)","journal-title":"IEEE Trans. Multim."},{"key":"1697_CR10","doi-asserted-by":"publisher","first-page":"7265","DOI":"10.1109\/TMM.2024.3362600","volume":"26","author":"Z Yuan","year":"2024","unstructured":"Yuan, Z., Zhang, B., Xu, H., et al.: Meta noise adaption framework for multimodal sentiment analysis with feature noise. IEEE Trans. Multim. 26, 7265\u20137277 (2024)","journal-title":"IEEE Trans. Multim."},{"key":"1697_CR11","doi-asserted-by":"crossref","unstructured":"Shang, C., Palmer, A., Sun, J., et\u00a0al.: VIGAN: missing view imputation with generative adversarial networks. In: 2017 IEEE International Conference on Big Data (IEEE BigData 2017), Boston, MA, USA, December 11\u201314, 2017. IEEE Computer Society, pp. 
766\u2013775 (2017)","DOI":"10.1109\/BigData.2017.8257992"},{"key":"1697_CR12","doi-asserted-by":"crossref","unstructured":"Zhao, J., Li, R., Jin, Q.: Missing modality imagination network for emotion recognition with uncertain missing modalities. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, ACL\/IJCNLP 2021, (Volume 1: Long Papers), Virtual Event, August 1\u20136, 2021. Association for Computational Linguistics, pp. 2608\u20132618 (2021)","DOI":"10.18653\/v1\/2021.acl-long.203"},{"key":"1697_CR13","doi-asserted-by":"crossref","unstructured":"Wu, Y., Zhao, Y., Yang, H., et\u00a0al.: Sentiment word aware multimodal refinement for multimodal sentiment analysis with ASR errors. In: Findings of the Association for Computational Linguistics: ACL 2022, Dublin, Ireland, May 22\u201327, 2022. Association for Computational Linguistics, pp. 1397\u20131406 (2022)","DOI":"10.18653\/v1\/2022.findings-acl.109"},{"key":"1697_CR14","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H., Bai, S., Liang, P.P., et\u00a0al.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the 57th Conference of the Association for Computational Linguistics, ACL 2019, Florence, Italy, July 28\u2013August 2, 2019, Volume 1: Long Papers. Association for Computational Linguistics, pp. 6558\u20136569 (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"1697_CR15","doi-asserted-by":"publisher","first-page":"109259","DOI":"10.1016\/j.patcog.2022.109259","volume":"136","author":"D Wang","year":"2023","unstructured":"Wang, D., Guo, X., Tian, Y., et al.: TETFN: a text enhanced transformer fusion network for multimodal sentiment analysis. Pattern Recognit. 136, 109259 (2023)","journal-title":"Pattern Recognit."},{"issue":"4","key":"1697_CR16","doi-asserted-by":"publisher","first-page":"2889","DOI":"10.1109\/TAFFC.2022.3222023","volume":"14","author":"X Zhao","year":"2023","unstructured":"Zhao, X., Chen, Y., Liu, S., et al.: Shared-private memory networks for multimodal sentiment analysis. IEEE Trans. Affect. Comput. 14(4), 2889\u20132900 (2023)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"1697_CR17","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: MISA: modality-invariant and -specific representations for multimodal sentiment analysis. In: MM \u201920: The 28th ACM International Conference on Multimedia, Virtual Event\/Seattle, WA, USA, October 12\u201316, 2020. ACM, pp. 1122\u20131131 (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"1697_CR18","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Yuan, Z., et\u00a0al.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, February 2\u20139, 2021. AAAI Press, pp. 10790\u201310797 (2021)","DOI":"10.1609\/aaai.v35i12.17289"},{"issue":"3","key":"1697_CR19","doi-asserted-by":"publisher","first-page":"2276","DOI":"10.1109\/TAFFC.2022.3172360","volume":"14","author":"S Mai","year":"2023","unstructured":"Mai, S., Zeng, Y., Zheng, S., et al.: Hybrid contrastive learning of tri-modal representation for multimodal sentiment analysis. IEEE Trans. Affect. Comput. 
14(3), 2276\u20132289 (2023)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"1697_CR20","doi-asserted-by":"crossref","unstructured":"Hu, G., Lin, T., Zhao, Y., et\u00a0al.: Unimse: towards unified multimodal sentiment analysis and emotion recognition. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, EMNLP 2022, Abu Dhabi, United Arab Emirates, December 7\u201311, 2022. Association for Computational Linguistics, pp. 7837\u20137851 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.534"},{"key":"1697_CR21","unstructured":"Yuan, Z., Li, W., Xu, H., et\u00a0al.: Transformer-based feature reconstruction network for robust multimodal sentiment analysis. In: MM \u201921: ACM Multimedia Conference, Virtual Event, China, October 20\u201324, 2021. ACM, pp. 4400\u20134407 (2021)"},{"key":"1697_CR22","doi-asserted-by":"crossref","unstructured":"Dumpala, S.H., Sheikh, I.A., Chakraborty, R., et\u00a0al.: Sentiment classification on erroneous ASR transcripts: a multi view learning approach. In: 2018 IEEE Spoken Language Technology Workshop, SLT 2018, Athens, Greece, December 18\u201321, 2018. IEEE, pp. 807\u2013814 (2018)","DOI":"10.1109\/SLT.2018.8639665"},{"key":"1697_CR23","doi-asserted-by":"crossref","unstructured":"Lakomkin, E., Zamani, M., Weber, C., et\u00a0al.: Incorporating end-to-end speech recognition models for sentiment analysis. In: International Conference on Robotics and Automation, ICRA 2019, Montreal, QC, Canada, May 20\u201324, 2019. IEEE, pp. 7976\u20137982 (2019)","DOI":"10.1109\/ICRA.2019.8794468"},{"key":"1697_CR24","volume-title":"What the face reveals: basic and applied studies of spontaneous expression using the Facial Action Coding System (FACS)","author":"P Ekman","year":"1997","unstructured":"Ekman, P., Rosenberg, E.L.: What the face reveals: basic and applied studies of spontaneous expression using the Facial Action Coding System (FACS). Oxford University Press, Oxford (1997)"},{"key":"1697_CR25","doi-asserted-by":"crossref","unstructured":"Degottex, G., Kane, J., Drugman, T., et\u00a0al.: COVAREP\u2014a collaborative voice analysis repository for speech technologies. In: IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2014, Florence, Italy, May 4\u20139, 2014. IEEE, pp. 960\u2013964 (2014)","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"1697_CR26","unstructured":"Devlin, J., Chang, M., Lee, K., et\u00a0al.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2\u20137, 2019, Volume 1 (Long and Short Papers). Association for Computational Linguistics, pp. 4171\u20134186 (2019)"},{"key":"1697_CR27","doi-asserted-by":"crossref","unstructured":"Yin, D., Meng, T., Chang, K.: Sentibert: a transferable transformer-based architecture for compositional sentiment semantics. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, July 5\u201310, 2020. Association for Computational Linguistics, pp. 3695\u20133706 (2020)","DOI":"10.18653\/v1\/2020.acl-main.341"},{"key":"1697_CR28","unstructured":"Zadeh, A., Zellers, R., Pincus, E., et\u00a0al.: MOSI: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. CoRR. 
arXiv:1606.06259 (2016)"},{"issue":"1","key":"1697_CR29","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1109\/TAFFC.2023.3274829","volume":"15","author":"L Sun","year":"2024","unstructured":"Sun, L., Lian, Z., Liu, B., et al.: Efficient multimodal transformer with dual-level feature restoration for robust multimodal sentiment analysis. IEEE Trans. Affect. Comput. 15(1), 309\u2013325 (2024)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"1697_CR30","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TMM.2022.3183830","volume":"25","author":"D Wang","year":"2023","unstructured":"Wang, D., Liu, S., Wang, Q., et al.: Cross-modal enhancement network for multimodal sentiment analysis. IEEE Trans. Multim. 25, 4909\u20134921 (2023)","journal-title":"IEEE Trans. Multim."},{"key":"1697_CR31","doi-asserted-by":"publisher","first-page":"111346","DOI":"10.1016\/j.knosys.2023.111346","volume":"285","author":"J Huang","year":"2024","unstructured":"Huang, J., Zhou, J., Tang, Z., et al.: TMBL: transformer-based multimodal binding learning model for multimodal sentiment analysis. Knowl. Based Syst. 285, 111346 (2024)","journal-title":"Knowl. Based Syst."},{"key":"1697_CR32","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., et\u00a0al.: Tensor fusion network for multimodal sentiment analysis. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, EMNLP 2017, Copenhagen, Denmark, September 9\u201311, 2017. Association for Computational Linguistics, pp. 1103\u20131114 (2017)","DOI":"10.18653\/v1\/D17-1115"},{"key":"1697_CR33","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., et\u00a0al.: Efficient low-rank multimodal fusion with modality-specific factors. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, ACL 2018, Melbourne, Australia, July 15\u201320, 2018, Volume 1: Long Papers. Association for Computational Linguistics, pp. 2247\u20132256 (2018)","DOI":"10.18653\/v1\/P18-1209"},{"key":"1697_CR34","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res.
9(11) (2008)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01697-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01697-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01697-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,21]],"date-time":"2025-04-21T19:34:42Z","timestamp":1745264082000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01697-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,17]]},"references-count":34,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["1697"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01697-6","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,17]]},"assertion":[{"value":"10 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"116"}}
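
The record above is a Crossref REST API "work" message for DOI 10.1007/s00530-025-01697-6. A minimal sketch of how a record like this can be fetched and a few of its fields read back, assuming the public api.crossref.org works endpoint and the Python requests library; the contact address in the User-Agent header is a placeholder, not from the source:

```python
# Minimal sketch: retrieve this Crossref work record and extract a few fields.
# Field names ("message", "title", "author", "references-count") match the
# record shown above; the mailto contact is a placeholder assumption.
import requests

DOI = "10.1007/s00530-025-01697-6"
url = f"https://api.crossref.org/works/{DOI}"

resp = requests.get(
    url,
    headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"},
    timeout=30,
)
resp.raise_for_status()

# The envelope is {"status": "ok", "message-type": "work", ..., "message": {...}}
msg = resp.json()["message"]

print(msg["title"][0])  # article title
print(", ".join(f'{a["given"]} {a["family"]}' for a in msg["author"]))
print(msg["references-count"], "references")  # should report 34 for this record
```

For this record, the sketch would print the article title, the three authors (Zixu Hu, Zhengtao Yu, Junjun Guo), and the reference count of 34.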