{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T18:50:37Z","timestamp":1768416637954,"version":"3.49.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T00:00:00Z","timestamp":1749686400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T00:00:00Z","timestamp":1749686400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Henan Provincial Key Research and Development Program in Science and Technology","award":["232102210102"],"award-info":[{"award-number":["232102210102"]}]},{"name":"Henan Provincial Key Research and Development Program in Science and Technology","award":["232102210102"],"award-info":[{"award-number":["232102210102"]}]},{"name":"Henan Provincial Key Research and Development Program in Science and Technology","award":["232102210102"],"award-info":[{"award-number":["232102210102"]}]},{"name":"Henan Provincial Key Research and Development Program in Science and Technology","award":["232102210102"],"award-info":[{"award-number":["232102210102"]}]},{"name":"Key Research Project for Higher Education Institutions of Henan Province","award":["25A520023"],"award-info":[{"award-number":["25A520023"]}]},{"name":"Key Research Project for Higher Education Institutions of Henan Province","award":["25A520023"],"award-info":[{"award-number":["25A520023"]}]},{"name":"Key Research Project for Higher Education Institutions of Henan Province","award":["25A520023"],"award-info":[{"award-number":["25A520023"]}]},{"name":"Key Research Project for Higher Education Institutions of Henan Province","award":["25A520023"],"award-info":[{"award-number":["25A520023"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Inf Syst"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s10844-025-00957-1","type":"journal-article","created":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T04:07:30Z","timestamp":1749701250000},"page":"1667-1686","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A method for multimodal sentiment analysis: adaptive interaction and multi-scale fusion"],"prefix":"10.1007","volume":"63","author":[{"given":"HaiLong","family":"Wang","sequence":"first","affiliation":[]},{"given":"JiaXin","family":"Cao","sequence":"additional","affiliation":[]},{"given":"JinJin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,12]]},"reference":[{"key":"957_CR1","doi-asserted-by":"publisher","unstructured":"Aziz, A., Chowdhury, N. K., Kabir, M. A., et al. (2025). Mmtf-des: A fusion of multimodal transformer models for desire, emotion, and sentiment analysis of social media data. Neurocomputing, 623, Article 129376. https:\/\/doi.org\/10.1016\/j.neucom.2025.129376","DOI":"10.1016\/j.neucom.2025.129376"},{"key":"957_CR2","doi-asserted-by":"publisher","unstructured":"Ashish, V. (2017). Attention is all you need. Advances in Neural Information Processing Systems. 30. 
https:\/\/doi.org\/10.48550\/arXiv.1706.03762","DOI":"10.48550\/arXiv.1706.03762"},{"issue":"11","key":"957_CR3","doi-asserted-by":"publisher","first-page":"12505","DOI":"10.1007\/s10462-023-10442-2","volume":"56","author":"M Bordoloi","year":"2023","unstructured":"Bordoloi, M., & Biswas, S. K. (2023). Sentiment analysis: A survey on design framework, applications and future scopes. Artificial Intelligence Review, 56(11), 12505\u201312560. https:\/\/doi.org\/10.1007\/s10462-023-10442-2","journal-title":"Artificial Intelligence Review"},{"key":"957_CR4","doi-asserted-by":"publisher","unstructured":"Feng, X., Lin, Y., He, L., et al. (2024). Knowledge-guided dynamic modality attention fusion framework for multimodal sentiment analysis. arXiv preprint arXiv:2410.04491. https:\/\/doi.org\/10.18653\/v1\/2024.findings-emnlp.865","DOI":"10.18653\/v1\/2024.findings-emnlp.865"},{"key":"957_CR5","doi-asserted-by":"publisher","unstructured":"Ghosal, D., Majumder, N., et al. (2020). Cosmic: Commonsense knowledge for emotion identification in conversations. arXiv:2010.02795. https:\/\/doi.org\/10.48550\/arXiv.2010.02795","DOI":"10.48550\/arXiv.2010.02795"},{"key":"957_CR6","doi-asserted-by":"publisher","unstructured":"Ghosal, D., Majumder, N., Poria, S., et al. (2019). Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation. arXiv:1908.11540. https:\/\/doi.org\/10.48550\/arXiv.1908.11540","DOI":"10.48550\/arXiv.1908.11540"},{"key":"957_CR7","doi-asserted-by":"crossref","unstructured":"Guo, J., Tang, W. Jiajia Dai, et al. (2022). Dynamically adjust word representations using unaligned multimodal information. In: Proceedings of the 30th ACM International Conference on Multimedia, 3394\u20133402. https:\/\/doi.org\/10.1145\/3503161. 3548137","DOI":"10.1145\/3503161.3548137"},{"key":"957_CR8","doi-asserted-by":"publisher","unstructured":"Han, W., Chen, H., Gelbukh, A., et al. (2021). Bi-bimodal modality fusion for correlation-controlled multimodal sentiment analysis. In: Proceedings of the 2021 International Conference on Multimodal Interaction, 6\u201315. https:\/\/doi.org\/10.1145\/3462244.3479919","DOI":"10.1145\/3462244.3479919"},{"key":"957_CR9","doi-asserted-by":"publisher","unstructured":"Han, W., Chen, H., & Poria, S. (2021). Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:2109.00412. https:\/\/doi.org\/10.48550\/arXiv.2109.00412","DOI":"10.48550\/arXiv.2109.00412"},{"key":"957_CR10","doi-asserted-by":"publisher","unstructured":"Hu, D., Hou, X., Wei, L., et al. (2022a). Mm-dfn: Multimodal dynamic fusion network for emotion recognition in conversations. In: ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 7037\u20137041 . https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747397. IEEE","DOI":"10.1109\/ICASSP43922.2022.9747397"},{"key":"957_CR11","doi-asserted-by":"publisher","unstructured":"Hu, G., Lin, T.-E., Zhao, Y., et al. (2022b). Unimse: Towards unified multimodal sentiment analysis and emotion recognition. arXiv:2211.11256. https:\/\/doi.org\/10.48550\/arXiv.2211.11256","DOI":"10.48550\/arXiv.2211.11256"},{"key":"957_CR12","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Zimmermann, R., et al. (2020). Misa: Modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, 1122\u20131131. 
https:\/\/doi.org\/10.1145\/3394171.3413678","DOI":"10.1145\/3394171.3413678"},{"key":"957_CR13","doi-asserted-by":"publisher","unstructured":"Han, X., Zhang, Z., Ding, N., et al. (2021). Pre-trained models: Past, present and future. AI Open. 2, 225\u2013250. https:\/\/doi.org\/10.48550\/arXiv.2106.07139","DOI":"10.48550\/arXiv.2106.07139"},{"key":"957_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111346","volume":"285","author":"J Huang","year":"2024","unstructured":"Huang, J., Zhou, J., Tang, Z., et al. (2024). Tmbl: Transformer-based multimodal binding learning model for multimodal sentiment analysis. Knowledge-Based Systems, 285, Article 111346. https:\/\/doi.org\/10.1016\/j.knosys.2023.111346","journal-title":"Knowledge-Based Systems"},{"key":"957_CR15","doi-asserted-by":"publisher","unstructured":"Kharde, V., Sonawane, P., et al. (2016). Sentiment analysis of twitter data: a survey of techniques. arXiv:1601.06971. https:\/\/doi.org\/10.5120\/ijca2016908625","DOI":"10.5120\/ijca2016908625"},{"key":"957_CR16","doi-asserted-by":"publisher","unstructured":"Li, Y., Liu, A., Lu, Y. (2025). Multi-level language interaction transformer for multimodal sentiment analysis. Journal of Intelligent Information Systems, 1\u201320. https:\/\/doi.org\/10.1007\/s10844-025-00923-x","DOI":"10.1007\/s10844-025-00923-x"},{"key":"957_CR17","doi-asserted-by":"publisher","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V. B., et al. (2018). Efficient low-rank multimodal fusion with modality-specific factors. arXiv:1806.00064. https:\/\/doi.org\/10.48550\/arXiv.1806.00064","DOI":"10.48550\/arXiv.1806.00064"},{"key":"957_CR18","doi-asserted-by":"publisher","unstructured":"Liu, Y., Wang, W., Feng, C., et al. (2023). Expression snippet transformer for robust videobased facial expression recognition. Pattern Recognition. 138, 109368. https:\/\/doi.org\/10.1016\/j.patcog.2023.109368","DOI":"10.1016\/j.patcog.2023.109368"},{"key":"957_CR19","doi-asserted-by":"publisher","unstructured":"Li, C., Xiao, P., & Yuan, Q. (2024). Fpn-fusion: Enhanced linear complexity time series forecasting model. arXiv:2406.06603. https:\/\/doi.org\/10.48550\/arXiv2406.06603","DOI":"10.48550\/arXiv2406.06603"},{"key":"957_CR20","doi-asserted-by":"publisher","unstructured":"Liu, Y., Yuan, Z., Mao, H., et al. (2022). Make acoustic and visual cues matter: Ch-sims v2. 0 dataset and av-mixup consistent module. In: Proceedings of the 2022 International Conference on Multimodal Interaction, 247\u2013258. https:\/\/doi.org\/10.1145\/3536221.3556630","DOI":"10.1145\/3536221.3556630"},{"key":"957_CR21","doi-asserted-by":"publisher","unstructured":"Liang, P.P., Zadeh, A., Morency, et al. (2022). Foundations and trends in multimodal machine learning: Principles, challenges, and open questions. arXiv:2209.03430. https:\/\/doi.org\/10.1145\/3656580","DOI":"10.1145\/3656580"},{"key":"957_CR22","doi-asserted-by":"publisher","unstructured":"Liu, Y., Zhang, H., Zhan, Y., et al. (2024). Noise-resistant multimodal transformer for emotion recognition. International Journal of Computer Vision, 1\u201321. https:\/\/doi.org\/10.1007\/s11263-024-02304-3","DOI":"10.1007\/s11263-024-02304-3"},{"key":"957_CR23","doi-asserted-by":"publisher","unstructured":"McFee, B., Raffel, C., Liang, D., et al. (2015). librosa: Audio and music signal analysis in python. In: SciPy, 18\u201324. 
https:\/\/doi.org\/10.25080\/Majora-7b98e3ed-003","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"957_CR24","doi-asserted-by":"publisher","unstructured":"Poria, S., Hazarika, D., Majumder, et al. (2018). Meld: A multimodal multi-party dataset for emotion recognition in conversations. arXiv:1810.02508. https:\/\/doi.org\/10.48550\/arXiv.1810.02508","DOI":"10.48550\/arXiv.1810.02508"},{"key":"957_CR25","doi-asserted-by":"publisher","unstructured":"Pan, L., Liu, W. (2024). Adaptive language-interacted hyper-modality representation for multimodal sentiment analysis. International Journal of Advanced Computer Science & Applications 15(7). https:\/\/doi.org\/10.14569\/ijacsa.2024.0150746","DOI":"10.14569\/ijacsa.2024.0150746"},{"key":"957_CR26","doi-asserted-by":"publisher","unstructured":"Pham, H., Liang, P. P., Manzini, T., et al. (2019). Found in translation: Learning robust joint representations by cyclic translations between modalities. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, 6892\u20136899. https:\/\/doi.org\/10.1609\/aaai.v33i01.33016892","DOI":"10.1609\/aaai.v33i01.33016892"},{"key":"957_CR27","doi-asserted-by":"publisher","unstructured":"Shi, Y., Cai, J., & Liao, L. (2024). Multi-task learning and mutual information maximization with crossmodal transformer for multimodal sentiment analysis. Journal of Intelligent Information Systems, 1\u201319,. https:\/\/doi.org\/10.1007\/s10844-024-00858-9","DOI":"10.1007\/s10844-024-00858-9"},{"key":"957_CR28","doi-asserted-by":"publisher","unstructured":"Sun, H., Wang, H., Liu, J., et al. (2022). Cubemlp: An mlp-based model for multimodal sentiment analysis and depression estimation. In: Proceedings of the 30th ACM International Conference on Multimedia, 3722\u20133729. https:\/\/doi.org\/10.1145\/3503161.3548025","DOI":"10.1145\/3503161.3548025"},{"key":"957_CR29","doi-asserted-by":"publisher","unstructured":"Tsai, Y.-H.H., Bai, S., Liang, P.P., et al. (2019). Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol. 2019, 6558. https:\/\/doi.org\/10.18653\/v1\/P19-1656","DOI":"10.18653\/v1\/P19-1656"},{"key":"957_CR30","doi-asserted-by":"publisher","unstructured":"Tsai, Y.-H.H., Liang, P.P., et al. (2018). Learning factorized multimodal representations. arXiv:1806.06176. https:\/\/doi.org\/10.48550\/arXiv.1806.06176","DOI":"10.48550\/arXiv.1806.06176"},{"key":"957_CR31","doi-asserted-by":"publisher","unstructured":"Wang, D., Guo, X., Tian, Y., et al. (2023). Tetfn: A text enhanced transformer fusion network for multimodal sentiment analysis. Pattern Recognition. 136, 109259. https:\/\/doi.org\/10.1016\/j.patcog.2022.109259","DOI":"10.1016\/j.patcog.2022.109259"},{"key":"957_CR32","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02067-x","author":"X Wang","year":"2021","unstructured":"Wang, X., Jiang, B., Wang, X., et al. (2021). Mutualformer: Multi-modality representation learning via mutual transformer. CoRR. https:\/\/doi.org\/10.1007\/s11263-024-02067-x","journal-title":"CoRR"},{"key":"957_CR33","doi-asserted-by":"publisher","unstructured":"Wu, J., Wu, J., Zheng, Y., et al. (2024). Mlgat: multi - layer graph attention networks for multimodal emotion recognition in conversations. Journal of Intelligent Information Systems, 1\u201320,. 
https:\/\/doi.org\/10.1007\/s10844-024-00879-4","DOI":"10.1007\/s10844-024-00879-4"},{"key":"957_CR34","doi-asserted-by":"publisher","unstructured":"Wang, J., Yu, L.-C., Lai, K. R., et al. (2019). Tree-structured regional cnn-lstm model for dimensional sentiment analysis. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 28, 581\u2013591. https:\/\/doi.org\/10.1109\/TASLP.2019.2959251","DOI":"10.1109\/TASLP.2019.2959251"},{"key":"957_CR35","doi-asserted-by":"publisher","unstructured":"Xie, H., Lin, W., Lin, S., et al. (2021). A multi-dimensional relation model for dimensional sentiment analysis. Information Sciences. 579, 832\u2013844. https:\/\/doi.org\/10.1016\/j.ins.2021.08.052","DOI":"10.1016\/j.ins.2021.08.052"},{"key":"957_CR36","doi-asserted-by":"publisher","unstructured":"Yang, D., Huang, S., Kuang, H., et al. (2022). Disentangled representation learning for multimodal emotion recognition. In: Proceedings of the 30th ACM International Conference on Multimedia, 1642\u20131651. https:\/\/doi.org\/10.1145\/3503161.3547754","DOI":"10.1145\/3503161.3547754"},{"key":"957_CR37","doi-asserted-by":"publisher","unstructured":"Yu, W., Xu, H., Yuan, Z., et al. (2021). Learning modality-specific representations with selfsupervised multi-task learning for multimodal sentiment analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence. 35, 10790\u201310797. https:\/\/doi.org\/10.1609\/aaai.v35i12.17289","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"957_CR38","doi-asserted-by":"publisher","unstructured":"Zaremba, W. (2014). Recurrent neural network regularization. arXiv:1409.2329. https:\/\/doi.org\/10.48550\/arXiv.1409.2329","DOI":"10.48550\/arXiv.1409.2329"},{"key":"957_CR39","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Chen, M., Poria, et al. (2017). Tensor fusion network for multimodal sentiment analysis. arXiv:1707.07250. https:\/\/doi.org\/10.48550\/arXiv.1707.07250","DOI":"10.48550\/arXiv.1707.07250"},{"key":"957_CR40","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Lim, Y. C., & Morency, L.-P. (2018). Openface 2.0: Facial behavior analysis toolkit tadas baltru\u0161aitis. In: IEEE International Conference on Automatic Face and Gesture Recognition. https:\/\/doi.org\/10.1109\/FG.2018.00019","DOI":"10.1109\/FG.2018.00019"},{"key":"957_CR41","doi-asserted-by":"publisher","unstructured":"Zadeh, A.B., Liang, P.P., Poria, S., et al. (2018). Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2236\u20132246. https:\/\/doi.org\/10.18653\/v1\/P18-1208","DOI":"10.18653\/v1\/P18-1208"},{"key":"957_CR42","doi-asserted-by":"publisher","unstructured":"Zhang, H., Wang, Y., Yin, G., et al. (2023). Learning language-guided adaptive hypermodality representation for multimodal sentiment analysis. arXiv:2310.05804. https:\/\/doi.org\/10.48550\/arXiv.2310.05804","DOI":"10.48550\/arXiv.2310.05804"},{"key":"957_CR43","doi-asserted-by":"publisher","first-page":"14374","DOI":"10.1609\/aaai.v35i16.17690","volume":"35","author":"H Zhang","year":"2021","unstructured":"Zhang, H., Xu, H., & Lin, T.-E. (2021). Deep open intent classification with adaptive decision boundary. Proceedings of the AAAI Conference on Artificial Intelligence, 35, 14374\u201314382. 
https:\/\/doi.org\/10.1609\/aaai.v35i16.17690","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"issue":"6","key":"957_CR44","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh, A., Zellers, R., Pincus, E., et al. (2016). Multimodal sentiment intensity analysis in videos: Facial gestures and verbal messages. IEEE Intelligent Systems, 31(6), 82\u201388. https:\/\/doi.org\/10.1109\/MIS.2016.94","journal-title":"IEEE Intelligent Systems"}],"container-title":["Journal of Intelligent Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-025-00957-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10844-025-00957-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-025-00957-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T12:16:28Z","timestamp":1759234588000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10844-025-00957-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,12]]},"references-count":44,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["957"],"URL":"https:\/\/doi.org\/10.1007\/s10844-025-00957-1","relation":{},"ISSN":["0925-9902","1573-7675"],"issn-type":[{"value":"0925-9902","type":"print"},{"value":"1573-7675","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,12]]},"assertion":[{"value":"18 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 June 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 June 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 June 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"The authors declare that there are no potential conflicts of interest.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The authors declare no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}