{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T10:31:33Z","timestamp":1761388293371,"version":"build-2065373602"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T00:00:00Z","timestamp":1755734400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T00:00:00Z","timestamp":1755734400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003488","name":"Henan Polytechnic University","doi-asserted-by":"crossref","award":["B2024-44"],"award-info":[{"award-number":["B2024-44"]}],"id":[{"id":"10.13039\/501100003488","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100009101","name":"The Education Department Henan Province","doi-asserted-by":"crossref","award":["24A520016"],"award-info":[{"award-number":["24A520016"]}],"id":[{"id":"10.13039\/501100009101","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Henan Science and Technology Department","award":["242102211043"],"award-info":[{"award-number":["242102211043"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00530-025-01914-2","type":"journal-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:03:14Z","timestamp":1755774194000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal large language model enhancement network for multimodal sentiment analysis"],"prefix":"10.1007","volume":"31","author":[{"given":"Huanyu","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Zhihao","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Chengxiao","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Zhitao","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,21]]},"reference":[{"issue":"13s","key":"1914_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3586075","volume":"55","author":"R Das","year":"2023","unstructured":"Das, R., Singh, T.D.: Multimodal sentiment analysis: a survey of methods, trends, and challenges. ACM Comput. Surv. 55(13s), 1\u201338 (2023)","journal-title":"ACM Comput. Surv."},{"key":"1914_CR2","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.neunet.2014.10.005","volume":"63","author":"S Poria","year":"2015","unstructured":"Poria, S., Cambria, E., Hussain, A., Huang, G.-B.: Towards an intelligent framework for multimodal affective data analysis. Neural Netw. 63, 104\u2013116 (2015)","journal-title":"Neural Netw."},{"key":"1914_CR3","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.-P.: Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250 (2017)","DOI":"10.18653\/v1\/D17-1115"},{"key":"1914_CR4","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.-P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. 
In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol. 2019, p. 6558 (2019). NIH Public Access","DOI":"10.18653\/v1\/P19-1656"},{"key":"1914_CR5","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"1914_CR6","doi-asserted-by":"crossref","unstructured":"Sun, Z., Sarma, P., Sethares, W., Liang, Y.: Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 34, pp. 8992\u20138999 (2020)","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"1914_CR7","doi-asserted-by":"crossref","unstructured":"Rahman, W., Hasan, M.K., Lee, S., Zadeh, A., Mao, C., Morency, L.-P., Hoque, E.: Integrating multimodal information in large pretrained transformers. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol. 2020, p. 2359 (2020). NIH Public Access","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"1914_CR8","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TMM.2022.3183830","volume":"25","author":"D Wang","year":"2022","unstructured":"Wang, D., Liu, S., Wang, Q., Tian, Y., He, L., Gao, X.: Cross-modal enhancement network for multimodal sentiment analysis. IEEE Trans. Multimed. 25, 4909\u20134921 (2022)","journal-title":"IEEE Trans. Multimed."},{"key":"1914_CR9","unstructured":"Su, Y., Lan, T., Li, H., Xu, J., Wang, Y., Cai, D.: Pandagpt: one model to instruction-follow them all. arXiv preprint arXiv:2305.16355 (2023)"},{"key":"1914_CR10","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In: International Conference on Machine Learning, pp. 19730\u201319742 (2023). PMLR"},{"key":"1914_CR11","doi-asserted-by":"crossref","unstructured":"Jaiswal, A., Raju, A.K., Deb, S.: Facial emotion detection using deep learning. In: 2020 International Conference for Emerging Technology (INCET), pp. 1\u20135 (2020). IEEE","DOI":"10.1109\/INCET49848.2020.9154121"},{"issue":"3","key":"1914_CR12","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/s42452-020-2234-1","volume":"2","author":"N Mehendale","year":"2020","unstructured":"Mehendale, N.: Facial emotion recognition using convolutional neural networks (FERC). SN Appl. Sci. 2(3), 446 (2020)","journal-title":"SN Appl. Sci."},{"key":"1914_CR13","doi-asserted-by":"crossref","unstructured":"Singh, S., Nasoz, F.: Facial expression recognition with convolutional neural networks. In: 2020 10th Annual Computing and Communication Workshop and Conference (CCWC), pp. 0324\u20130328 (2020). IEEE","DOI":"10.1109\/CCWC47524.2020.9031283"},{"key":"1914_CR14","doi-asserted-by":"crossref","unstructured":"Zahara, L., Musa, P., Wibowo, E.P., Karim, I., Musa, S.B.: The facial emotion recognition (fer-2013) dataset for prediction system of micro-expressions face using the convolutional neural network (CNN) algorithm based raspberry pi. In: 2020 Fifth International Conference on Informatics and Computing (ICIC), pp. 1\u20139 (2020). 
IEEE","DOI":"10.1109\/ICIC50835.2020.9288560"},{"key":"1914_CR15","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1016\/j.neucom.2020.06.014","volume":"411","author":"J Li","year":"2020","unstructured":"Li, J., Jin, K., Zhou, D., Kubota, N., Ju, Z.: Attention mechanism-based CNN for facial expression recognition. Neurocomputing 411, 340\u2013350 (2020)","journal-title":"Neurocomputing"},{"key":"1914_CR16","doi-asserted-by":"crossref","unstructured":"Meng, D., Peng, X., Wang, K., Qiao, Y.: Frame attention networks for facial expression recognition in videos. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 3866\u20133870 (2019). IEEE","DOI":"10.1109\/ICIP.2019.8803603"},{"key":"1914_CR17","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1016\/j.jvcir.2018.12.039","volume":"59","author":"M Hu","year":"2019","unstructured":"Hu, M., Wang, H., Wang, X., Yang, J., Wang, R.: Video facial emotion recognition based on local enhanced motion history image and CNN-CTSLSTM networks. J. Vis. Commun. Image Represent. 59, 176\u2013185 (2019)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"1914_CR18","doi-asserted-by":"crossref","unstructured":"Eyben, F., W\u00f6llmer, M., Schuller, B.: Openear\u2013introducing the munich open-source emotion and affect recognition toolkit. In: 2009 3rd International Conference on Affective Computing and Intelligent Interaction and Workshops, pp. 1\u20136 (2009). IEEE","DOI":"10.1109\/ACII.2009.5349350"},{"key":"1914_CR19","doi-asserted-by":"crossref","unstructured":"Degottex, G., Kane, J., Drugman, T., Raitio, T., Scherer, S.: Covarep\u2013a collaborative voice analysis repository for speech technologies. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 960\u2013964 (2014). IEEE","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"1914_CR20","doi-asserted-by":"crossref","unstructured":"Ng, H.T., Goh, W.B., Low, K.L.: Feature selection, perceptron learning, and a usability case study for text categorization. In: Proceedings of the 20th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 67\u201373 (1997)","DOI":"10.1145\/258525.258537"},{"key":"1914_CR21","unstructured":"Vapnik, V., Golowich, S., Smola, A.: Support vector method for function approximation, regression estimation and signal processing. Advances in neural information processing systems 9 (1996)"},{"key":"1914_CR22","doi-asserted-by":"crossref","unstructured":"Luan, Y., Lin, S.: Research on text classification based on cnn and lstm. In: 2019 IEEE International Conference on Artificial Intelligence and Computer Applications (ICAICA), pp. 352\u2013355 (2019). IEEE","DOI":"10.1109\/ICAICA.2019.8873454"},{"key":"1914_CR23","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1016\/j.neucom.2019.01.078","volume":"337","author":"G Liu","year":"2019","unstructured":"Liu, G., Guo, J.: Bidirectional LSTM with attention mechanism and convolutional layer for text classification. Neurocomputing 337, 325\u2013338 (2019)","journal-title":"Neurocomputing"},{"issue":"8","key":"1914_CR24","doi-asserted-by":"publisher","first-page":"5789","DOI":"10.1007\/s10462-021-09958-2","volume":"54","author":"FA Acheampong","year":"2021","unstructured":"Acheampong, F.A., Nunoo-Mensah, H., Chen, W.: Transformer models for text-based emotion detection: a review of BERT-based approaches. Artif. Intell. Rev. 54(8), 5789\u20135829 (2021)","journal-title":"Artif. Intell. 
Rev."},{"issue":"3","key":"1914_CR25","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/MIS.2013.9","volume":"28","author":"VP Rosas","year":"2013","unstructured":"Rosas, V.P., Mihalcea, R., Morency, L.-P.: Multimodal sentiment analysis of Spanish online videos. IEEE Intell. Syst. 28(3), 38\u201345 (2013)","journal-title":"IEEE Intell. Syst."},{"key":"1914_CR26","doi-asserted-by":"crossref","unstructured":"Sarkar, C., Bhatia, S., Agarwal, A., Li, J.: Feature analysis for computational personality recognition using Youtube personality data set. In: Proceedings of the 2014 ACM Multi Media on Workshop on Computational Personality Recognition, pp. 11\u201314 (2014)","DOI":"10.1145\/2659522.2659528"},{"key":"1914_CR27","doi-asserted-by":"crossref","unstructured":"Monkaresi, H., Hussain, M.S., Calvo, R.A.: Classification of affects using head movement, skin color features and physiological signals. In: 2012 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 2664\u20132669 (2012). IEEE","DOI":"10.1109\/ICSMC.2012.6378149"},{"key":"1914_CR28","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Schmidhuber, J.: Bidirectional LSTM networks for improved phoneme classification and recognition. In: International Conference on Artificial Neural Networks, pp. 799\u2013804 (2005). Springer","DOI":"10.1007\/11550907_126"},{"key":"1914_CR29","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Liang, P.P., Mazumder, N., Poria, S., Cambria, E., Morency, L.-P.: Memory fusion network for multi-view sequential learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"1914_CR30","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: Misa: Modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1122\u20131131 (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"1914_CR31","first-page":"29615","volume":"36","author":"G Luo","year":"2024","unstructured":"Luo, G., Zhou, Y., Ren, T., Chen, S., Sun, X., Ji, R.: Cheap and quick: efficient vision-language instruction tuning for large language models. Adv. Neural. Inf. Process. Syst. 36, 29615\u201329627 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1914_CR32","unstructured":"Wu, S., Fei, H., Qu, L., Ji, W., Chua, T.-S.: Next-GPT: Any-to-any multimodal LLM. arXiv preprint arXiv:2309.05519 (2023)"},{"key":"1914_CR33","doi-asserted-by":"crossref","unstructured":"Lu, J., Clark, C., Lee, S., Zhang, Z., Khosla, S., Marten, R., Hoiem, D., Kembhavi, A.: Unified-IO 2: Scaling autoregressive multimodal models with vision, language, audio, and action. arXiv preprint arXiv:2312.17172 (2023)","DOI":"10.1109\/CVPR52733.2024.02497"},{"key":"1914_CR34","unstructured":"Zadeh, A., Zellers, R., Pincus, E., Morency, L.-P.: Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:1606.06259 (2016)"},{"key":"1914_CR35","unstructured":"Zadeh, A.B., Liang, P.P., Poria, S., Cambria, E., Morency, L.-P.: Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 
2236\u20132246 (2018)"},{"key":"1914_CR36","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Zadeh, A., Morency, L.-P.: Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064 (2018)","DOI":"10.18653\/v1\/P18-1209"},{"key":"1914_CR37","unstructured":"Zadeh, A., Mao, C., Shi, K., Zhang, Y., Liang, P.P., Poria, S., Morency, L.-P.: Factorized multimodal transformer for multimodal sequential learning. arXiv preprint arXiv:1911.09826 (2019)"},{"key":"1914_CR38","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Yuan, Z., Wu, J.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence vol. 35, pp. 10790\u201310797 (2021)","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"1914_CR39","doi-asserted-by":"crossref","unstructured":"Han, W., Chen, H., Gelbukh, A., Zadeh, A., Morency, L.-p., Poria, S.: Bi-bimodal modality fusion for correlation-controlled multimodal sentiment analysis. In: Proceedings of the 2021 International Conference on Multimodal Interaction, pp. 6\u201315 (2021)","DOI":"10.1145\/3462244.3479919"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01914-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01914-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01914-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T10:26:08Z","timestamp":1761387968000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01914-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,21]]},"references-count":39,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["1914"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01914-2","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,8,21]]},"assertion":[{"value":"31 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This declaration is not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"335"}}
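The record above is a standard work object from the public Crossref REST API. A minimal sketch of fetching and parsing such a record follows, assuming network access and only the Python standard library; the endpoint https://api.crossref.org/works/{doi} is the public Crossref API, and the DOI and all field names are taken from the record itself.

```python
# Minimal sketch: fetch a Crossref work record and read fields present in the
# payload above ("title", "author", "container-title", "published", "DOI",
# "references-count"). The DOI is the one from this record.
import json
import urllib.request

DOI = "10.1007/s00530-025-01914-2"
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url) as resp:
    # The response body mirrors the record above; the work itself is under "message".
    work = json.load(resp)["message"]

title = work["title"][0]  # titles and container-titles are arrays in Crossref
authors = [f'{a["given"]} {a["family"]}' for a in work.get("author", [])]
journal = work["container-title"][0]
year = work["published"]["date-parts"][0][0]  # date-parts is [[year, month, day]]

print(f"{', '.join(authors)}: {title}. {journal} ({year}).")
print(f"DOI: https://doi.org/{work['DOI']}  references: {work['references-count']}")
```

Run against the DOI above, this prints the citation line and the reference count (39) from the same fields shown in the record.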