{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T13:05:53Z","timestamp":1760101553053,"version":"3.41.0"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07364-x","type":"journal-article","created":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T13:23:36Z","timestamp":1748265816000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CU-SEMLP: All-MLP-based multimodal interaction model for multimodal sentiment analysis"],"prefix":"10.1007","volume":"81","author":[{"given":"Siyuan","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongkun","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fanmin","family":"Kong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,26]]},"reference":[{"issue":"7553","key":"7364_CR1","first-page":"436","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Nature. Deep Learn 521(7553):436\u2013444","journal-title":"Deep Learn"},{"key":"7364_CR2","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1016\/j.inffus.2023.02.028","volume":"95","author":"L Zhu","year":"2023","unstructured":"Zhu L, Zhu Z, Zhang C, Xu Y, Kong X (2023) Multimodal sentiment analysis based on fusion methods: a survey. Inf Fusion 95:306\u2013325","journal-title":"Inf Fusion"},{"key":"7364_CR3","doi-asserted-by":"crossref","unstructured":"Shutova E, Kiela D, Maillard J (2016) Black holes and white rabbits: Metaphor identification with visual features. In: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 160\u2013170","DOI":"10.18653\/v1\/N16-1020"},{"key":"7364_CR4","doi-asserted-by":"crossref","unstructured":"Morvant E, Habrard A, Ayache S (2014) Majority vote of diverse classifiers for late fusion. Structural, Syntactic, and Statistical Pattern Recognition: Joint IAPR International Workshop, S+ SSPR 2014, Joensuu, Finland, August 20-22, 2014. Proceedings, pp. 153\u2013162","DOI":"10.1007\/978-3-662-44415-3_16"},{"issue":"7","key":"7364_CR5","doi-asserted-by":"publisher","first-page":"1553","DOI":"10.1109\/TMM.2013.2267205","volume":"15","author":"G Evangelopoulos","year":"2013","unstructured":"Evangelopoulos G, Zlatintsi A, Potamianos A, Maragos P, Rapantzikos K, Skoumas G, Avrithis Y (2013) Multimodal saliency and fusion for movie summarization based on aural, visual, and textual attention. IEEE Trans Multimed 15(7):1553\u20131568","journal-title":"IEEE Trans Multimed"},{"key":"7364_CR6","doi-asserted-by":"crossref","unstructured":"Chen M, Wang S, Liang PP, Baltruvsaitis T, Zadeh A, Morency L-P (2017) Multimodal sentiment analysis with word-level fusion and reinforcement learning. In: Proceedings of the 19th ACM International Conference on Multimodal Interaction, pp. 163\u2013171","DOI":"10.1145\/3136755.3136801"},{"key":"7364_CR7","doi-asserted-by":"publisher","first-page":"110494","DOI":"10.1016\/j.asoc.2023.110494","volume":"144","author":"A Aslam","year":"2023","unstructured":"Aslam A, Sargano AB, Habib Z (2023) Attention-based multimodal sentiment analysis and emotion recognition using deep neural networks. Appl Soft Comput 144:110494","journal-title":"Appl Soft Comput"},{"key":"7364_CR8","doi-asserted-by":"publisher","first-page":"108107","DOI":"10.1016\/j.knosys.2021.108107","volume":"240","author":"Y Du","year":"2022","unstructured":"Du Y, Liu Y, Peng Z, Jin X (2022) Gated attention fusion network for multimodal sentiment classification. Knowl -Based Syst 240:108107","journal-title":"Knowl -Based Syst"},{"issue":"4","key":"7364_CR9","doi-asserted-by":"publisher","first-page":"1966","DOI":"10.1109\/TCSVT.2022.3218018","volume":"33","author":"J Tang","year":"2022","unstructured":"Tang J, Liu D, Jin X, Peng Y, Zhao Q, Ding Y, Kong W (2022) BAFN: Bi-direction attention based fusion network for multimodal sentiment analysis. IEEE Trans Circuits Syst Video Technol 33(4):1966\u20131978","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"7364_CR10","doi-asserted-by":"crossref","unstructured":"Zadeh A, Liang PP, Mazumder N, Poria S, Cambria E, Morency L-P (2018) Memory fusion network for multi-view sequential learning. In: Proceedings of the AAAI Conference on Artificial Intelligence. 32:1","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"7364_CR11","doi-asserted-by":"crossref","unstructured":"Liang PP, Liu Z, Zadeh A, Morency L-P (2018) Multimodal language analysis with recurrent multistage fusion. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 150\u2013161","DOI":"10.18653\/v1\/D18-1014"},{"key":"7364_CR12","doi-asserted-by":"crossref","unstructured":"Yu Z, Wang J, Yu L-C, Zhang X (2022) Dual-encoder transformers with cross-modal alignment for multimodal aspect-based sentiment analysis. In: Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) pp. 414\u2013423","DOI":"10.18653\/v1\/2022.aacl-main.32"},{"key":"7364_CR13","doi-asserted-by":"publisher","first-page":"109259","DOI":"10.1016\/j.patcog.2022.109259","volume":"136","author":"D Wang","year":"2023","unstructured":"Wang D, Guo X, Tian Y, Liu J, He L, Luo X (2023) TETFN: a text enhanced transformer fusion network for multimodal sentiment analysis. Pattern Recognit 136:109259","journal-title":"Pattern Recognit"},{"key":"7364_CR14","doi-asserted-by":"crossref","unstructured":"Zadeh A, Chen M, Poria S, Cambria E, Morency L-P (2017) Tensor fusion network for multimodal sentiment analysis. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 1103\u20131114","DOI":"10.18653\/v1\/D17-1115"},{"key":"7364_CR15","doi-asserted-by":"crossref","unstructured":"Liu Z, Shen Y, Lakshminarasimhan VB, Liang PP, Bagher\u00a0Zadeh A, Morency L-P (2018) Efficient low-rank multimodal fusion with modality-specific factors. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2247\u20132256","DOI":"10.18653\/v1\/P18-1209"},{"key":"7364_CR16","doi-asserted-by":"crossref","unstructured":"Ma L, Yao Y, Liang T, Liu T (2025) Multi-scale cooperative multimodal transformers for multimodal sentiment analysis in videos. AI 2024: Advances in Artificial Intelligence, pp. 281\u2013297","DOI":"10.1007\/978-981-96-0351-0_21"},{"issue":"2","key":"7364_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s40747-024-01724-5","volume":"11","author":"J Fu","year":"2025","unstructured":"Fu J, Fu Y, Xue H, Xu Z (2025) TMFN: a text-based multimodal fusion network with multi-scale feature extraction and unsupervised contrastive learning for multimodal sentiment analysis. Complex Intell Syst 11(2):1\u201316","journal-title":"Complex Intell Syst"},{"key":"7364_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2024.127468","volume":"582","author":"M Farina","year":"2024","unstructured":"Farina M, Ahmad U, Taha A, Younes H, Mesbah Y, Yu X, Pedrycz W (2024) Sparsity in transformers: a systematic literature review. Neurocomputing 582:1\u20131126","journal-title":"Neurocomputing"},{"key":"7364_CR19","first-page":"24261","volume":"34","author":"IO Tolstikhin","year":"2021","unstructured":"Tolstikhin IO, Houlsby N, Kolesnikov A, Beyer L, Zhai X, Unterthiner T, Yung J, Steiner A, Keysers D, Uszkoreit J et al (2021) MLP-mixer: An all-MLP architecture for vision. Adv Neural Inf Process Syst 34:24261\u201324272","journal-title":"Adv Neural Inf Process Syst"},{"issue":"4","key":"7364_CR20","doi-asserted-by":"publisher","first-page":"5314","DOI":"10.1109\/TPAMI.2022.3206148","volume":"45","author":"H Touvron","year":"2022","unstructured":"Touvron H, Bojanowski P, Caron M, Cord M, El-Nouby A, Grave E, Izacard G, Joulin A, Synnaeve G, Verbeek J et al (2022) Resmlp: Feedforward networks for image classification with data-efficient training. IEEE Trans Pattern Anal Mach Intell 45(4):5314\u20135321","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7364_CR21","doi-asserted-by":"crossref","unstructured":"Yu T, Li X, Cai Y, Sun M, Li P (2021) S2-MLPV2: improved spatial-shift MLP architecture for vision. arXiv preprint arXiv:2108.01072","DOI":"10.1109\/WACV51458.2022.00367"},{"key":"7364_CR22","doi-asserted-by":"crossref","unstructured":"Yu T, Li X, Cai Y, Sun M, Li P (2022) S2-MLP: Spatial-shift MLP architecture for vision. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 297\u2013306","DOI":"10.1109\/WACV51458.2022.00367"},{"issue":"12","key":"7364_CR23","doi-asserted-by":"publisher","first-page":"14284","DOI":"10.1109\/TPAMI.2023.3303397","volume":"45","author":"S Chen","year":"2023","unstructured":"Chen S, Xie E, Ge C, Chen R, Liang D, Luo P (2023) Cyclemlp: A mlp-like architecture for dense prediction. IEEE Trans Pattern Anal Mach Intell 45(12):14284\u201314300","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7364_CR24","unstructured":"Nie Y, Li L, Gan Z, Wang S, Zhu C, Zeng M, Liu Z, Bansal M, Wang L (2021) MLP architectures for vision-and-language modeling: an empirical study. arXiv preprint arXiv:2112.04453"},{"issue":"2","key":"7364_CR25","doi-asserted-by":"publisher","first-page":"103229","DOI":"10.1016\/j.ipm.2022.103229","volume":"60","author":"H Lin","year":"2023","unstructured":"Lin H, Zhang P, Ling J, Yang Z, Lee LK, Liu W (2023) PS-mixer: a polar-vector and strength-vector mixer model for multimodal sentiment analysis. Inf Process Manag 60(2):103229","journal-title":"Inf Process Manag"},{"key":"7364_CR26","doi-asserted-by":"crossref","unstructured":"Morency L-P, Mihalcea R, Doshi P (2011) Towards multimodal sentiment analysis: harvesting opinions from the web. In: Proceedings of the 13th International Conference on Multimodal Interfaces, pp. 169\u2013176","DOI":"10.1145\/2070481.2070509"},{"key":"7364_CR27","unstructured":"Perez-Rosas V, Mihalcea R, Morency L-P (2013) Utterance-level multimodal sentiment analysis. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 973\u2013982"},{"issue":"3","key":"7364_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2897739","volume":"6","author":"S Park","year":"2016","unstructured":"Park S, Shim HS, Chatterjee M, Sagae K, Morency L-P (2016) Multimodal analysis and prediction of persuasiveness in online social multimedia. ACM Trans Interact Intell Syst (TiiS) 6(3):1\u201325","journal-title":"ACM Trans Interact Intell Syst (TiiS)"},{"key":"7364_CR29","doi-asserted-by":"crossref","unstructured":"Nojavanasghari B, Gopinath D, Koushik J, Baltruvsaitis T, Morency L-P (2016) Deep multimodal fusion for persuasiveness prediction. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction, pp. 284\u2013288","DOI":"10.1145\/2993148.2993176"},{"issue":"01","key":"7364_CR30","first-page":"6892","volume":"33","author":"H Pham","year":"2019","unstructured":"Pham H, Liang PP, Manzini T, Morency L-P, Pozos B (2019) Found in translation: learning robust joint representations by cyclic translations between modalities. Proc AAAI Conf Artif Intell 33(01):6892\u20136899","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7364_CR31","doi-asserted-by":"crossref","unstructured":"Tsai Y-HH, Bai S, Liang PP, Kolter JZ, Morency L-P, Salakhutdinov R (2019) Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting 2019, p 6558","DOI":"10.18653\/v1\/P19-1656"},{"key":"7364_CR32","doi-asserted-by":"crossref","unstructured":"Zadeh A, Liang PP, Poria S, Vij P, Cambria E, Morency L-P (2018) Multi-attention recurrent network for human communication comprehension. In: Proceedings of the AAAI Conference on Artificial Intelligence. 32(1)","DOI":"10.1609\/aaai.v32i1.12024"},{"issue":"1","key":"7364_CR33","first-page":"7124","volume":"29","author":"Z Lin","year":"2012","unstructured":"Lin Z, Liang B, Long Y, Dang Y, Yang M, Zhang M, Xu R (2012) Modeling intra-and inter-modal relations: hierarchical graph contrastive learning for multimodal sentiment analysis. Proc 29th Int Conf Comput Linguist 29(1):7124\u20137135","journal-title":"Proc 29th Int Conf Comput Linguist"},{"key":"7364_CR34","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1016\/j.knosys.2018.07.041","volume":"161","author":"N Majumder","year":"2018","unstructured":"Majumder N, Hazarika D, Gelbukh A, Cambria E, Poria S (2018) Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowl -Based Syst 161:124\u2013133","journal-title":"Knowl -Based Syst"},{"key":"7364_CR35","doi-asserted-by":"crossref","unstructured":"Ghosal D, Akhtar MS, Chauhan D, Poria S, Ekbal A, Bhattacharyya P (2018) Contextual inter-modal attention for multi-modal sentiment analysis. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 3454\u20133466","DOI":"10.18653\/v1\/D18-1382"},{"key":"7364_CR36","doi-asserted-by":"crossref","unstructured":"Hazarika D, Zimmermann R, Poria S (2020) Misa: Modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1122\u20131131","DOI":"10.1145\/3394171.3413678"},{"issue":"3","key":"7364_CR37","doi-asserted-by":"publisher","first-page":"2276","DOI":"10.1109\/TAFFC.2022.3172360","volume":"14","author":"S Mai","year":"2022","unstructured":"Mai S, Zeng Y, Zheng S, Hu H (2022) Hybrid contrastive learning of tri-modal representation for multimodal sentiment analysis. IEEE Trans Affect Comput 14(3):2276\u20132289","journal-title":"IEEE Trans Affect Comput"},{"key":"7364_CR38","unstructured":"Lian D, Yu Z, Sun X, Gao S (2022) AS-MLP: An axial shifted MLP architecture for vision. In: International Conference on Learning Representations"},{"key":"7364_CR39","first-page":"9204","volume":"34","author":"H Liu","year":"2021","unstructured":"Liu H, Dai Z, So D, Le QV (2021) Pay attention to mlps. Adv Neural Inform Process Syst 34:9204\u20139215","journal-title":"Adv Neural Inform Process Syst"},{"key":"7364_CR40","doi-asserted-by":"crossref","unstructured":"Zhang H, Wu C, Zhang Z, Zhu Y, Lin H, Zhang Z, Sun Y, He T, Mueller J, Manmatha R et al (2022) Resnest: split-attention networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2736\u20132746","DOI":"10.1109\/CVPRW56347.2022.00309"},{"issue":"5","key":"7364_CR41","first-page":"5436","volume":"45","author":"M-H Guo","year":"2023","unstructured":"Guo M-H, Liu Z-N, Mu T-J, Hu S-M (2023) Beyond self-attention: external attention using two linear layers for visual tasks. IEEE Trans Pattern Anal Mach Intell 45(5):5436\u20135447","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7364_CR42","doi-asserted-by":"crossref","unstructured":"Sun H, Wang H, Liu J, Chen Y-W, Lin L (2022) CubeMLP: An MLP-based model for multimodal sentiment analysis and depression estimation. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 3722\u20133729","DOI":"10.1145\/3503161.3548025"},{"issue":"1","key":"7364_CR43","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1007\/s11227-024-06588-7","volume":"81","author":"C Hu","year":"2025","unstructured":"Hu C, Liu J, Li X, Li M, He H (2025) MST-ARGCN: modality-squeeze transformer with attentional recurrent graph capsule network for multimodal sentiment analysis. J Supercomput 81(1):86","journal-title":"J Supercomput"},{"issue":"17","key":"7364_CR44","doi-asserted-by":"publisher","first-page":"8415","DOI":"10.1007\/s10489-024-05623-7","volume":"54","author":"C Liu","year":"2024","unstructured":"Liu C, Wang Y, Yang J (2024) A transformer-encoder-based multimodal multi-attention fusion network for sentiment analysis. Appl Intell 54(17):8415\u20138441","journal-title":"Appl Intell"},{"issue":"3","key":"7364_CR45","doi-asserted-by":"publisher","first-page":"103675","DOI":"10.1016\/j.ipm.2024.103675","volume":"61","author":"L Wang","year":"2024","unstructured":"Wang L, Peng J, Zheng C, Zhao T et al (2024) A cross modal hierarchical fusion multimodal sentiment analysis method based on multi-task learning. Inf Process Manag 61(3):103675","journal-title":"Inf Process Manag"},{"key":"7364_CR46","doi-asserted-by":"publisher","first-page":"126992","DOI":"10.1016\/j.neucom.2023.126992","volume":"565","author":"J Huang","year":"2024","unstructured":"Huang J, Pu Y, Zhou D, Cao J, Gu J, Zhao Z, Xu D (2024) Dynamic hypergraph convolutional network for multimodal sentiment analysis. Neurocomputing 565:126992","journal-title":"Neurocomputing"},{"key":"7364_CR47","doi-asserted-by":"publisher","first-page":"111346","DOI":"10.1016\/j.knosys.2023.111346","volume":"285","author":"J Huang","year":"2024","unstructured":"Huang J, Zhou J, Tang Z, Lin J, Chen CY-C (2024) TMBL: transformer-based multimodal binding learning model for multimodal sentiment analysis. Knowl -Based Syst 285:111346","journal-title":"Knowl -Based Syst"},{"key":"7364_CR48","doi-asserted-by":"crossref","unstructured":"Kumar A, Vepa J (2020) Gated mechanism for attention based multi modal sentiment analysis. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4477\u20134481","DOI":"10.1109\/ICASSP40776.2020.9053012"},{"issue":"01","key":"7364_CR49","first-page":"7216","volume":"33","author":"Y Wang","year":"2019","unstructured":"Wang Y, Shen Y, Liu Z, Liang PP, Zadeh A, Morency L-P (2019) Words can shift: dynamically adjusting word representations using nonverbal behaviors. Proc AAAI Conf Artif Intell 33(01):7216\u20137223","journal-title":"Proc AAAI Conf Artif Intell"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07364-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07364-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07364-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T13:23:43Z","timestamp":1748265823000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07364-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,26]]},"references-count":49,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["7364"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07364-x","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,26]]},"assertion":[{"value":"30 April 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 May 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"893"}}