{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:20:32Z","timestamp":1780053632639,"version":"3.54.0"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s13735-025-00362-y","type":"journal-article","created":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T08:11:20Z","timestamp":1740816680000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["PAMoE-MSA: polarity-aware mixture of experts network for multimodal sentiment analysis"],"prefix":"10.1007","volume":"14","author":[{"given":"Changqin","family":"Huang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhenheng","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhongmei","family":"Han","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qionghao","family":"Huang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fan","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaodi","family":"Huang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,3,1]]},"reference":[{"issue":"13s","key":"362_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3586075","volume":"55","author":"R Das","year":"2023","unstructured":"Das R, Singh TD (2023) Multimodal sentiment analysis: a survey of methods, trends, and challenges. ACM Comput Surv 55(13s):1\u201338","journal-title":"ACM Comput Surv"},{"key":"362_CR2","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1016\/j.inffus.2020.09.005","volume":"66","author":"D Gkoumas","year":"2021","unstructured":"Gkoumas D, Li Q, Lioma C, Yu Y, Song D (2021) What makes the difference? An empirical comparison of fusion strategies for multimodal language analysis. Inf Fus 66:184\u2013197","journal-title":"Inf Fus"},{"key":"362_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111149","volume":"283","author":"H Shi","year":"2024","unstructured":"Shi H, Pu Y, Zhao Z, Huang J, Zhou D, Xu D et al (2024) Co-space representation interaction network for multimodal sentiment analysis. Knowl-Based Syst 283:111149","journal-title":"Knowl-Based Syst"},{"key":"362_CR4","doi-asserted-by":"publisher","first-page":"424","DOI":"10.1016\/j.inffus.2022.09.025","volume":"91","author":"A Gandhi","year":"2023","unstructured":"Gandhi A, Adhvaryu K, Poria S, Cambria E, Hussain A (2023) Multimodal sentiment analysis: a systematic review of history, datasets, multimodal fusion methods, applications, challenges and future directions. Inf Fus 91:424\u2013444","journal-title":"Inf Fus"},{"key":"362_CR5","unstructured":"P\u00e9rez-Rosas V, Mihalcea R, Morency LP (2013) Utterance-level multimodal sentiment analysis. In: Proceedings of the 51st annual meeting of the association for computational linguistics. Long Papers, vol 1. pp 973\u2013982"},{"key":"362_CR6","doi-asserted-by":"crossref","unstructured":"Poria S, Cambria E, Gelbukh A (2015) Deep convolutional neural network textual features and multiple kernel learning for utterance-level multimodal sentiment analysis. In: Proceedings of the 2015 conference on empirical methods in natural language processing, pp 2539\u20132544","DOI":"10.18653\/v1\/D15-1303"},{"key":"362_CR7","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, et\u00a0al (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"362_CR8","doi-asserted-by":"crossref","unstructured":"Tsai YHH, Bai S, Liang PP, Kolter JZ, Morency LP, Salakhutdinov R (2019) Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the conference. Association for computational linguistics. Meeting. vol 2019. NIH Public Access, p 6558","DOI":"10.18653\/v1\/P19-1656"},{"key":"362_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109259","volume":"136","author":"D Wang","year":"2023","unstructured":"Wang D, Guo X, Tian Y, Liu J, He L, Luo X (2023) TETFN: a text enhanced transformer fusion network for multimodal sentiment analysis. Pattern Recogn 136:109259","journal-title":"Pattern Recogn"},{"key":"362_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.119125","volume":"641","author":"Z Tang","year":"2023","unstructured":"Tang Z, Xiao Q, Zhou X, Li Y, Chen C, Li K (2023) Learning discriminative multi-relation representations for multimodal sentiment analysis. Inf Sci 641:119125","journal-title":"Inf Sci"},{"key":"362_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111346","volume":"285","author":"J Huang","year":"2024","unstructured":"Huang J, Zhou J, Tang Z, Lin J, Chen CYC (2024) TMBL: transformer-based multimodal binding learning model for multimodal sentiment analysis. Knowl-Based Syst 285:111346","journal-title":"Knowl-Based Syst"},{"key":"362_CR12","doi-asserted-by":"crossref","unstructured":"Collier G, Collier GJ (2014) Emotional expression. Psychology Press","DOI":"10.4324\/9781315802411"},{"key":"362_CR13","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/s10919-019-00293-3","volume":"43","author":"D Keltner","year":"2019","unstructured":"Keltner D, Sauter D, Tracy J, Cowen A (2019) Emotional expression: advances in basic emotion theory. J Nonverbal Behav 43:133\u2013160","journal-title":"J Nonverbal Behav"},{"key":"362_CR14","doi-asserted-by":"crossref","unstructured":"James W (2002) The James-Lange theory of emotion. Visceral sensory neuroscience: interoception, p 9","DOI":"10.1093\/oso\/9780195136012.003.0002"},{"issue":"5","key":"362_CR15","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1037\/h0046234","volume":"69","author":"S Schachter","year":"1962","unstructured":"Schachter S, Singer J (1962) Cognitive, social, and physiological determinants of emotional state. Psychol Rev 69(5):379","journal-title":"Psychol Rev"},{"issue":"6","key":"362_CR16","doi-asserted-by":"publisher","first-page":"1161","DOI":"10.1037\/h0077714","volume":"39","author":"JA Russell","year":"1980","unstructured":"Russell JA (1980) A circumplex model of affect. J Pers Soc Psychol 39(6):1161","journal-title":"J Pers Soc Psychol"},{"key":"362_CR17","doi-asserted-by":"crossref","unstructured":"Rao Y, Chen G, Lu J, Zhou J (2021) Counterfactual attention learning for fine-grained visual categorization and re-identification. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1025\u20131034","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"362_CR18","first-page":"8583","volume":"34","author":"C Riquelme","year":"2021","unstructured":"Riquelme C, Puigcerver J, Mustafa B, Neumann M, Jenatton R, Susano Pinto A et al (2021) Scaling vision with sparse mixture of experts. Adv Neural Inf Process Syst 34:8583\u20138595","journal-title":"Adv Neural Inf Process Syst"},{"key":"362_CR19","first-page":"10790","volume":"35","author":"W Yu","year":"2021","unstructured":"Yu W, Xu H, Yuan Z, Wu J (2021) Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. Proc AAAI Conf Artif Intell 35:10790\u201310797","journal-title":"Proc AAAI Conf Artif Intell"},{"issue":"6","key":"362_CR20","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh A, Zellers R, Pincus E, Morency LP (2016) Multimodal sentiment intensity analysis in videos: facial gestures and verbal messages. IEEE Intell Syst 31(6):82\u201388","journal-title":"IEEE Intell Syst"},{"key":"362_CR21","doi-asserted-by":"crossref","unstructured":"Zadeh AB, Liang PP, Poria S, Cambria E, Morency LP (2018) Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th annual meeting of the association for computational linguistics, vol 1. Long Papers, pp 2236\u20132246","DOI":"10.18653\/v1\/P18-1208"},{"key":"362_CR22","doi-asserted-by":"crossref","unstructured":"Yu W, Xu H, Meng F, Zhu Y, Ma Y, Wu J, et\u00a0al (2020) Ch-sims: a Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality. In: Proceedings of the 58th annual meeting of the association for computational linguistics, pp 3718\u20133727","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"362_CR23","first-page":"6892","volume":"33","author":"H Pham","year":"2019","unstructured":"Pham H, Liang PP, Manzini T, Morency LP, P\u00f3czos B (2019) Found in translation: learning robust joint representations by cyclic translations between modalities. Proc AAAI Conf Artif Intell 33:6892\u20136899","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"362_CR24","doi-asserted-by":"crossref","unstructured":"Cambria E, Howard N, Hsu J, Hussain A (2013) Sentic blending: scalable multimodal fusion for the continuous interpretation of semantics and sentics. In: IEEE symposium on computational intelligence for human-like intelligence (CIHLI). IEEE, pp 108\u2013117","DOI":"10.1109\/CIHLI.2013.6613272"},{"key":"362_CR25","doi-asserted-by":"crossref","unstructured":"Mai S, Hu H, Xing S (2019) Divide, conquer and combine: Hierarchical feature fusion network with local and global perspectives for multimodal affective computing. In: Proceedings of the 57th annual meeting of the association for computational linguistics, pp 481\u2013492","DOI":"10.18653\/v1\/P19-1046"},{"key":"362_CR26","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1016\/j.inffus.2023.02.028","volume":"95","author":"L Zhu","year":"2023","unstructured":"Zhu L, Zhu Z, Zhang C, Xu Y, Kong X (2023) Multimodal sentiment analysis based on fusion methods: a survey. Inf Fus 95:306\u2013325","journal-title":"Inf Fus"},{"key":"362_CR27","doi-asserted-by":"crossref","unstructured":"Zadeh A, Chen M, Poria S, Cambria E, Morency LP (2017) Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250","DOI":"10.18653\/v1\/D17-1115"},{"key":"362_CR28","doi-asserted-by":"crossref","unstructured":"Liu Z, Shen Y, Lakshminarasimhan VB, Liang PP, Zadeh A, Morency LP (2018) Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064","DOI":"10.18653\/v1\/P18-1209"},{"key":"362_CR29","doi-asserted-by":"crossref","unstructured":"Zhang D, Ju X, Zhang W, Li J, Li S, Zhu Q, et\u00a0al. (2021) Multi-modal multi-label emotion recognition with heterogeneous hierarchical message passing. In: Proceedings of the AAAI conference on artificial intelligence, vol 35. pp 14338\u201314346","DOI":"10.1609\/aaai.v35i16.17686"},{"key":"362_CR30","doi-asserted-by":"crossref","unstructured":"Hazarika D, Zimmermann R, Poria S (2020) Misa: Modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM international conference on multimedia, pp 1122\u20131131","DOI":"10.1145\/3394171.3413678"},{"key":"362_CR31","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1016\/j.inffus.2023.01.005","volume":"93","author":"Y Zhang","year":"2023","unstructured":"Zhang Y, Wang J, Liu Y, Rong L, Zheng Q, Song D et al (2023) A multitask learning model for multimodal sarcasm, sentiment and emotion recognition in conversations. Inf Fus 93:282\u2013301","journal-title":"Inf Fus"},{"key":"362_CR32","unstructured":"Yang H, Zhao Y, Wu Y, Wang S, Zheng T, Zhang H, et\u00a0al (2024) Large language models meet text-centric multimodal sentiment analysis: a survey. arXiv preprint arXiv:2406.08068"},{"key":"362_CR33","doi-asserted-by":"crossref","unstructured":"Tang B, Lin B, Yan H, Li S (2024) Leveraging generative large language models with visual instruction and demonstration retrieval for multimodal sarcasm detection. In: Proceedings of the 2024 conference of the North American chapter of the association for computational linguistics: human language technologies, vol 1. Long Papers, pp 1732\u20131742","DOI":"10.18653\/v1\/2024.naacl-long.97"},{"key":"362_CR34","doi-asserted-by":"crossref","unstructured":"Lin H, Chen Z, Luo Z, Cheng M, Ma J, Chen G (2024) CofiPara: a coarse-to-fine paradigm for multimodal sarcasm target identification with large multimodal models. arXiv preprint arXiv:2405.00390","DOI":"10.18653\/v1\/2024.acl-long.522"},{"issue":"4","key":"362_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.103724","volume":"61","author":"L Yang","year":"2024","unstructured":"Yang L, Wang Z, Li Z, Na JC, Yu J (2024) An empirical study of Multimodal Entity-Based Sentiment Analysis with ChatGPT: improving in-context learning via entity-aware contrastive learning. Inf Process Manag 61(4):103724","journal-title":"Inf Process Manag"},{"key":"362_CR36","unstructured":"Yang Q, Ye M, Du B (2024) Emollm: multimodal emotional understanding meets large language models. arXiv preprint arXiv:2406.16442"},{"key":"362_CR37","doi-asserted-by":"crossref","unstructured":"Wang W, Ding L, Shen L, Luo Y, Hu H, Tao D (2024) WisdoM: improving multimodal sentiment analysis by fusing contextual world knowledge. arXiv preprint arXiv:2401.06659","DOI":"10.1145\/3664647.3681403"},{"key":"362_CR38","unstructured":"Feng J, Lin M, Shang L, Gao X (2024) Autonomous aspect-image instruction a2ii: q-former guided multimodal sentiment classification. In: Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (LREC-COLING 2024), pp 1996\u20132005"},{"key":"362_CR39","unstructured":"Jiang AQ, Sablayrolles A, Roux A, Mensch A, Savary B, Bamford C, et\u00a0al (2024) Mixtral of experts. arXiv preprint arXiv:2401.04088"},{"issue":"120","key":"362_CR40","first-page":"1","volume":"23","author":"W Fedus","year":"2022","unstructured":"Fedus W, Zoph B, Shazeer N (2022) Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. J Mach Learn Res 23(120):1\u201339","journal-title":"J Mach Learn Res"},{"key":"362_CR41","unstructured":"Shazeer N, Mirhoseini A, Maziarz K, Davis A, Le Q, Hinton G, et\u00a0al (2017) Outrageously large neural networks: the sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538"},{"key":"362_CR42","doi-asserted-by":"crossref","unstructured":"Degottex G, Kane J, Drugman T, Raitio T, Scherer S (2014) COVAREP\u2014A collaborative voice analysis repository for speech technologies. In: IEEE international conference on acoustics, speech and signal processing (icassp). IEEE, pp 960\u2013964","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"362_CR43","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis T, Robinson P, Morency LP (2016) Openface: an open source facial behavior analysis toolkit. In: IEEE winter conference on applications of computer vision (WACV). IEEE, pp 1\u201310","DOI":"10.1109\/WACV.2016.7477553"},{"key":"362_CR44","unstructured":"Cai TT, Frankle J, Schwab DJ, Morcos AS (2020) Are all negatives created equal in contrastive instance discrimination? arXiv preprint arXiv:2010.06682"},{"key":"362_CR45","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"362_CR46","unstructured":"Yang Z, Dai Z, Yang Y, Carbonell J, Salakhutdinov RR, Le QV (2019) Xlnet: generalized autoregressive pretraining for language understanding. Adv Neural Inf Process Syst 32"},{"key":"362_CR47","doi-asserted-by":"crossref","unstructured":"Li Z, Lin TE, Wu Y, Liu M, Tang F, Zhao M, et\u00a0al (2023) Unisa: unified generative framework for sentiment analysis. In: Proceedings of the 31st ACM international conference on multimedia, pp 6132\u20136142","DOI":"10.1145\/3581783.3612336"},{"key":"362_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110502","volume":"269","author":"C Huang","year":"2023","unstructured":"Huang C, Zhang J, Wu X, Wang Y, Li M, Huang X (2023) TeFNA: text-centered fusion network with crossmodal attention for multimodal sentiment analysis. Knowl-Based Syst 269:110502","journal-title":"Knowl-Based Syst"},{"key":"362_CR49","doi-asserted-by":"crossref","unstructured":"Fu Y, Zhang Z, Yang R, Yao C (2024) Hybrid cross-modal interaction learning for multimodal sentiment analysis. Neurocomputing 571:127201","DOI":"10.1016\/j.neucom.2023.127201"},{"key":"362_CR50","doi-asserted-by":"crossref","unstructured":"Sun Z, Sarma P, Sethares W, Liang Y (2020) Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis. In: Proceedings of the AAAI conference on artificial intelligence, vol 34. pp 8992\u20138999","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"362_CR51","doi-asserted-by":"crossref","unstructured":"Rahman W, Hasan MK, Lee S, Zadeh A, Mao C, Morency LP, et\u00a0al (2020) Integrating multimodal information in large pretrained transformers. In: Proceedings of the conference. Association for Computational Linguistics. Meeting. vol 2020. NIH Public Access, p 2359","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"362_CR52","doi-asserted-by":"crossref","unstructured":"Hu G, Lin TE, Zhao Y, Lu G, Wu Y, Li Y (2022) UniMSE: towards unified multimodal sentiment analysis and emotion recognition. arXiv preprint arXiv:2211.11256","DOI":"10.18653\/v1\/2022.emnlp-main.534"},{"key":"362_CR53","doi-asserted-by":"crossref","unstructured":"Wu Z, Gong Z, Koo J, Hirschberg J (2024) Multimodal multi-loss fusion network for sentiment analysis. In: Proceedings of the 2024 conference of the north American chapter of the association for computational linguistics: human language technologies, vol 1. Long Papers, pp 3588\u20133602","DOI":"10.18653\/v1\/2024.naacl-long.197"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-025-00362-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13735-025-00362-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-025-00362-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,17]],"date-time":"2025-03-17T07:32:58Z","timestamp":1742196778000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13735-025-00362-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":53,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["362"],"URL":"https:\/\/doi.org\/10.1007\/s13735-025-00362-y","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3]]},"assertion":[{"value":"2 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"7"}}