{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T12:13:30Z","timestamp":1775132010374,"version":"3.50.1"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T00:00:00Z","timestamp":1771459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T00:00:00Z","timestamp":1771459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s00530-026-02235-8","type":"journal-article","created":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T10:41:36Z","timestamp":1771497696000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MHAMF: mamba-based emotional hyper-modal assisted multi-granularity fusion for emotion recognition in conversations"],"prefix":"10.1007","volume":"32","author":[{"given":"Jun","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianfeng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shilong","family":"Jing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinyu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fang","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,19]]},"reference":[{"key":"2235_CR1","doi-asserted-by":"publisher","unstructured":"Zhuang, X., Zhou, F., Li, Z.: Mv-bart: Multi-view bart for multi-modal sarcasm detection. In: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management, pp. 3602\u20133611. Association for Computing Machinery, New York, NY, USA (2024). https:\/\/doi.org\/10.1145\/3627673.3679570","DOI":"10.1145\/3627673.3679570"},{"key":"2235_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.109884","volume":"142","author":"X Zhuang","year":"2025","unstructured":"Zhuang, X., Li, Z., Zhang, C., Ma, H.: A cross-modal collaborative guiding network for sarcasm explanation in multi-modal multi-party dialogues. Eng. Appl. Artif. Intell. 142, 109884 (2025). https:\/\/doi.org\/10.1016\/j.engappai.2024.109884","journal-title":"Eng. Appl. Artif. Intell."},{"key":"2235_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.126924","volume":"274","author":"H Liu","year":"2025","unstructured":"Liu, H., Wei, R., Tu, G., Lin, J., Jiang, D., Cambria, E.: Knowing what and why: causal emotion entailment for emotion recognition in conversations. Expert Syst. Appl. 274, 126924 (2025). https:\/\/doi.org\/10.1016\/j.eswa.2025.126924","journal-title":"Expert Syst. Appl."},{"key":"2235_CR4","doi-asserted-by":"publisher","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need (2023). https:\/\/doi.org\/10.48550\/arXiv.1706.03762","DOI":"10.48550\/arXiv.1706.03762"},{"key":"2235_CR5","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis, Minnesota (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"2235_CR6","doi-asserted-by":"publisher","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: the long-document transformer (2020). https:\/\/doi.org\/10.48550\/arXiv.2004.05150","DOI":"10.48550\/arXiv.2004.05150"},{"key":"2235_CR7","doi-asserted-by":"publisher","unstructured":"Hua, W., Dai, Z., Liu, H., Le, Q.: Transformer quality in linear time. In: Proceedings of the 39th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 162, pp. 9099\u20139117 (2022). https:\/\/doi.org\/10.48550\/arXiv.2202.10447","DOI":"10.48550\/arXiv.2202.10447"},{"key":"2235_CR8","doi-asserted-by":"publisher","unstructured":"Gu, A., Dao, T.: Mamba: linear-time sequence modeling with selective state spaces (2024). https:\/\/doi.org\/10.48550\/arXiv.2312.00752","DOI":"10.48550\/arXiv.2312.00752"},{"key":"2235_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/LGRS.2024.3414293","volume":"21","author":"X Ma","year":"2024","unstructured":"Ma, X., Zhang, X., Pun, M.-O.: Rs3mamba: visual state space model for remote sensing image semantic segmentation. IEEE Geosci. Remote Sens. Lett. 21, 1\u20135 (2024). https:\/\/doi.org\/10.1109\/LGRS.2024.3414293","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"2235_CR10","doi-asserted-by":"publisher","unstructured":"Xu, Q., Liu, X., Zhu, L., Lin, G., Long, C., Li, Z., Zhao, R.: Hybrid mamba for few-shot segmentation. In: Globerson, A., Mackey, L., Belgrave, D., Fan, A., Paquet, U., Tomczak, J., Zhang, C. (eds.) Advances in Neural Information Processing Systems, vol. 37, pp. 73858\u201373883. Curran Associates, Inc. (2024). https:\/\/doi.org\/10.48550\/arXiv.2409.19613","DOI":"10.48550\/arXiv.2409.19613"},{"key":"2235_CR11","doi-asserted-by":"publisher","first-page":"615","DOI":"10.48550\/arXiv.2402.03302","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI 2024","author":"J Liu","year":"2024","unstructured":"Liu, J., Yang, H., Zhou, H.-Y., Xi, Y., Yu, L., Li, C., Liang, Y., Shi, G., Yu, Y., Zhang, S., Zheng, H., Wang, S.: Swin-umamba: mamba-based unet with imagenet-based pretraining. In: Linguraru, M.G., Dou, Q., Feragen, A., Giannarou, S., Glocker, B., Lekadir, K., Schnabel, J.A. (eds.) Medical Image Computing and Computer Assisted Intervention - MICCAI 2024, pp. 615\u2013625. Springer, Cham (2024). https:\/\/doi.org\/10.48550\/arXiv.2402.03302"},{"key":"2235_CR12","doi-asserted-by":"publisher","first-page":"578","DOI":"10.48550\/2401.13560","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI 2024","author":"Z Xing","year":"2024","unstructured":"Xing, Z., Ye, T., Yang, Y., Liu, G., Zhu, L.: Segmamba: long-range sequential modeling mamba for 3d medical image segmentation. In: Linguraru, M.G., Dou, Q., Feragen, A., Giannarou, S., Glocker, B., Lekadir, K., Schnabel, J.A. (eds.) Medical Image Computing and Computer Assisted Intervention - MICCAI 2024, pp. 578\u2013588. Springer, Cham (2024). https:\/\/doi.org\/10.48550\/2401.13560"},{"key":"2235_CR13","doi-asserted-by":"publisher","unstructured":"Patro, B.N., Agneeswaran, V.S.: Simba: simplified mamba-based architecture for vision and multivariate time series. arXiv preprint arXiv:2403.15360 (2024) https:\/\/doi.org\/10.48550\/2403.15360","DOI":"10.48550\/2403.15360"},{"key":"2235_CR14","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Poria, S., Zadeh, A., Cambria, E., Morency, L.-P., Zimmermann, R.: Conversational memory network for emotion recognition in dyadic dialogue videos. In: Proceedings of the Conference. Association for Computational Linguistics. North American Chapter. Meeting, vol. 2018, p. 2122 (2018). https:\/\/doi.org\/10.18653\/v1\/n18-1193","DOI":"10.18653\/v1\/n18-1193"},{"key":"2235_CR15","doi-asserted-by":"publisher","unstructured":"Hu, D., Bao, Y., Wei, L., Zhou, W., Hu, S.: Supervised adversarial contrastive learning for emotion recognition in conversations. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 10835\u201310852. Association for Computational Linguistics, Toronto, Canada (2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.606","DOI":"10.18653\/v1\/2023.acl-long.606"},{"issue":"01","key":"2235_CR16","doi-asserted-by":"publisher","first-page":"6818","DOI":"10.1609\/aaai.v33i01.33016818","volume":"33","author":"N Majumder","year":"2019","unstructured":"Majumder, N., Poria, S., Hazarika, D., Mihalcea, R., Gelbukh, A., Cambria, E.: Dialoguernn: an attentive rnn for emotion detection in conversations. Proc. AAAI Conf. Artif. Intell. 33(01), 6818\u20136825 (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.33016818","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2235_CR17","doi-asserted-by":"publisher","unstructured":"Hu, D., Wei, L., Huai, X.: DialogueCRN: contextual reasoning networks for emotion recognition in conversations. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 7042\u20137052. Association for Computational Linguistics, Online (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.547","DOI":"10.18653\/v1\/2021.acl-long.547"},{"key":"2235_CR18","doi-asserted-by":"publisher","unstructured":"Hu, J., Liu, Y., Zhao, J., Jin, Q.: MMGCN: multimodal fusion via deep graph convolution network for emotion recognition in conversation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 5666\u20135675. Association for Computational Linguistics, Online (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.440","DOI":"10.18653\/v1\/2021.acl-long.440"},{"issue":"1","key":"2235_CR19","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1109\/TAFFC.2023.3261279","volume":"15","author":"J Li","year":"2023","unstructured":"Li, J., Wang, X., Lv, G., Zeng, Z.: Ga2mif: graph and attention based two-stage multi-source information fusion for conversational emotion detection. IEEE Trans. Affect. Comput. 15(1), 130\u2013143 (2023). https:\/\/doi.org\/10.1109\/TAFFC.2023.3261279","journal-title":"IEEE Trans. Affect. Comput."},{"key":"2235_CR20","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1109\/TMM.2023.3260635","volume":"26","author":"J Li","year":"2023","unstructured":"Li, J., Wang, X., Lv, G., Zeng, Z.: Graphcfc: a directed graph based cross-modal feature complementation approach for multimodal conversational emotion recognition. Trans. Multi. 26, 77\u201389 (2023). https:\/\/doi.org\/10.1109\/TMM.2023.3260635","journal-title":"Trans. Multi."},{"key":"2235_CR21","doi-asserted-by":"publisher","unstructured":"Ghosal, D., Majumder, N., Poria, S., Chhaya, N., Gelbukh, A.: DialogueGCN: a graph convolutional neural network for emotion recognition in conversation. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 154\u2013164. Association for Computational Linguistics, Hong Kong, China (2019). https:\/\/doi.org\/10.18653\/v1\/D19-1015","DOI":"10.18653\/v1\/D19-1015"},{"key":"2235_CR22","doi-asserted-by":"publisher","unstructured":"Hu, D., Hou, X., Wei, L., Jiang, L., Mo, Y.: Mm-dfn: multimodal dynamic fusion network for emotion recognition in conversations. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7037\u20137041 (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747397","DOI":"10.1109\/ICASSP43922.2022.9747397"},{"key":"2235_CR23","doi-asserted-by":"publisher","unstructured":"Gu, A., Goel, K., R\u00e9, C.: Efficiently modeling long sequences with structured state spaces (2022). https:\/\/doi.org\/10.48550\/arXiv.2111.00396","DOI":"10.48550\/arXiv.2111.00396"},{"key":"2235_CR24","doi-asserted-by":"publisher","unstructured":"Smith, J.T.H., Warrington, A., Linderman, S.W.: Simplified state space layers for sequence modeling (2023). https:\/\/doi.org\/10.48550\/arXiv.2208.04933","DOI":"10.48550\/arXiv.2208.04933"},{"key":"2235_CR25","doi-asserted-by":"publisher","unstructured":"Lawan, A., Pu, J., Yunusa, H., Lawan, M., Umar, A., Yahya, A.S.: Dualkanbaformer: kolmogorov-arnold networks and state space model transformer for multimodal aspect-based sentiment analysis. arXiv preprint arXiv:2408.15379 (2024) https:\/\/doi.org\/10.48550\/arXiv.2408.15379","DOI":"10.48550\/arXiv.2408.15379"},{"key":"2235_CR26","doi-asserted-by":"publisher","unstructured":"Shou, Y., Meng, T., Zhang, F., Yin, N., Li, K.: Revisiting multi-modal emotion learning with broad state space models and probability-guidance fusion (2024). https:\/\/doi.org\/10.48550\/arXiv.2404.17858","DOI":"10.48550\/arXiv.2404.17858"},{"key":"2235_CR27","doi-asserted-by":"publisher","unstructured":"Li, X., Fan, X., Wu, Q., Peng, X., Li, Y.: Mamba-enhanced text-audio-video alignment network for emotion recognition in conversations (2024). https:\/\/doi.org\/10.48550\/arXiv.2409.05243","DOI":"10.48550\/arXiv.2409.05243"},{"key":"2235_CR28","doi-asserted-by":"publisher","unstructured":"Sol\u00eds-Garc\u00eda, J., Vega-M\u00e1rquez, B., Nepomuceno, J.A., Nepomuceno-Chamorro, I.A.: TIMBA: time series imputation with bi-directional mamba blocks and diffusion models (2024). https:\/\/doi.org\/10.48550\/arXiv.2410.05916","DOI":"10.48550\/arXiv.2410.05916"},{"key":"2235_CR29","doi-asserted-by":"publisher","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., Wang, X.: Vision mamba: efficient visual representation learning with bidirectional state space model (2024). https:\/\/doi.org\/10.48550\/arXiv.2401.09417","DOI":"10.48550\/arXiv.2401.09417"},{"key":"2235_CR30","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1016\/j.neucom.2021.09.057","volume":"467","author":"W Li","year":"2022","unstructured":"Li, W., Shao, W., Ji, S., Cambria, E.: Bieru: bidirectional emotional recurrent unit for conversational sentiment analysis. Neurocomputing 467, 73\u201382 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2021.09.057","journal-title":"Neurocomputing"},{"key":"2235_CR31","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Poria, S., Mihalcea, R., Cambria, E., Zimmermann, R.: ICON: interactive conversational memory network for multimodal emotion detection. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 2594\u20132604. Association for Computational Linguistics, Brussels, Belgium (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1280","DOI":"10.18653\/v1\/D18-1280"},{"issue":"3","key":"2235_CR32","doi-asserted-by":"publisher","first-page":"1567","DOI":"10.1109\/TAFFC.2024.3360979","volume":"15","author":"Z Yang","year":"2024","unstructured":"Yang, Z., Li, X., Cheng, Y., Zhang, T., Wang, X.: Emotion recognition in conversation based on a dynamic complementary graph convolutional network. IEEE Trans. Affect. Comput. 15(3), 1567\u20131579 (2024). https:\/\/doi.org\/10.1109\/TAFFC.2024.3360979","journal-title":"IEEE Trans. Affect. Comput."},{"key":"2235_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102306","volume":"106","author":"C Fan","year":"2024","unstructured":"Fan, C., Lin, J., Mao, R., Cambria, E.: Fusing pairwise modalities for emotion recognition in conversations. Inf. Fusion. 106, 102306 (2024). https:\/\/doi.org\/10.1016\/j.inffus.2024.102306","journal-title":"Inf. Fusion."},{"key":"2235_CR34","doi-asserted-by":"publisher","unstructured":"Meng, T., Zhang, F., Shou, Y., Ai, W., Yin, N., Li, K.: Revisiting multimodal emotion recognition in conversation from the perspective of graph spectrum. arXiv preprint arXiv:2404.17862 (2024) https:\/\/doi.org\/10.48550\/arXiv.2404.17862","DOI":"10.48550\/arXiv.2404.17862"},{"key":"2235_CR35","doi-asserted-by":"publisher","first-page":"4298","DOI":"10.1109\/TASLP.2024.3434495","volume":"32","author":"T Meng","year":"2024","unstructured":"Meng, T., Zhang, F., Shou, Y., Shao, H., Ai, W., Li, K.: Masked graph learning with recurrent alignment for multimodal emotion recognition in conversation. IEEE\/ACM Trans. Audio Speech Lang. Process. 32, 4298\u20134312 (2024). https:\/\/doi.org\/10.1109\/TASLP.2024.3434495","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2235_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102590","volume":"112","author":"Y Shou","year":"2024","unstructured":"Shou, Y., Meng, T., Ai, W., Zhang, F., Yin, N., Li, K.: Adversarial alignment and graph fusion via information bottleneck for multimodal emotion recognition in conversations. Inf. Fusion. 112, 102590 (2024). https:\/\/doi.org\/10.1016\/j.inffus.2024.102590","journal-title":"Inf. Fusion."},{"key":"2235_CR37","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.-P.: Tensor fusion network for multimodal sentiment analysis. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 1103\u20131114. Association for Computational Linguistics, Copenhagen, Denmark (2017). https:\/\/doi.org\/10.18653\/v1\/D17-1115","DOI":"10.18653\/v1\/D17-1115"},{"key":"2235_CR38","doi-asserted-by":"publisher","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Bagher\u00a0Zadeh, A., Morency, L.-P.: Efficient low-rank multimodal fusion with modality-specific factors. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2247\u20132256. Association for Computational Linguistics, Melbourne, Australia (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1209","DOI":"10.18653\/v1\/P18-1209"},{"issue":"1","key":"2235_CR39","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1109\/TMM.2019.2925966","volume":"22","author":"S Mai","year":"2020","unstructured":"Mai, S., Xing, S., Hu, H.: Locally confined modality fusion network with a global perspective for multimodal human affective computing. IEEE Trans. Multimedia 22(1), 122\u2013137 (2020). https:\/\/doi.org\/10.1109\/TMM.2019.2925966","journal-title":"IEEE Trans. Multimedia"},{"key":"2235_CR40","doi-asserted-by":"publisher","unstructured":"Mai, S., Hu, H., Xing, S.: Divide, conquer and combine: hierarchical feature fusion network with local and global perspectives for multimodal affective computing. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 481\u2013492. Association for Computational Linguistics, Florence, Italy (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1046","DOI":"10.18653\/v1\/P19-1046"},{"key":"2235_CR41","doi-asserted-by":"publisher","unstructured":"Tsai, Y.-H.H., Bai, S., Pu\u00a0Liang, P., Kolter, J.Z., Morency, L.-P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. Proceedings of the conference. Association for Computational Linguistics. Meeting 2019, 6558\u20136569 (2019) https:\/\/doi.org\/10.18653\/v1\/p19-1656","DOI":"10.18653\/v1\/p19-1656"},{"key":"2235_CR42","doi-asserted-by":"publisher","unstructured":"Wu, Z., Gong, Z., Koo, J., Hirschberg, J.: Multimodal multi-loss fusion network for sentiment analysis. In: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 3588\u20133602. Association for Computational Linguistics, Mexico City, Mexico (2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.197","DOI":"10.18653\/v1\/2024.naacl-long.197"},{"key":"2235_CR43","doi-asserted-by":"publisher","unstructured":"Delbrouck, J.-B., Tits, N., Dupont, S.: Modulated fusion using transformer for linguistic-acoustic emotion recognition. In: Proceedings of the First International Workshop on Natural Language Processing Beyond Text, pp. 1\u201310. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.nlpbt-1.1","DOI":"10.18653\/v1\/2020.nlpbt-1.1"},{"key":"2235_CR44","doi-asserted-by":"publisher","unstructured":"Hazarika, D., Poria, S., Zadeh, A., Cambria, E., Morency, L.-P., Zimmermann, R.: Conversational memory network for emotion recognition in dyadic dialogue videos. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pp. 2122\u20132132. Association for Computational Linguistics, New Orleans, Louisiana (2018). https:\/\/doi.org\/10.18653\/v1\/N18-1193","DOI":"10.18653\/v1\/N18-1193"},{"key":"2235_CR45","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2025.113029","volume":"310","author":"X Zhuang","year":"2025","unstructured":"Zhuang, X., Li, Z., Zhou, F., Gu, J., Zhang, C., Ma, H.: Dycr-net: a dynamic context-aware routing network for multi-modal sarcasm detection in conversation. Knowl.-Based Syst. 310, 113029 (2025). https:\/\/doi.org\/10.1016\/j.knosys.2025.113029","journal-title":"Knowl.-Based Syst."},{"key":"2235_CR46","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., Bulut, M., Lee, C.-C., Kazemzadeh, A., Mower, E., Kim, S., Chang, J.N., Lee, S., Narayanan, S.S.: Iemocap: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42, 335\u2013359 (2008). https:\/\/doi.org\/10.1007\/s10579-008-9076-6","journal-title":"Lang. Resour. Eval."},{"key":"2235_CR47","doi-asserted-by":"publisher","unstructured":"Poria, S., Hazarika, D., Majumder, N., Naik, G., Cambria, E., Mihalcea, R.: MELD: a multimodal multi-party dataset for emotion recognition in conversations. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 527\u2013536. Association for Computational Linguistics, Florence, Italy (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1050","DOI":"10.18653\/v1\/P19-1050"},{"key":"2235_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110285","volume":"263","author":"B Wang","year":"2023","unstructured":"Wang, B., Dong, G., Zhao, Y., Li, R., Cao, Q., Hu, K., Jiang, D.: Hierarchically stacked graph convolution for emotion recognition in conversation. Knowl.-Based Syst. 263, 110285 (2023). https:\/\/doi.org\/10.1016\/j.knosys.2023.110285","journal-title":"Knowl.-Based Syst."},{"issue":"2","key":"2235_CR49","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s10844-024-00879-4","volume":"63","author":"J Wu","year":"2025","unstructured":"Wu, J., Wu, J., Zheng, Y., Zhan, P., Han, M., Zuo, G., Yang, L.: Mlgat: multi-layer graph attention networks for multimodal emotion recognition in conversations. J. Intell. Inf. Syst. 63(2), 375\u2013394 (2025)","journal-title":"J. Intell. Inf. Syst."},{"key":"2235_CR50","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102272","volume":"106","author":"N Lu","year":"2024","unstructured":"Lu, N., Han, Z., Han, M., Qian, J.: Bi-stream graph learning based multimodal fusion for emotion recognition in conversation. Inf. Fusion 106, 102272 (2024). https:\/\/doi.org\/10.1016\/j.inffus.2024.102272","journal-title":"Inf. Fusion"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02235-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-026-02235-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02235-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:36:03Z","timestamp":1775129763000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-026-02235-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,19]]},"references-count":50,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["2235"],"URL":"https:\/\/doi.org\/10.1007\/s00530-026-02235-8","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,19]]},"assertion":[{"value":"13 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The authors state that this research complies with ethical standards. This research does not involve either human participants or animals.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval and consent to participate"}},{"value":"The paper is the authors\u2019 original work, which has not been previously published elsewhere. The paper is not currently being considered for publication elsewhere. The paper reflects the author\u2019s research and analysis truthfully and completely. The paper properly credits the meaningful contributions of co-authors and co-researchers.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"152"}}