{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T07:04:24Z","timestamp":1766127864588,"version":"3.48.0"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T00:00:00Z","timestamp":1760572800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T00:00:00Z","timestamp":1760572800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Kang Li","award":["the National Key Research and Development Program of China under Grant 2020YFC0833205"],"award-info":[{"award-number":["the National Key Research and Development Program of China under Grant 2020YFC0833205"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00530-025-02032-9","type":"journal-article","created":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T07:27:15Z","timestamp":1760599635000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GCIF: graph based cross-modal information fusion for conversational emotion recognition"],"prefix":"10.1007","volume":"31","author":[{"given":"Hongkun","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Siyuan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Fanmin","family":"Kong","sequence":"additional","affiliation":[]},{"given":"Qingtian","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Kang","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,16]]},"reference":[{"key":"2032_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102306","volume":"106","author":"C Fan","year":"2024","unstructured":"Fan, C., Lin, J., Mao, R., Cambria, E.: Fusing pairwise modalities for emotion recognition in conversations. Inf. Fusion 106, 102306 (2024)","journal-title":"Inf. Fusion"},{"issue":"7","key":"2032_CR2","doi-asserted-by":"publisher","first-page":"4873","DOI":"10.1007\/s10462-021-10030-2","volume":"54","author":"K Cortis","year":"2021","unstructured":"Cortis, K., Davis, B.: Over a decade of social opinion mining: a systematic review. Artif. Intell. Rev. 54(7), 4873\u20134965 (2021)","journal-title":"Artif. Intell. Rev."},{"key":"2032_CR3","doi-asserted-by":"publisher","DOI":"10.1017\/S204579602400009X","volume":"33","author":"I Gorrino","year":"2024","unstructured":"Gorrino, I., Rossetti, M.G., Girelli, F., Bellani, M., Perlini, C., Mattavelli, G.: A critical overview of emotion processing assessment in non-affective and affective psychoses. Epidemiol. Psychiatr. Sci. 33, e8 (2024)","journal-title":"Epidemiol. Psychiatr. Sci."},{"key":"2032_CR4","doi-asserted-by":"crossref","unstructured":"Li, S., Xue, F., Liu, K., Guo, D., Hong, R.:Multimodal graph causal embedding for multimedia-based Recommendation. IEEE Transactions on Knowledge and Data Engineering. 36 (12) 8842-8858 (2024)","DOI":"10.1109\/TKDE.2024.3424268"},{"key":"2032_CR5","doi-asserted-by":"publisher","first-page":"3369","DOI":"10.1109\/TMM.2021.3097171","volume":"24","author":"J Wang","year":"2021","unstructured":"Wang, J., Bao, B.-K., Xu, C.: Dualvgr: a dual-visual graph reasoning unit for video question answering. IEEE Trans. Multimed. 24, 3369\u20133380 (2021)","journal-title":"IEEE Trans. Multimed."},{"issue":"1","key":"2032_CR6","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1109\/MMUL.2022.3217307","volume":"30","author":"P Li","year":"2022","unstructured":"Li, P., Tan, Z., Bao, B.-K.: Multiview language bias reduction for visual question answering. IEEE Trans. Multimed. 30(1), 91\u201399 (2022)","journal-title":"IEEE Trans. Multimed."},{"key":"2032_CR7","doi-asserted-by":"crossref","unstructured":"Jiao, W., Yang, H., King, I., Lyu, M.R.: Higru: Hierarchical gated recurrent units for utterance-level emotion recognition in Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies. Minneapolis, 397\u2013406 (2019)","DOI":"10.18653\/v1\/N19-1037"},{"issue":"01","key":"2032_CR8","first-page":"6818","volume":"33","author":"SN Majumder","year":"2019","unstructured":"Majumder, S.N., Poria, S., Hazarika, D., Mihalcea, R., Gelbukh, A., Cambria, E.: Dialoguernn: an attentive rnn for emotion detection in conversations. Proc. AAAI Conf. Artif. Intell. 33(01), 6818\u20136825 (2019)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2032_CR9","doi-asserted-by":"crossref","unstructured":"Ghosal, D., Majumder, N., Gelbukh, A., Mihalcea, R., Poria, S.: Cosmic: commonsense knowledge for emotion identification in conversations. Findings of the Association for Computational Linguistics: EMNLP 2020, 2470\u20132481 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.224"},{"key":"2032_CR10","doi-asserted-by":"crossref","unstructured":"Hu, D., Wei, L., Huai, X.: Dialoguecrn: Contextual reasoning networks for emotion recognition in conversations, in Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, 1, 7042\u20137052 (2021)","DOI":"10.18653\/v1\/2021.acl-long.547"},{"key":"2032_CR11","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, X., Liu, Y., Zeng, Z.: CFN-ESA: a cross-modal fusion network with emotion-shift awareness for dialogue emotion recognition. IEEE Transactions on Affective Computing. 15 (4) \t1919-1933 (2024)","DOI":"10.1109\/TAFFC.2024.3389453"},{"key":"2032_CR12","doi-asserted-by":"crossref","unstructured":"Shou, Y., Meng, T., Zhang, F., Yin, N., Li, K.: Revisiting multi-modal emotion learning with broad state space models and probability-guidance fusion, arXiv preprint arXiv:2404.17858, (2024)","DOI":"10.1007\/978-3-032-06078-5_29"},{"issue":"8","key":"2032_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-023-3908-6","volume":"67","author":"Y Liu","year":"2024","unstructured":"Liu, Y., Li, J., Wang, X., Zeng, Z.: Emotionic: emotional inertia and contagion-driven dependency modeling for emotion recognition in conversation. Sci. China Inf. Sci. 67(8), 182103 (2024)","journal-title":"Sci. China Inf. Sci."},{"key":"2032_CR14","doi-asserted-by":"crossref","unstructured":"Hu, J., Liu, Y., Zhao, J., Jin, Q.: MMGCN: multimodal fusion via deep graph convolution network for emotion recognition in conversation,\" in Proceedings of the Annual Meeting of the Association for Computational Linguistics and the International Joint Conference on Natural Language Processing, pp. 5666\u20135675 (2021)","DOI":"10.18653\/v1\/2021.acl-long.440"},{"key":"2032_CR15","doi-asserted-by":"crossref","unstructured":"Hu, D., Hou, X., Wei, L., Jiang, L., Mo, Y.: MM-DFN: Multimodal dynamic fusion network for emotion recognition in conversations, in ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7037-7041 (2022)","DOI":"10.1109\/ICASSP43922.2022.9747397"},{"issue":"1","key":"2032_CR16","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1109\/TAFFC.2023.3261279","volume":"15","author":"J Li","year":"2023","unstructured":"Li, J., Wang, X., Lv, G., Zeng, Z.: Ga2mif: graph and attention based two-stage multi-source information fusion for conversational emotion detection. IEEE Trans. Affect. Comput. 15(1), 130\u2013143 (2023)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"2032_CR17","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1109\/TMM.2023.3260635","volume":"26","author":"J Li","year":"2023","unstructured":"Li, J., Wang, X., Lv, G., Zeng, Z.: Graphcfc: a directed graph based cross-modal feature complementation approach for multimodal conversational emotion recognition. IEEE Trans. Multimed. 26, 77\u201389 (2023)","journal-title":"IEEE Trans. Multimed."},{"issue":"13s","key":"2032_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3586075","volume":"55","author":"R Das","year":"2023","unstructured":"Das, R., Singh, T.D.: Multimodal sentiment analysis: a survey of methods, trends, and challenges. ACM Comput. Surv. 55(13s), 1\u201338 (2023)","journal-title":"ACM Comput. Surv."},{"key":"2032_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107530","volume":"128","author":"J Li","year":"2024","unstructured":"Li, J., Wang, X., Zeng, Z.: A dual-stream recurrence-attention network with global\u2013local awareness for emotion recognition in textual dialog. Eng. Appl. Artif. Intell. 128, 107530 (2024)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"2032_CR20","doi-asserted-by":"crossref","unstructured":"Jieying, X., Minh, N. P., Blake, M., Le, N. M.: Accumulating word representations in multi-level context integration for ERC task, in 2023 15th International Conference on Knowledge and Systems Engineering (KSE), pp. 1-6 (2023)","DOI":"10.1109\/KSE59128.2023.10299463"},{"issue":"15","key":"2032_CR21","first-page":"13789","volume":"35","author":"W Shen","year":"2021","unstructured":"Shen, W., Chen, J., Quan, X., Xie, Z.: Dialogxl: all-in-one xlnet for multi-party conversation emotion recognition. Proc. AAAI Conf. Artif. Intell. 35(15), 13789\u201313797 (2021)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"issue":"1","key":"2032_CR22","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1109\/TAFFC.2022.3205919","volume":"14","author":"J Hu","year":"2022","unstructured":"Hu, J., Huang, Y., Hu, X., Xu, Y.: The acoustically emotion-aware conversational agent with speech emotion recognition and empathetic responses. IEEE Trans. Affect. Comput. 14(1), 17\u201330 (2022)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"2032_CR23","doi-asserted-by":"crossref","unstructured":"Strizhkova, V., Ferrari, L. M., Kachmar, H., Dantcheva, A., Bremond, F.: Video representation learning for conversational facial expression recognition guided by multiple view reconstruction, in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4693-4702 (2024)","DOI":"10.1109\/CVPRW63382.2024.00472"},{"key":"2032_CR24","unstructured":"Shou, Y., Liu, H., Cao, X. et al.: A low-rank matching attention based cross-modal feature fusion method for conversational emotion recognition. IEEE Transactions on Affective Computing. 01,1-13.(2024)"},{"key":"2032_CR25","doi-asserted-by":"crossref","unstructured":"Shi, H., Zhang, X., Cheng, N. et al.: Enhancing emotion recognition in conversation through emotional cross-modal fusion and inter-class contrastive learning. International Conference on Intelligent Computing. Singapore: Springer Nature Singapore, 391-401 (2024)","DOI":"10.1007\/978-981-97-5669-8_32"},{"key":"2032_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, D., Chen, F., Chen, X.: Dualgats: Dual graph attention networks for emotion recognition in conversations, in Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics, vol. 1, pp. 7395-7408 (2023)","DOI":"10.18653\/v1\/2023.acl-long.408"},{"key":"2032_CR27","unstructured":"Kipf, T. N., Welling, M.: Semi-supervised classification with graph convolutional networks, in Proc. 5th International Conference on Learning Representations ICLR, (2017)"},{"issue":"20","key":"2032_CR28","first-page":"10-48550","volume":"1050","author":"P Velickovic","year":"2017","unstructured":"Velickovic, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., Bengio, Y.: Graph attention networks. Stat 1050(20), 10\u201348550 (2017)","journal-title":"Stat"},{"key":"2032_CR29","unstructured":"Kipf, T. N., Welling, M.: Variational graph auto-encoders, in Neural Information Processing Systems Workshop on Bayesian Deep Learning, (2016)"},{"key":"2032_CR30","unstructured":"Veli\u010dkovi\u0107, P., Fedus, W., Hamilton, W. L., Li\u00f2, P., Bengio, Y., Hjelm, R. D.: Deep graph infomax, in Proceedings of the 7th International Conference on Learning Representations, (2019)"},{"key":"2032_CR31","doi-asserted-by":"crossref","unstructured":"Ghosal, D., Majumder, N., Poria, S., Chhaya, N., Gelbukh, A.: Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation, in Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, pp. 154-164 (2019)","DOI":"10.18653\/v1\/D19-1015"},{"key":"2032_CR32","doi-asserted-by":"crossref","unstructured":"Ai, W., Shou, Y., Meng, T., Li, K.: DER-GCN: Dialog and event relation-aware graph convolutional neural network for multimodal dialog emotion recognition. IEEE Transactions on Neural Networks and Learning Systems. 36 (3) 4908-4921 (2024)","DOI":"10.1109\/TNNLS.2024.3367940"},{"key":"2032_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126427","volume":"550","author":"J Li","year":"2023","unstructured":"Li, J., Wang, X., Lv, G., Zeng, Z.: Graphmft: a graph network based multimodal fusion technique for emotion recognition in conversation. Neurocomputing 550, 126427 (2023)","journal-title":"Neurocomputing"},{"key":"2032_CR34","doi-asserted-by":"crossref","unstructured":"Luo, J., Phan, H., Reiss, J.: Cross-modal fusion techniques for utterance-level emotion recognition from text and speech, in ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1-5 (2023)","DOI":"10.1109\/ICASSP49357.2023.10096885"},{"key":"2032_CR35","doi-asserted-by":"crossref","unstructured":"Li, H., Guo, A., Li, Y.: CCMA: CapsNet for audio\u2013video sentiment analysis using cross-modal attention. The Visual Computer, 41 (3) 1609-1620 (2025)","DOI":"10.1007\/s00371-024-03453-9"},{"key":"2032_CR36","doi-asserted-by":"crossref","unstructured":"Kumar, A., Vepa, J.: Gated mechanism for attention based multi modal sentiment analysis, in ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4477-4481 (2020)","DOI":"10.1109\/ICASSP40776.2020.9053012"},{"key":"2032_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.124852","volume":"255","author":"HFT Al-Saadawi","year":"2024","unstructured":"Al-Saadawi, H.F.T., Das, B., Das, R.: A systematic review of trimodal affective computing approaches: text, audio, and visual integration in emotion recognition and sentiment analysis. Expert Syst. Appl. 255, 124852 (2024)","journal-title":"Expert Syst. Appl."},{"key":"2032_CR38","unstructured":"Brody, S., Alon, U., Yahav, E.: How attentive are graph attention networks?, in International Conference on Learning Representations, (2022)"},{"issue":"04","key":"2032_CR39","first-page":"3438","volume":"34","author":"D Chen","year":"2020","unstructured":"Chen, D., Lin, Y., Li, W., Li, P., Zhou, J., Sun, X.: Measuring and relieving the over-smoothing problem for graph neural networks from the topological view. Proc. AAAI Conf. Artif. Intell. 34(04), 3438\u20133445 (2020)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2032_CR40","doi-asserted-by":"crossref","unstructured":"Guohao, L., M\u00fcller, M., Qian, G., Delgadillo, I. C., Abualshour, A., Thabet, A. K.: Deepgcns: making gcns go as deep as cnns. IEEE transactions on pattern analysis and machine intelligence (T-PAMI). 45 (6), 6923-6939 (2023)","DOI":"10.1109\/TPAMI.2021.3074057"},{"key":"2032_CR41","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., et al.: Iemocap: interactive emotional dyadic motion capture database. Lang. Resour. Eval. 42, 335\u2013359 (2008)","journal-title":"Lang. Resour. Eval."},{"key":"2032_CR42","doi-asserted-by":"crossref","unstructured":"Poria, S., Hazarika, D., Majumder, N., Naik, G., Cambria, E., Mihalcea, R.: Meld: A multimodal multi-party dataset for emotion recognition in conversations, in Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 527\u2013536 (2019)","DOI":"10.18653\/v1\/P19-1050"},{"key":"2032_CR43","doi-asserted-by":"crossref","unstructured":"Kim, Y.: Convolutional neural networks for sentence classification, in Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing, pp. 1746\u20131751 (2014)","DOI":"10.3115\/v1\/D14-1181"},{"issue":"9\u201310","key":"2032_CR44","doi-asserted-by":"publisher","first-page":"1062","DOI":"10.1016\/j.specom.2011.01.011","volume":"53","author":"B Schuller","year":"2011","unstructured":"Schuller, B., Batliner, A., Steidl, S., Seppi, D.: Recognising realistic emotions and affect in speech: state of the art and lessons learnt from the first challenge. Speech Commun. 53(9\u201310), 1062\u20131087 (2011)","journal-title":"Speech Commun."},{"key":"2032_CR45","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., Weinberger, K. Q.: Densely connected convolutional networks, in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4700-4708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"2032_CR46","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Liang, P. P., Mazumder, N., Poria, S., Cambria, E., Morency, L.-P.: Memory fusion network for multi-view sequential learning, in Proceedings of the AAAI conference on artificial intelligence, 32(1), (2018)","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"2032_CR47","doi-asserted-by":"crossref","unstructured":"Poria, S., Cambria, E., Hazarika, D., Majumder, N., Zadeh, A., Morency, L.-P.: Context-dependent sentiment analysis in user-generated videos, in Proceedings of the 55th annual meeting of the association for computational linguistics, pp. 873-883 (2017)","DOI":"10.18653\/v1\/P17-1081"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02032-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02032-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02032-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T07:00:03Z","timestamp":1766127603000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02032-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,16]]},"references-count":47,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["2032"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02032-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,10,16]]},"assertion":[{"value":"23 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"447"}}