{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T08:54:34Z","timestamp":1770800074187,"version":"3.50.0"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T00:00:00Z","timestamp":1764806400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T00:00:00Z","timestamp":1764806400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.61971347"],"award-info":[{"award-number":["No.61971347"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Project of the Xi\u2019an Science and Technology Planning Foundation","award":["No.24ZDCYISGG0020"],"award-info":[{"award-number":["No.24ZDCYISGG0020"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s00530-025-02062-3","type":"journal-article","created":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T11:33:59Z","timestamp":1764848039000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MDR-MSA: multi-perspective decoupled representation learning for multimodal sentiment analysis"],"prefix":"10.1007","volume":"32","author":[{"given":"Jiangying","family":"Du","sequence":"first","affiliation":[]},{"given":"Yuxing","family":"Zhi","sequence":"additional","affiliation":[]},{"given":"Huaijun","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Kan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Junhuai","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,4]]},"reference":[{"issue":"2","key":"2062_CR1","doi-asserted-by":"publisher","first-page":"680","DOI":"10.1109\/TAFFC.2019.2947464","volume":"13","author":"S Zhang","year":"2019","unstructured":"Zhang, S., Zhao, X., Tian, Q.: Spontaneous speech emotion recognition using multiscale deep convolutional lSTM. IEEE Trans. Affect. Comput. 13(2), 680\u2013688 (2019)","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"6","key":"2062_CR2","doi-asserted-by":"publisher","first-page":"8597","DOI":"10.1007\/s11042-022-12122-9","volume":"81","author":"N Ding","year":"2022","unstructured":"Ding, N., Tian Sw, Y.L.: A multimodal fusion method for sarcasm detection based on late fusion. Multimedia Tools Appl. 81(6), 8597\u20138616 (2022)","journal-title":"Multimedia Tools Appl."},{"key":"2062_CR3","doi-asserted-by":"publisher","unstructured":"Zhi, Y., Li, J., Wang, H., et\u00a0al (2024) A fine-grained tri-modal interaction model for multimodal sentiment analysis. In: ICASSP 2024\u20142024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 5715\u20135719, https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10447872","DOI":"10.1109\/ICASSP48485.2024.10447872"},{"key":"2062_CR4","unstructured":"Bousmalis, K., Trigeorgis, G., Silberman, N., et\u00a0al.: Domain separation networks. Adv. Neural Inform. Process. Syst. 29 (2016)"},{"key":"2062_CR5","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 13713\u201313722 (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"2062_CR6","doi-asserted-by":"crossref","unstructured":"Mai, S., Hu, H., Xing, S.: Modality to modality translation: An adversarial representation learning and graph fusion network for multimodal fusion. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 164\u2013172 (2020)","DOI":"10.1609\/aaai.v34i01.5347"},{"key":"2062_CR7","doi-asserted-by":"crossref","unstructured":"Peris, \u00c1., Casacuberta, F.: A neural, interactive-predictive system for multimodal sequence to sequence tasks (2019). arXiv preprint arXiv:1905.08181","DOI":"10.18653\/v1\/P19-3014"},{"issue":"01","key":"2062_CR8","first-page":"6892","volume":"33","author":"H Pham","year":"2019","unstructured":"Pham, H., Liang, P.P., Manzini, T., et al.: Found in translation: learning robust joint representations by cyclic translations between modalities. Proc. AAAI Conf. Artif. Intell. 33(01), 6892\u20136899 (2019)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2062_CR9","unstructured":"Andrew, G., Arora, R., Bilmes, J., et\u00a0al.: Deep canonical correlation analysis. In: International Conference on Machine Learning, PMLR, pp 1247\u20131255 (2013)"},{"key":"2062_CR10","doi-asserted-by":"crossref","unstructured":"Ni, H., Song, J., Zhu, X., et\u00a0al.: Camera-agnostic person re-identification via adversarial disentangling learning. In: Proceedings of the 29th ACM International Conference on Multimedia, pp 2002\u20132010 (2021)","DOI":"10.1145\/3474085.3475361"},{"key":"2062_CR11","doi-asserted-by":"crossref","unstructured":"Liang, T., Lin, G., Feng, L., et\u00a0al.: Attention is not enough: Mitigating the distribution discrepancy in asynchronous multimodal sequence fusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 8148\u20138156 (2021)","DOI":"10.1109\/ICCV48922.2021.00804"},{"key":"2062_CR12","doi-asserted-by":"crossref","unstructured":"Yang, D., Huang, S., Kuang, H., et\u00a0al.: Disentangled representation learning for multimodal emotion recognition. In: Proceedings of the 30th ACM International Conference on Multimedia, pp 1642\u20131651 (2022)","DOI":"10.1145\/3503161.3547754"},{"key":"2062_CR13","doi-asserted-by":"crossref","unstructured":"Oskouei, A.G., Samadi, N., Khezri, S., et\u00a0al.: Feature-weighted fuzzy clustering methods: An experimental review. Neurocomputing. p 129176 (2024)","DOI":"10.1016\/j.neucom.2024.129176"},{"key":"2062_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2024.111712","volume":"161","author":"AG Oskouei","year":"2024","unstructured":"Oskouei, A.G., Samadi, N., Tanha, J.: Feature-weight and cluster-weight learning in fuzzy c-means method for semi-supervised clustering. Appl. Soft Comput. 161, 111712 (2024)","journal-title":"Appl. Soft Comput."},{"issue":"3","key":"2062_CR15","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1109\/MIS.2013.34","volume":"28","author":"M W\u00f6llmer","year":"2013","unstructured":"W\u00f6llmer, M., Weninger, F., Knaup, T., et al.: Youtube movie reviews: Sentiment analysis in an audio-visual context. IEEE Intell. Syst. 28(3), 46\u201353 (2013)","journal-title":"IEEE Intell. Syst."},{"key":"2062_CR16","doi-asserted-by":"crossref","unstructured":"Poria, S., Chaturvedi, I., Cambria, E., et\u00a0al.: Convolutional mkl based multimodal emotion recognition and sentiment analysis. In: 2016 IEEE 16th International Conference on Data Mining (ICDM), IEEE, pp 439\u2013448 (2016)","DOI":"10.1109\/ICDM.2016.0055"},{"key":"2062_CR17","doi-asserted-by":"crossref","unstructured":"Nojavanasghari, B., Gopinath, D., Koushik, J., et\u00a0al.: Deep multimodal fusion for persuasiveness prediction. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction, pp 284\u2013288 (2016)","DOI":"10.1145\/2993148.2993176"},{"key":"2062_CR18","doi-asserted-by":"crossref","unstructured":"Kampman, O., Barezi, E.J., Bertero, D., et\u00a0al.: Investigating audio, visual, and text fusion methods for end-to-end automatic personality prediction (2018). arXiv preprint arXiv:1805.00705","DOI":"10.18653\/v1\/P18-2096"},{"issue":"2","key":"2062_CR19","doi-asserted-by":"publisher","first-page":"1391","DOI":"10.1109\/TAFFC.2021.3093923","volume":"14","author":"Y Zhao","year":"2021","unstructured":"Zhao, Y., Cao, X., Lin, J., et al.: Multimodal affective states recognition based on multiscale CNNs and biologically inspired decision fusion model. IEEE Trans. Affect. Comput. 14(2), 1391\u20131403 (2021)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"2062_CR20","doi-asserted-by":"crossref","unstructured":"Zadeh, A.B., Liang, P.P., Poria, S., et\u00a0al.: Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp 2236\u20132246 (2018)","DOI":"10.18653\/v1\/P18-1208"},{"key":"2062_CR21","doi-asserted-by":"crossref","unstructured":"Behmanesh, M., Adibi, P., Ehsani, S.M.S., et\u00a0al.: Geometric multimodal deep learning with multiscaled graph wavelet convolutional network. IEEE Trans. Neural Netw. Learn. Syst. (2022)","DOI":"10.31219\/osf.io\/9axct"},{"key":"2062_CR22","doi-asserted-by":"crossref","unstructured":"Liang, P.P., Liu, Z., Tsai, Y.H.H., et\u00a0al.: Learning representations from imperfect time series data via tensor rank regularization (2019). arXiv preprint arXiv:1907.01011","DOI":"10.18653\/v1\/P19-1152"},{"key":"2062_CR23","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., et\u00a0al.: Tensor fusion network for multimodal sentiment analysis (2017). arXiv preprint arXiv:1707.07250","DOI":"10.18653\/v1\/D17-1115"},{"key":"2062_CR24","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., et\u00a0al.: Efficient low-rank multimodal fusion with modality-specific factors (2018). arXiv preprint arXiv:1806.00064","DOI":"10.18653\/v1\/P18-1209"},{"key":"2062_CR25","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Ma, M.Q., Yang, M., et\u00a0al.: Multimodal routing: Improving local and global interpretability of multimodal language analysis. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing. Conference on Empirical Methods in Natural Language Processing, NIH Public Access, p 1823 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.143"},{"key":"2062_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110370","volume":"265","author":"D Yang","year":"2023","unstructured":"Yang, D., Liu, Y., Huang, C., et al.: Target and source modality co-reinforcement for emotion understanding from asynchronous multimodal sequences. Knowl.-Based Syst. 265, 110370 (2023)","journal-title":"Knowl.-Based Syst."},{"key":"2062_CR27","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Liang, P.P., Poria, S., et\u00a0al.: Multi-attention recurrent network for human communication comprehension. In: Proceedings of the AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.12024"},{"key":"2062_CR28","doi-asserted-by":"crossref","unstructured":"Wu, J., Mai, S., Hu, H.: Graph capsule aggregation for unaligned multimodal sequences. In: Proceedings of the 2021 International Conference on Multimodal Interaction, pp 521\u2013529 (2021)","DOI":"10.1145\/3462244.3479931"},{"key":"2062_CR29","doi-asserted-by":"crossref","unstructured":"Sun, Z., Sarma, P., Sethares, W., et\u00a0al.: Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 8992\u20138999 (2020)","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"2062_CR30","unstructured":"Dumpala, S.H., Sheikh, I., Chakraborty, R., et\u00a0al.: Audio-visual fusion for sentiment classification using cross-modal autoencoder. In: 32nd Conference on Neural Information Processing Systems (NIPS 2018), pp 1\u20134 (2019)"},{"key":"2062_CR31","doi-asserted-by":"crossref","unstructured":"Shankar, S.:Multimodal fusion via cortical network inspired losses. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp 1167\u20131178 (2022)","DOI":"10.18653\/v1\/2022.acl-long.83"},{"key":"2062_CR32","doi-asserted-by":"crossref","unstructured":"Sun, Y., Mai, S., Hu, H.: Learning to learn better unimodal representations via adaptive multimodal meta-learning. IEEE Trans. Affect. Comput. (2022)","DOI":"10.1109\/TAFFC.2022.3178231"},{"key":"2062_CR33","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1016\/j.inffus.2020.08.006","volume":"65","author":"Q Li","year":"2021","unstructured":"Li, Q., Gkoumas, D., Lioma, C., et al.: Quantum-inspired multimodal fusion for video sentiment analysis. Inform. Fus. 65, 58\u201371 (2021)","journal-title":"Inform. Fus."},{"key":"2062_CR34","doi-asserted-by":"crossref","unstructured":"Liu, P., Zheng, X., Li, H., et al.: Improving the modality representation with multi-view contrastive learning for multimodal sentiment analysis. In: ICASSP 2023\u20132023 IEEE International Conference on Acoustics, pp. 1\u20135. IEEE, Speech and Signal Processing (ICASSP) (2023)","DOI":"10.1109\/ICASSP49357.2023.10096470"},{"key":"2062_CR35","doi-asserted-by":"crossref","unstructured":"Yang, D., Kuang, H., Huang, S., et\u00a0al.: Learning modality-specific and-agnostic representations for asynchronous multimodal language sequences. In: Proceedings of the 30th ACM International Conference on Multimedia, pp 1708\u20131717 (2022)","DOI":"10.1145\/3503161.3547755"},{"key":"2062_CR36","doi-asserted-by":"crossref","unstructured":"Tan, H., Bansal, M.: Lxmert: Learning cross-modality encoder representations from transformers (2019). arXiv preprint arXiv:1908.07490","DOI":"10.18653\/v1\/D19-1514"},{"key":"2062_CR37","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1016\/j.inffus.2022.11.003","volume":"91","author":"S Mai","year":"2023","unstructured":"Mai, S., Sun, Y., Zeng, Y., et al.: Excavating multimodal correlation for representation learning. Inform Fus 91, 542\u2013555 (2023)","journal-title":"Inform Fus"},{"issue":"3","key":"2062_CR38","doi-asserted-by":"publisher","first-page":"2276","DOI":"10.1109\/TAFFC.2022.3172360","volume":"14","author":"S Mai","year":"2022","unstructured":"Mai, S., Zeng, Y., Zheng, S., et al.: Hybrid contrastive learning of tri-modal representation for multimodal sentiment analysis. IEEE Trans. Affect. Comput. 14(3), 2276\u20132289 (2022)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"2062_CR39","unstructured":"Udandarao, V., Maiti, A., Srivatsav, D., et\u00a0al.: Cobra: Contrastive bi-modal representation algorithm (2020). arXiv preprint arXiv:2005.03687"},{"key":"2062_CR40","first-page":"18433","volume":"33","author":"N Pielawski","year":"2020","unstructured":"Pielawski, N., Wetzer, E., \u00d6fverstedt, J., et al.: Comir: Contrastive multimodal image representation for registration. Adv. Neural. Inf. Process. Syst. 33, 18433\u201318444 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2062_CR41","doi-asserted-by":"crossref","unstructured":"Chuang, C.Y., Hjelm, R.D., Wang, X., et\u00a0al.: Robust contrastive learning against noisy views. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 16670\u201316681 (2022)","DOI":"10.1109\/CVPR52688.2022.01617"},{"key":"2062_CR42","doi-asserted-by":"crossref","unstructured":"Degottex, G., Kane, J., Drugman, T., et\u00a0al.: Covarep\u2014a collaborative voice analysis repository for speech technologies. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, pp 960\u2013964 (2014)","DOI":"10.1109\/ICASSP.2014.6853739"},{"key":"2062_CR43","unstructured":"Devlin, J., Chang, M.W., Lee, K., et\u00a0al.: Bert: Pre-training of deep bidirectional transformers for language understanding (2018). arXiv preprint arXiv:1810.04805"},{"issue":"4","key":"2062_CR44","doi-asserted-by":"publisher","first-page":"735","DOI":"10.1037\/a0030737","volume":"139","author":"TM Chaplin","year":"2013","unstructured":"Chaplin, T.M., Aldao, A.: Gender differences in emotion expression in children: a meta-analytic review. Psychol. Bull. 139(4), 735 (2013)","journal-title":"Psychol. Bull."},{"issue":"3","key":"2062_CR45","doi-asserted-by":"publisher","first-page":"686","DOI":"10.1037\/0022-3514.74.3.686","volume":"74","author":"AM Kring","year":"1998","unstructured":"Kring, A.M., Gordon, A.H.: Sex differences in emotion: expression, experience, and physiology. J. Pers. Soc. Psychol. 74(3), 686 (1998)","journal-title":"J. Pers. Soc. Psychol."},{"key":"2062_CR46","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: Misa: Modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp 1122\u20131131 (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"2062_CR47","doi-asserted-by":"crossref","unstructured":"Chauhan, D.S., Akhtar, M.S., Ekbal, A., et\u00a0al.: Context-aware interactive attention for multi-modal sentiment and emotion analysis. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp 5647\u20135657 (2019)","DOI":"10.18653\/v1\/D19-1566"},{"key":"2062_CR48","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P., et\u00a0al.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, NIH Public Access, p 6558 (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"2062_CR49","doi-asserted-by":"crossref","unstructured":"Gong, P., Liu, J., Zhang, X., et al.: A multi-stage hierarchical relational graph neural network for multimodal sentiment analysis. In: ICASSP 2023\u20132023 IEEE International Conference on Acoustics, pp. 1\u20135. IEEE, Speech and Signal Processing (ICASSP) (2023)","DOI":"10.1109\/ICASSP49357.2023.10096644"},{"key":"2062_CR50","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Li, W., Xu, H., et\u00a0al.: Transformer-based feature reconstruction network for robust multimodal sentiment analysis. In: Proceedings of the 29th ACM International Conference on Multimedia, pp 4400\u20134407 (2021)","DOI":"10.1145\/3474085.3475585"},{"issue":"1","key":"2062_CR51","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2023.103538","volume":"61","author":"Q Lu","year":"2024","unstructured":"Lu, Q., Sun, X., Gao, Z., et al.: Coordinated-joint translation fusion framework with sentiment-interactive graph convolutional networks for multimodal sentiment analysis. Inform. Process. Manag. 61(1), 103538 (2024)","journal-title":"Inform. Process. Manag."},{"key":"2062_CR52","unstructured":"Wu, Z., Zhang, Q., Miao, D., et\u00a0al.: Hydiscgan: A hybrid distributed cgan for audio-visual privacy preservation in multimodal sentiment analysis (2024). arXiv preprint arXiv:2404.11938"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02062-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02062-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02062-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T04:18:10Z","timestamp":1770783490000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02062-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,4]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2062"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02062-3","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,4]]},"assertion":[{"value":"23 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"34"}}