{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T22:57:34Z","timestamp":1781650654954,"version":"3.54.5"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T00:00:00Z","timestamp":1744156800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T00:00:00Z","timestamp":1744156800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62266025"],"award-info":[{"award-number":["62266025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s00371-025-03883-z","type":"journal-article","created":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T15:20:48Z","timestamp":1744212048000},"page":"8537-8552","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Transformer-based short-term memory attention for enhanced multimodal sentiment analysis"],"prefix":"10.1007","volume":"41","author":[{"given":"Dangguo","family":"Shao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kaiqiang","family":"Tang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingtao","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sanli","family":"Yi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lei","family":"Ma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,4,9]]},"reference":[{"key":"3883_CR1","unstructured":"Zhu, R., Han, C., Qian, Y., et al.: Exchanging-based Multimodal Fusion with Transformer. arXiv preprint arXiv:2309.02190 (2023)."},{"key":"3883_CR2","unstructured":"Mehrabian, A., et al.: Silent messages. Wadsworth Belmont, CA (1971)."},{"key":"3883_CR3","doi-asserted-by":"crossref","unstructured":"Xu, N., Mao, W., Chen, G.: A co-memory network for multimodal sentiment analysis. In: The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval (2018).","DOI":"10.1145\/3209978.3210093"},{"key":"3883_CR4","doi-asserted-by":"publisher","first-page":"4014","DOI":"10.1109\/TMM.2020.3035277","volume":"23","author":"X Yang","year":"2020","unstructured":"Yang, X., et al.: Image-text multimodal emotion classification via multi-view attentional network. IEEE Trans. Multimed. 23, 4014\u20134026 (2020)","journal-title":"IEEE Trans. Multimed."},{"key":"3883_CR5","doi-asserted-by":"crossref","unstructured":"Colombo, P., et al.: Improving multimodal fusion via mutual dependency maximisation. arXiv preprint arXiv:2109.00922 (2021).","DOI":"10.18653\/v1\/2021.emnlp-main.21"},{"key":"3883_CR6","doi-asserted-by":"publisher","first-page":"22945","DOI":"10.1109\/ACCESS.2020.2969205","volume":"8","author":"D Zhang","year":"2020","unstructured":"Zhang, D., et al.: Multi-modal sentiment classification with independent and interactive knowledge via semi-supervised learning. IEEE Access 8, 22945\u201322954 (2020)","journal-title":"IEEE Access"},{"key":"3883_CR7","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al. Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064\u00a0(2018).","DOI":"10.18653\/v1\/P18-1209"},{"key":"3883_CR8","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1109\/TIP.2020.2967577","volume":"29","author":"S Song","year":"2020","unstructured":"Song, S., et al.: Modality compensation network: cross-modal adaptation for action recognition. IEEE Trans. Image Process. 29, 3957\u20133969 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"3883_CR9","doi-asserted-by":"crossref","unstructured":"Han, W., Chen, H., Poria, S.: Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:2109.00412\u00a0(2021).","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"3883_CR10","doi-asserted-by":"crossref","unstructured":"Ju, X., et al.: Joint multi-modal aspect-sentiment analysis with auxiliary cross-modal relation detection. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (2021).","DOI":"10.18653\/v1\/2021.emnlp-main.360"},{"key":"3883_CR11","unstructured":"De Vries, H., et al.: Modulating early visual processing by language. Adv. Neural Inf. Process. Syst. 30 (2017)."},{"key":"3883_CR12","doi-asserted-by":"crossref","unstructured":"Ma, H., et al.: A transformer-based model with self-distillation for multimodal emotion recognition in conversations. IEEE Trans. Multimed. (2023).","DOI":"10.1109\/TMM.2023.3271019"},{"key":"3883_CR13","doi-asserted-by":"crossref","unstructured":"Ma, L., et al.: BCD-MM: Multimodal sentiment analysis model with dual-bias-aware feature learning and attention mechanisms. IEEE Access\u00a0(2024).","DOI":"10.1109\/ACCESS.2024.3405586"},{"key":"3883_CR14","doi-asserted-by":"crossref","unstructured":"Chen, Q., et al.: CTHFNet: contrastive translation and hierarchical fusion network for text\u2013video\u2013audio sentiment analysis. Vis. Comput. pp. 1\u201314 (2024).","DOI":"10.1007\/s00371-024-03668-w"},{"key":"3883_CR15","doi-asserted-by":"crossref","unstructured":"Li, H., Guo, A., Li, Y.: CCMA: CapsNet for audio\u2013video sentiment analysis using cross-modal attention. Vis. Compute. pp. 1\u201312 (2024).","DOI":"10.1007\/s00371-024-03453-9"},{"key":"3883_CR16","doi-asserted-by":"crossref","unstructured":"Wei, Y., Yuan, S., Yang, R., et al.: Tackling modality heterogeneity with multi-view calibration network for multimodal sentiment detection. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (vol. 1: Long Papers), pp. 5240\u20135252 (2023).","DOI":"10.18653\/v1\/2023.acl-long.287"},{"key":"3883_CR17","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, K., Mai, W., et al.: D2r: dual-branch dynamic routing network for multimodal sentiment detection. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 3536\u20133547 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.207"},{"key":"3883_CR18","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: CLMLF: A contrastive learning and multi-layer fusion method for multimodal sentiment detection. arXiv preprint arXiv:2204.05515\u00a0(2022).","DOI":"10.18653\/v1\/2022.findings-naacl.175"},{"key":"3883_CR19","doi-asserted-by":"crossref","unstructured":"Yang, X., et al.: Multimodal sentiment detection based on multi-channel graph neural networks. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) (2021).","DOI":"10.18653\/v1\/2021.acl-long.28"},{"key":"3883_CR20","doi-asserted-by":"publisher","first-page":"111878","DOI":"10.1016\/j.knosys.2024.111878","volume":"296","author":"G Sun","year":"2024","unstructured":"Sun, G., et al.: Multi-label text classification model integrating label attention and historical attention. Knowl. Based Syst. 296, 111878 (2024)","journal-title":"Knowl. Based Syst."},{"key":"3883_CR21","doi-asserted-by":"crossref","unstructured":"Passalis, N., Tefas, A.: Learning deep representations with probabilistic knowledge transfer. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018).","DOI":"10.1007\/978-3-030-01252-6_17"},{"key":"3883_CR22","doi-asserted-by":"crossref","unstructured":"Li, T., et al.: Few sample knowledge distillation for efficient network compression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2020).","DOI":"10.1109\/CVPR42600.2020.01465"},{"key":"3883_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Deep mutual learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. (2018).","DOI":"10.1109\/CVPR.2018.00454"},{"key":"3883_CR24","unstructured":"Chung, I., et al.: Feature-map-level online adversarial knowledge distillation. In: International Conference on Machine Learning. PMLR (2020)."},{"key":"3883_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, L., et al. Be your own teacher: Improve the performance of convolutional neural networks via self distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019).","DOI":"10.1109\/ICCV.2019.00381"},{"key":"3883_CR26","doi-asserted-by":"crossref","unstructured":"Hou, Y., et al.: Learning lightweight lane detection CNNS by self attention distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019).","DOI":"10.1109\/ICCV.2019.00110"},{"issue":"8","key":"3883_CR27","first-page":"4388","volume":"44","author":"L Zhang","year":"2021","unstructured":"Zhang, L., Bao, C., Ma, K.: Self-distillation: towards efficient and compact neural networks. IEEE Trans. Pattern Anal. Mach. Intell. 44(8), 4388\u20134403 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3883_CR28","doi-asserted-by":"crossref","unstructured":"Albanie, S., et al.: Emotion recognition in speech using cross-modal transfer in the wild. In: Proceedings of the 26th ACM International Conference on Multimedia (2018).","DOI":"10.1145\/3240508.3240578"},{"key":"3883_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.patrec.2021.03.007","volume":"146","author":"L Schoneveld","year":"2021","unstructured":"Schoneveld, L., Othmani, A., Abdelkawy, H.: Leveraging recent advances in deep learning for audio-visual emotion recognition. Pattern Recogn. Lett. 146, 1\u20137 (2021)","journal-title":"Pattern Recogn. Lett."},{"key":"3883_CR30","doi-asserted-by":"crossref","unstructured":"Zhou, T., et al.: Automatic ICD coding via interactive shared representation networks with self-distillation mechanism. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) (2021).","DOI":"10.18653\/v1\/2021.acl-long.463"},{"key":"3883_CR31","unstructured":"Kenton, J.D.M-W.C., Toutanova, L.K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of naacL-HLT, vol. 1 (2019)."},{"key":"3883_CR32","doi-asserted-by":"crossref","unstructured":"He, K., et al.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2016).","DOI":"10.1109\/CVPR.2016.90"},{"key":"3883_CR33","unstructured":"Ashish, V.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30: I (2017)."},{"key":"3883_CR34","unstructured":"Yang, P., et al.: SGM: sequence generation model for multi-label classification. arXiv preprint arXiv:1806.04822\u00a0(2018)."},{"key":"3883_CR35","unstructured":"Hinton, G.: Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531\u00a0(2015)."},{"key":"3883_CR36","unstructured":"Niu, T., et al.: Sentiment analysis on multi-view social data. In: Multimedia Modeling: 22nd International Conference, MMM 2016, Miami, FL, USA, January 4\u20136, 2016, Proceedings, Part II 22. Springer International Publishing (2016)."},{"key":"3883_CR37","doi-asserted-by":"crossref","unstructured":"Cai, Y., Cai, H., Wan, X.: Multi-modal sarcasm detection in twitter with hierarchical fusion model. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (2019).","DOI":"10.18653\/v1\/P19-1239"},{"key":"3883_CR38","unstructured":"Yoon, K.: Convolutional neural networks for sentence classification. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1746\u20131751, Doha, Qatar. Association for Computational Linguistics (2014)."},{"key":"3883_CR39","doi-asserted-by":"crossref","unstructured":"Zhou, P., et al.: Attention-based bidirectional long short-term memory networks for relation classification. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (vol. 2: Short papers) (2016)","DOI":"10.18653\/v1\/P16-2034"},{"key":"3883_CR40","doi-asserted-by":"crossref","unstructured":"Lai, S., et al.: Recurrent convolutional neural networks for text classification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 29. No. 1 (2015)","DOI":"10.1609\/aaai.v29i1.9513"},{"key":"3883_CR41","doi-asserted-by":"crossref","unstructured":"Huang, L., et al.: Text level graph neural network for text classification. arXiv preprint arXiv:1910.02356\u00a0(2019).","DOI":"10.18653\/v1\/D19-1345"},{"key":"3883_CR42","unstructured":"Dosovitskiy, A.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929\u00a0(2020)."},{"key":"3883_CR43","doi-asserted-by":"crossref","unstructured":"Xu, N., Mao, W.: Multisentinet: A deep semantic network for multimodal sentiment analysis. In: Proceedings of the 2017 ACM on Conference on Information and Knowledge Management (2017).","DOI":"10.1145\/3132847.3133142"},{"key":"3883_CR44","doi-asserted-by":"crossref","unstructured":"Schifanella, R., et al.: Detecting sarcasm in multimodal social platforms. In: Proceedings of the 24th ACM International Conference on Multimedia (2016).","DOI":"10.1145\/2964284.2964321"},{"key":"3883_CR45","doi-asserted-by":"crossref","unstructured":"Xu, N., Zeng, Z., Mao, W.: Reasoning with multimodal sarcastic tweets via modeling cross-modality contrast and semantic association. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (2020).","DOI":"10.18653\/v1\/2020.acl-main.349"},{"issue":"4","key":"3883_CR46","doi-asserted-by":"publisher","first-page":"3403","DOI":"10.1007\/s10489-024-05309-0","volume":"54","author":"H Wang","year":"2024","unstructured":"Wang, H., Ren, C., Yu, Z.: Multimodal sentiment analysis based on cross-instance graph neural networks. Appl. Intell. 54(4), 3403\u20133416 (2024)","journal-title":"Appl. Intell."}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03883-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-03883-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03883-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:49:51Z","timestamp":1757155791000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-03883-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,9]]},"references-count":46,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["3883"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-03883-z","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,9]]},"assertion":[{"value":"11 March 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}