{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:25:33Z","timestamp":1763346333170,"version":"3.45.0"},"reference-count":49,"publisher":"Tech Science Press","issue":"3","license":[{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"vor","delay-in-days":298,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.068126","type":"journal-article","created":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T08:51:16Z","timestamp":1758099076000},"page":"5399-5421","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":0,"title":["Enhanced Multimodal Sentiment Analysis via Integrated Spatial Position Encoding and Fusion Embedding"],"prefix":"10.32604","volume":"85","author":[{"given":"Chenquan","family":"Gan","sequence":"first","affiliation":[]},{"given":"Xu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Xianrong","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Qingyi","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Deepak Kumar","family":"Jain","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","doi-asserted-by":"crossref","first-page":"102048","DOI":"10.1016\/j.jksuci.2024.102048","article-title":"Sentiment analysis methods, applications, and challenges: a systematic literature review","volume":"36","author":"Mao","year":"2024","journal-title":"J King Saud Univ Comput Inf Sci"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"101891","DOI":"10.1016\/j.inffus.2023.101891","article-title":"Multi-level correlation mining framework with self-supervised label generation for multimodal sentiment analysis","volume":"99","author":"Li","year":"2023","journal-title":"Inf Fusion"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1109\/MSP.2021.3106895","article-title":"Emotion recognition from multiple modalities: fundamentals and methodologies","volume":"38","author":"Zhao","year":"2021","journal-title":"IEEE Signal Process Mag"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"1067","DOI":"10.1109\/TNSE.2021.3049262","article-title":"Deep learning-embedded social Internet of Things for ambiguity-aware social recommendations","volume":"9","author":"Guo","year":"2022","journal-title":"IEEE Trans Netw Sci Eng"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"e1415","DOI":"10.1002\/widm.1415","article-title":"Multimodal sentimental analysis for social media applications: a comprehensive review","volume":"11","author":"Chandrasekaran","year":"2021","journal-title":"Wires Data Min Knowl Discov"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"102563","DOI":"10.1016\/j.displa.2023.102563","article-title":"Multimodal sentiment analysis: a survey","volume":"80","author":"Lai","year":"2023","journal-title":"Displays"},{"key":"ref7","series-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","first-page":"3454","article-title":"Contextual inter-modal attention for multi-modal sentiment analysis","author":"Ghosal","year":"2018"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","article-title":"Multimodal machine learning: a survey and taxonomy","volume":"41","author":"Baltrusaitis","year":"2019","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"424","DOI":"10.1016\/j.inffus.2022.09.025","article-title":"Multimodal sentiment analysis: a systematic review of history, datasets, multimodal fusion methods, applications, challenges and future directions","volume":"91","author":"Gandhi","year":"2023","journal-title":"Inf Fusion"},{"key":"ref10","doi-asserted-by":"crossref","unstructured":"Liu Z, Shen Y, Lakshminarasimhan VB, Liang PP, Zadeh A, Morency LP. Efficient low-rank multimodal fusion with modality-specific factors. arXiv:1806.00064. 2018.","DOI":"10.18653\/v1\/P18-1209"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1016\/j.imavis.2012.03.001","article-title":"LSTM-Modeling of continuous emotions in an audiovisual affect recognition framework","volume":"31","author":"Wllmer","year":"2013","journal-title":"Image Vis Comput"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"542","DOI":"10.1016\/j.inffus.2022.11.003","article-title":"Excavating multimodal correlation for representation learning","volume":"91","author":"Mai","year":"2023","journal-title":"Inf Fusion"},{"key":"ref13","first-page":"30","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref14","series-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics; 2019 Jul 28\u2013Aug 2","first-page":"6558","article-title":"Multimodal transformer for unaligned multimodal language sequences","author":"Tsai","year":"2019"},{"key":"ref15","series-title":"ICASSP 2020\u20142020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP); 2020 May 4\u20138","first-page":"4477","article-title":"Gated mechanism for attention based multi modal sentiment analysis","author":"Kumar","year":"2020"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"4014","DOI":"10.1109\/TMM.2020.3035277","article-title":"Image-text multimodal emotion classification via multi-view attentional network","volume":"23","author":"Yang","year":"2021","journal-title":"IEEE Trans Multimed"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1007\/s10772-018-9491-z","article-title":"Databases, features and classifiers for speech emotion recognition: a review","volume":"21","author":"Swain","year":"2018","journal-title":"Int J Speech Technol"},{"key":"ref18","first-page":"1","article-title":"AMSA: adaptive multimodal learning for sentiment analysis","volume":"19","author":"Wang","year":"2023","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"key":"ref19","doi-asserted-by":"crossref","unstructured":"Zadeh A, Chen M, Poria S, Cambria E, Morency LP. Tensor fusion network for multimodal sentiment analysis. arXiv:1707.07250. 2017.","DOI":"10.18653\/v1\/D17-1115"},{"key":"ref20","unstructured":"Wang H, Li X, Ren Z, Yang D, Ma C. Exploring multimodal sentiment analysis via CBAM attention and double-layer BiLSTM architecture. arXiv:2303.14708. 2023."},{"key":"ref21","doi-asserted-by":"crossref","first-page":"470","DOI":"10.62411\/jcta.12376","article-title":"Aspect-based sentiment analysis on E-commerce reviews using BiGRU and bi-directional attention flow","volume":"2","author":"Setiadi","year":"2025","journal-title":"J Comput Theor Appl"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"109731","DOI":"10.1016\/j.engappai.2024.109731","article-title":"Multimodal sentiment analysis based on multiple attention","volume":"140","author":"Wang","year":"2025","journal-title":"Eng Appl Artif Intell"},{"key":"ref23","series-title":"ICASSP 2022\u20142022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP); 2022 May 23\u201327","first-page":"4578","article-title":"Multi-channel attentive graph convolutional network with sentiment fusion for multimodal sentiment analysis","author":"Xiao","year":"2022"},{"key":"ref24","series-title":"Proceedings of the 29th International Conference on Computational Linguistics; 2022 Oct 12\u201317","first-page":"7124","article-title":"Modeling intra-and inter-modal relations: hierarchical graph contrastive learning for multimodal sentiment analysis","author":"Lin","year":"2022"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/j.inffus.2022.11.022","article-title":"AOBERT: all-modalities-in-One BERT for multimodal sentiment analysis","volume":"92","author":"Kim","year":"2023","journal-title":"Inf Fusion"},{"key":"ref26","unstructured":"Su W, Zhu X, Cao Y, Li B, Lu L, Wei F, et al. VL-BERT: pre-training of generic visual-linguistic representations. arXiv:1908.08530. 2019."},{"key":"ref27","doi-asserted-by":"crossref","unstructured":"Tan H, Bansal M. LXMERT: learning cross-modality encoder representations from transformers. arXiv:1908.07490. 2019.","DOI":"10.18653\/v1\/D19-1514"},{"key":"ref28","series-title":"Second International Conference on Algorithms, Microchips, and Network Applications (AMNA 2023); 2023 Jan 13\u201315","first-page":"47","article-title":"Multimodal sentiment analysis with BERT-ResNet50","author":"Zhang","year":"2023"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"777","DOI":"10.1080\/10255842.2024.2313066","article-title":"Multimodal sentiment analysis leveraging the strength of deep neural networks enhanced by the XGBoost classifier","volume":"28","author":"Chandrasekaran","year":"2025","journal-title":"Comput Methods Biomech Biomed Engin"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"2126","DOI":"10.1038\/s41598-025-85859-6","article-title":"Multimodal sentiment analysis based on multi-layer feature fusion and multi-task learning","volume":"15","author":"Cai","year":"2025","journal-title":"Sci Rep"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"111982","DOI":"10.1016\/j.knosys.2024.111982","article-title":"Video multimodal sentiment analysis using cross-modal feature translation and dynamical propagation","volume":"299","author":"Gan","year":"2024","journal-title":"Knowl Based Syst"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"110219","DOI":"10.1016\/j.knosys.2022.110219","article-title":"Transfer-based adaptive tree for multimodal sentiment analysis based on user latent aspects","volume":"261","author":"Rahmani","year":"2023","journal-title":"Knowl Based Syst"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"103675","DOI":"10.1016\/j.ipm.2024.103675","article-title":"A cross modal hierarchical fusion multimodal sentiment analysis method based on multi-task learning","volume":"61","author":"Wang","year":"2024","journal-title":"Inf Process Manag"},{"key":"ref34","first-page":"8992","article-title":"Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis","volume":"34","author":"Sun","year":"2020","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref35","unstructured":"Zadeh A, Zellers R, Pincus E, Morency LP. MOSI: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv:1606.06259. 2016."},{"key":"ref36","series-title":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers); 2018 Jul 15\u201320","first-page":"2236","article-title":"Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph","author":"Bagher Zadeh","year":"2018"},{"key":"ref37","article-title":"Multi-attention recurrent network for human communication comprehension","volume":"32","author":"Zadeh","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref38","series-title":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP); 2014 Oct 25\u201329","first-page":"1532","article-title":"Glove: global vectors for word representation","author":"Pennington","year":"2014"},{"key":"ref39","series-title":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP); 2014 May 4\u20139","first-page":"960","article-title":"COVAREP\u2014collaborative voice analysis repository for speech technologies","author":"Degottex","year":"2014"},{"key":"ref40","series-title":"Proceedings of the 55th Annual Meeting of the Association forComputational Linguistics (Volume 1: Long Papers); 2017 Jul 30\u2013Aug 4","first-page":"873","article-title":"Context-dependent sentiment analysis in user-generated videos","author":"Poria","year":"2017"},{"key":"ref41","unstructured":"Tsai YH, Liang PP, Zadeh A, Morency LP, Salakhutdinov R. Learning factorized multimodal representations. arXiv:1806.06176. 2018."},{"key":"ref42","article-title":"Memory fusion network for multi-view sequential learning","volume":"32","author":"Zadeh","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref43","doi-asserted-by":"crossref","first-page":"110370","DOI":"10.1016\/j.knosys.2023.110370","article-title":"Target and source modality co-reinforcement for emotion understanding from asynchronous multimodal sequences","volume":"265","author":"Yang","year":"2023","journal-title":"Knowl Based Syst"},{"key":"ref44","first-page":"6892","article-title":"Found in translation: learning robust joint representations by cyclic translations between modalities","volume":"33","author":"Pham","year":"2019","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref45","series-title":"Proceedings of the 2021 International Conference on Multimodal Interaction; 2021 Oct 18\u201322","first-page":"521","article-title":"Graph capsule aggregation for unaligned multimodal sequences","author":"Wu","year":"2021"},{"key":"ref46","series-title":"ICASSP 2022\u20142022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP); 2022 May 23\u201327","first-page":"8547","article-title":"Multimodal sentiment analysis on unaligned sequences via holographic embedding","author":"Ma","year":"2022"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"184314","DOI":"10.1007\/s11704-023-2444-y","article-title":"LMR-CBT: learning modality-fused representations with CB-Transformer for multimodal emotion recognition from unaligned multimodal sequences","volume":"18","author":"Fu","year":"2023","journal-title":"Front Comput Sci"},{"key":"ref48","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics; 2020 Jul 6\u20138","first-page":"2359","article-title":"Integrating multimodal information in large pretrained transformers","author":"Rahman"},{"key":"ref49","series-title":"Proceedings of the 28th ACM International Conference on Multimedia; 2020 Oct 12\u201316","first-page":"1122","article-title":"MISA: modality-invariant and-specific representations for multimodal sentiment analysis","author":"Hazarika","year":"2020"}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-3\/TSP_CMC_68126\/TSP_CMC_68126.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:21:56Z","timestamp":1763346116000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n3\/64171"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":49,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.068126","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2025-05-21","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-08-26","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-10-23","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}