{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T04:07:06Z","timestamp":1745986026907,"version":"3.40.4"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,4,29]],"date-time":"2025-04-29T00:00:00Z","timestamp":1745884800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,29]],"date-time":"2025-04-29T00:00:00Z","timestamp":1745884800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["72301010","72171004"],"award-info":[{"award-number":["72301010","72171004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Project of Cultivation for Young Top-notch Talents of Beijing Municipal Institutions","award":["BPHR202203061"],"award-info":[{"award-number":["BPHR202203061"]}]},{"name":"R&D Program of Beijing Municipal Commission of Education","award":["KM202010011011"],"award-info":[{"award-number":["KM202010011011"]}]},{"name":"Innovation Research Special Project of the IFLYTEK for University Intelligent Teaching","award":["2022XF055"],"award-info":[{"award-number":["2022XF055"]}]},{"name":"Humanities and Social Science Project of Ministry of Education of China","award":["21YJCZH186"],"award-info":[{"award-number":["21YJCZH186"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07252-4","type":"journal-article","created":{"date-parts":[[2025,4,29]],"date-time":"2025-04-29T05:24:11Z","timestamp":1745904251000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-task learning framework using tri-encoder with caption prompt for multimodal aspect-based sentiment analysis"],"prefix":"10.1007","volume":"81","author":[{"given":"Yuanyuan","family":"Cai","sequence":"first","affiliation":[]},{"given":"Fei","family":"Tong","sequence":"additional","affiliation":[]},{"given":"Qingchuan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Haitao","family":"Xiong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,29]]},"reference":[{"issue":"4","key":"7252_CR1","doi-asserted-by":"publisher","first-page":"4709","DOI":"10.1007\/S11227-021-04040-8","volume":"78","author":"R Safa","year":"2022","unstructured":"Safa R, Bayat P, Moghtader L (2022) Automatic detection of depression symptoms in twitter using multimodal analysis. J Supercomput 78(4):4709\u20134744. https:\/\/doi.org\/10.1007\/S11227-021-04040-8","journal-title":"J Supercomput"},{"key":"7252_CR2","doi-asserted-by":"publisher","unstructured":"Ju X, Zhang D, Xiao R, Li J, Li S, Zhang M, Zhou G (2021) Joint multi-modal aspect-sentiment analysis with auxiliary cross-modal relation detection. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event \/ Punta Cana, Dominican Republic, 7-11 November, 2021, pp 4395\u20134405. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.360","DOI":"10.18653\/v1\/2021.emnlp-main.360"},{"key":"7252_CR3","doi-asserted-by":"publisher","unstructured":"Wu Z, Zheng C, Cai Y, Chen J, Leung H, Li Q (2020) Multimodal representation with embedded visual guiding objects for named entity recognition in social media posts. In: MM \u201920: The 28th ACM International Conference on Multimedia, Virtual Event \/ Seattle, WA, USA, October 12\u201316, 2020, pp 1038\u20131046. https:\/\/doi.org\/10.1145\/3394171.3413650","DOI":"10.1145\/3394171.3413650"},{"key":"7252_CR4","doi-asserted-by":"publisher","unstructured":"Wu H, Cheng S, Wang J, Li S, Chi L (2020) Multimodal aspect extraction with region-aware alignment network. In: Natural Language Processing and Chinese Computing\u20149th CCF International Conference, NLPCC 2020, Zhengzhou, China, October 14\u201318, 2020, Proceedings, Part I. Lecture Notes in Computer Science, vol 12430, pp 145\u2013156. https:\/\/doi.org\/10.1007\/978-3-030-60450-9_12","DOI":"10.1007\/978-3-030-60450-9_12"},{"key":"7252_CR5","doi-asserted-by":"publisher","unstructured":"Yu J, Jiang J, Yang L, Xia R (2020) Improving multimodal named entity recognition via entity span detection with unified multimodal transformer. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, July 5\u201310, 2020, pp 3342\u20133352. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.306","DOI":"10.18653\/v1\/2020.acl-main.306"},{"key":"7252_CR6","doi-asserted-by":"publisher","unstructured":"Zhang Q, Fu J, Liu X, Huang X (2018) Adaptive co-attention network for named entity recognition in tweets. In: Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI-18), the 30th Innovative Applications of Artificial Intelligence (IAAI-18), and the 8th AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2\u20137, 2018, pp 5674\u20135681. https:\/\/doi.org\/10.1609\/aaai.v32i1.11962","DOI":"10.1609\/aaai.v32i1.11962"},{"key":"7252_CR7","doi-asserted-by":"publisher","unstructured":"Khan Z, Fu Y (2021) Exploiting BERT for multimodal target sentiment classification through input space translation. In: MM \u201921: ACM Multimedia Conference, Virtual Event, China, October 20\u201324, 2021, pp 3034\u20133042. https:\/\/doi.org\/10.1145\/3474085.3475692","DOI":"10.1145\/3474085.3475692"},{"key":"7252_CR8","doi-asserted-by":"publisher","unstructured":"Xu N, Mao W, Chen G (2019) Multi-interactive memory network for aspect based multimodal sentiment analysis. In: The Thirty-Third AAAI Conference on Artificial Intelligence, AAAI 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, IAAI 2019, The Ninth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2019, Honolulu, Hawaii, USA, January 27\u2013February 1, 2019, pp 371\u2013378. https:\/\/doi.org\/10.1609\/aaai.v33i01.3301371","DOI":"10.1609\/aaai.v33i01.3301371"},{"key":"7252_CR9","doi-asserted-by":"publisher","unstructured":"Yang L, Yu J, Zhang C, Na J (2021) Fine-grained sentiment analysis of political tweets with entity-aware multimodal network. In: Diversity, Divergence, Dialogue\u201416th International Conference, iConference 2021, Beijing, China, March 17\u201331, 2021, Proceedings, Part I. Lecture Notes in Computer Science, vol 12645, pp 411\u2013420. https:\/\/doi.org\/10.1007\/978-3-030-71292-1_31","DOI":"10.1007\/978-3-030-71292-1_31"},{"key":"7252_CR10","doi-asserted-by":"publisher","unstructured":"Yu J, Jiang J (2019) Adapting BERT for target-oriented multimodal sentiment classification. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI 2019, Macao, China, August 10-16, 2019, pp. 5408\u20135414. https:\/\/doi.org\/10.24963\/ijcai.2019\/751","DOI":"10.24963\/ijcai.2019\/751"},{"key":"7252_CR11","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, June 27\u201330, 2016, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"7252_CR12","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N (2021) An image is worth 16x16 words: transformers for image recognition at scale. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3\u20137, 2021. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"7252_CR13","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2\u20137, 2019, Volume 1 (Long and Short Papers), pp 4171\u20134186. https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"7252_CR14","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019) Roberta: a robustly optimized BERT pretraining approach. CoRR arXiv:1907.11692"},{"key":"7252_CR15","doi-asserted-by":"publisher","unstructured":"Lu D, Neves L, Carvalho V, Zhang N, Ji H (2018) Visual attention model for name tagging in multimodal social media. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, ACL 2018, Melbourne, Australia, July 15\u201320, 2018, Volume 1: Long Papers, pp 1990\u20131999. https:\/\/doi.org\/10.18653\/v1\/P18-1185. https:\/\/aclanthology.org\/P18-1185\/","DOI":"10.18653\/v1\/P18-1185"},{"key":"7252_CR16","doi-asserted-by":"publisher","unstructured":"Si Q, Lin Z, Zheng M, Fu P, Wang W (2021) Check it again: progressive visual question answering via visual entailment. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, ACL\/IJCNLP 2021, (Volume 1: Long Papers), Virtual Event, August 1\u20136, 2021, pp 4101\u20134110. https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.317","DOI":"10.18653\/v1\/2021.acl-long.317"},{"key":"7252_CR17","doi-asserted-by":"publisher","unstructured":"Dou Z, Xu Y, Gan Z, Wang J, Wang S, Wang L, Zhu C, Zhang P, Yuan L, Peng N, Liu Z, Zeng M (2022) An empirical study of training end-to-end vision-and-language transformers. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18\u201324, 2022, pp 18145\u201318155. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01763","DOI":"10.1109\/CVPR52688.2022.01763"},{"key":"7252_CR18","doi-asserted-by":"publisher","unstructured":"Zhao F, Li C, Wu Z, Ouyang Y, Zhang J, Dai X (2023) M2DF: multi-grained multi-curriculum denoising framework for multimodal aspect-based sentiment analysis. In: Bouamor H, Pino J, Bali K (eds) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 9057\u20139070. Association for Computational Linguistics, Singapore. https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.561. https:\/\/aclanthology.org\/2023.emnlp-main.561\/","DOI":"10.18653\/v1\/2023.emnlp-main.561"},{"issue":"1","key":"7252_CR19","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1109\/TAFFC.2020.3038167","volume":"14","author":"S Poria","year":"2023","unstructured":"Poria S, Hazarika D, Majumder N, Mihalcea R (2023) Beneath the tip of the iceberg: current challenges and new directions in sentiment analysis research. IEEE Trans Affect Comput 14(1):108\u2013132. https:\/\/doi.org\/10.1109\/TAFFC.2020.3038167","journal-title":"IEEE Trans Affect Comput"},{"key":"7252_CR20","unstructured":"Schmidt D (2013) Text to image linking tool (TILT). In: 8th Annual International Conference of the Alliance of Digital Humanities Organizations, DH 2013, Lincoln, NE, USA, July 16\u201319, 2013, Conference Abstracts, pp 380\u2013382. http:\/\/dh2013.unl.edu\/abstracts\/ab-112.html"},{"key":"7252_CR21","doi-asserted-by":"publisher","unstructured":"Yuan J, Mcdonough S, You Q, Luo J (2013) Sentribute: image sentiment analysis from a mid-level perspective. In: Proceedings of the Second International Workshop on Issues of Sentiment Discovery and Opinion Mining, WISDOM 2013, Chicago, IL, USA, August 11, 2013, pp 10\u20131108. https:\/\/doi.org\/10.1145\/2502069.2502079","DOI":"10.1145\/2502069.2502079"},{"key":"7252_CR22","doi-asserted-by":"publisher","unstructured":"Borth D, Ji R, Chen T, Breuel TM, Chang S (2013) Large-scale visual sentiment ontology and detectors using adjective noun pairs. In: ACM Multimedia Conference, MM \u201913, Barcelona, Spain, October 21\u201325, 2013, pp 223\u2013232. https:\/\/doi.org\/10.1145\/2502081.2502282","DOI":"10.1145\/2502081.2502282"},{"key":"7252_CR23","doi-asserted-by":"publisher","unstructured":"Wang H, Meghawat A, Morency L, Xing EP (2017) Select-additive learning: improving generalization in multimodal sentiment analysis. In: 2017 IEEE International Conference on Multimedia and Expo, ICME 2017, Hong Kong, China, July 10\u201314, 2017, pp 949\u2013954. https:\/\/doi.org\/10.1109\/ICME.2017.8019301","DOI":"10.1109\/ICME.2017.8019301"},{"key":"7252_CR24","doi-asserted-by":"publisher","unstructured":"Nojavanasghari B, Gopinath D, Koushik J, Baltrusaitis T, Morency L (2016) Deep multimodal fusion for persuasiveness prediction. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction, ICMI 2016, Tokyo, Japan, November 12\u201316, 2016, pp 284\u2013288. https:\/\/doi.org\/10.1145\/2993148.2993176","DOI":"10.1145\/2993148.2993176"},{"key":"7252_CR25","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4\u20139, 2017, Long Beach, CA, USA, pp 5998\u20136008. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"7252_CR26","doi-asserted-by":"crossref","unstructured":"Delbrouck J, Tits N, Dupont S (2020) Modulated fusion using transformer for linguistic-acoustic emotion recognition. CoRR arXiv:2010.02057","DOI":"10.18653\/v1\/2020.nlpbt-1.1"},{"key":"7252_CR27","doi-asserted-by":"publisher","unstructured":"Han W, Chen H, Gelbukh AF, Zadeh A, Morency L, Poria S (2021) Bi-bimodal modality fusion for correlation-controlled multimodal sentiment analysis. In: ICMI \u201921: International Conference on Multimodal Interaction, Montr\u00e9al, QC, Canada, October 18\u201322, 2021, pp 6\u201315. https:\/\/doi.org\/10.1145\/3462244.3479919","DOI":"10.1145\/3462244.3479919"},{"key":"7252_CR28","doi-asserted-by":"publisher","unstructured":"Wang Z, Wan Z, Wan X (2020) Transmodality: an end2end fusion method with transformer for multimodal sentiment analysis. In: WWW \u201920: The Web Conference 2020, Taipei, Taiwan, April 20\u201324, 2020, pp 2514\u20132520. https:\/\/doi.org\/10.1145\/3366423.3380000","DOI":"10.1145\/3366423.3380000"},{"key":"7252_CR29","doi-asserted-by":"publisher","unstructured":"Wang K, Jin T (2021) Multimodal social media sentiment analysis based on cross-modal hierarchical attention fusion. In: Artificial Intelligence and Mobile Services\u2014AIMS 2021\u201410th International Conference, Held as Part of the Services Conference Federation, SCF 2021, Virtual Event, December 10\u201314, 2021, Proceedings. Lecture Notes in Computer Science, vol 12987, pp 29\u201344. https:\/\/doi.org\/10.1007\/978-3-030-96033-9_3","DOI":"10.1007\/978-3-030-96033-9_3"},{"key":"7252_CR30","doi-asserted-by":"publisher","unstructured":"Toledo GL, Marcacini RM (2022) Transfer learning with joint fine-tuning for multimodal sentiment analysis. CoRR arXiv:2210.05790https:\/\/doi.org\/10.48550\/arXiv.2210.05790","DOI":"10.48550\/arXiv.2210.05790"},{"key":"7252_CR31","doi-asserted-by":"publisher","unstructured":"Yang X, Feng S, Wang D, Hong P, Poria S (2022) Few-shot multimodal sentiment analysis based on multimodal probabilistic fusion prompts. CoRR arXiv:2211.06607https:\/\/doi.org\/10.48550\/arXiv.2211.06607","DOI":"10.48550\/arXiv.2211.06607"},{"key":"7252_CR32","doi-asserted-by":"publisher","unstructured":"Hu X, Yamamura M (2022) Two-stage attention-based fusion neural network for image-text sentiment classification. In: Proceedings of the 2022 4th International Conference on Image, Video and Signal Processing, New York, NY, USA, pp 1\u20137. https:\/\/doi.org\/10.1145\/3531232.3531233","DOI":"10.1145\/3531232.3531233"},{"key":"7252_CR33","doi-asserted-by":"publisher","unstructured":"Li Z, Xu B, Zhu C, Zhao T (2022) CLMLF: a contrastive learning and multi-layer fusion method for multimodal sentiment detection. In: Findings of the Association for Computational Linguistics: NAACL 2022, Seattle, WA, United States, July 10\u201315, 2022, pp 2282\u20132294. https:\/\/doi.org\/10.18653\/v1\/2022.findings-naacl.175","DOI":"10.18653\/v1\/2022.findings-naacl.175"},{"issue":"3","key":"7252_CR34","doi-asserted-by":"publisher","first-page":"2103","DOI":"10.1007\/s11063-022-11124-w","volume":"55","author":"X Xiao","year":"2023","unstructured":"Xiao X, Pu Y, Zhao Z, Nie R, Xu D, Qian W, Wu H (2023) Image-text sentiment analysis via context guided adaptive fine-tuning transformer. Neural Process Lett 55(3):2103\u20132125. https:\/\/doi.org\/10.1007\/s11063-022-11124-w","journal-title":"Neural Process Lett"},{"key":"7252_CR35","doi-asserted-by":"publisher","unstructured":"Liu P, Joty SR, Meng HM (2015) Fine-grained opinion mining with recurrent neural networks and word embeddings. In: Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing, EMNLP 2015, Lisbon, Portugal, September 17\u201321, 2015, pp 1433\u20131443. https:\/\/doi.org\/10.18653\/v1\/d15-1168","DOI":"10.18653\/v1\/d15-1168"},{"key":"7252_CR36","doi-asserted-by":"publisher","unstructured":"Sun L, Wang J, Zhang K, Su Y, Weng F (2021) Rpbert: a text-image relation propagation-based BERT model for multimodal NER. In: Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, February 2\u20139, 2021, pp 13860\u201313868. https:\/\/doi.org\/10.1609\/aaai.v35i15.17633","DOI":"10.1609\/aaai.v35i15.17633"},{"key":"7252_CR37","doi-asserted-by":"publisher","unstructured":"Zhang D, Wei S, Li S, Wu H, Zhu Q, Zhou G (2021) Multi-modal graph fusion for named entity recognition with targeted visual guidance. In: Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, February 2\u20139, 2021, pp 14347\u201314355. https:\/\/doi.org\/10.1609\/aaai.v35i16.17687","DOI":"10.1609\/aaai.v35i16.17687"},{"key":"7252_CR38","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1109\/TASLP.2019.2957872","volume":"28","author":"J Yu","year":"2020","unstructured":"Yu J, Jiang J, Xia R (2020) Entity-sensitive attention and fusion network for entity-level multimodal sentiment classification. IEEE ACM Trans Audio Speech Lang Process 28:429\u2013439. https:\/\/doi.org\/10.1109\/TASLP.2019.2957872","journal-title":"IEEE ACM Trans Audio Speech Lang Process"},{"issue":"13","key":"7252_CR39","doi-asserted-by":"publisher","first-page":"14846","DOI":"10.1007\/S11227-022-04480-W","volume":"78","author":"X Li","year":"2022","unstructured":"Li X, Lu R, Liu P, Zhu Z (2022) Graph convolutional networks with hierarchical multi-head attention for aspect-level sentiment classification. J Supercomput 78(13):14846\u201314865. https:\/\/doi.org\/10.1007\/S11227-022-04480-W","journal-title":"J Supercomput"},{"key":"7252_CR40","doi-asserted-by":"publisher","unstructured":"Huang Y, Chen Z, Zhang W, Chen J, Pan JZ, Yao Z, Xie Y, Chen H (2022) Aspect-based sentiment classification with sequential cross-modal semantic graph. CoRR arXiv:2208.09417https:\/\/doi.org\/10.48550\/arXiv.2208.09417","DOI":"10.48550\/arXiv.2208.09417"},{"key":"7252_CR41","doi-asserted-by":"crossref","unstructured":"Yu Z, Wang J, Yu L, Zhang X (2022) Dual-encoder transformers with cross-modal alignment for multimodal aspect-based sentiment analysis. In: Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing, AACL\/IJCNLP 2022\u2014Volume 1: Long Papers, Online Only, November 20\u201323, 2022, pp 414\u2013423. https:\/\/aclanthology.org\/2022.aacl-main.32","DOI":"10.18653\/v1\/2022.aacl-main.32"},{"issue":"5","key":"7252_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103038","volume":"59","author":"L Yang","year":"2022","unstructured":"Yang L, Na J, Yu J (2022) Cross-modal multitask transformer for end-to-end multimodal aspect-based sentiment analysis. Inf Process Manag 59(5):103038. https:\/\/doi.org\/10.1016\/j.ipm.2022.103038","journal-title":"Inf Process Manag"},{"key":"7252_CR43","doi-asserted-by":"publisher","unstructured":"Ling Y, Yu J, Xia R (2022) Vision-language pre-training for multimodal aspect-based sentiment analysis. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2022, Dublin, Ireland, May 22\u201327, 2022, pp 2149\u20132159. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.152","DOI":"10.18653\/v1\/2022.acl-long.152"},{"issue":"17","key":"7252_CR44","doi-asserted-by":"publisher","first-page":"18869","DOI":"10.1609\/aaai.v38i17.29852","volume":"38","author":"T Peng","year":"2024","unstructured":"Peng T, Li Z, Wang P, Zhang L, Zhao H (2024) A novel energy based model mechanism for multi-modal aspect-based sentiment analysis. Proc AAAI Conf Artif Intell 38(17):18869\u201318878. https:\/\/doi.org\/10.1609\/aaai.v38i17.29852","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7252_CR45","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102304","volume":"106","author":"L Xiao","year":"2024","unstructured":"Xiao L, Wu X, Xu J, Li W, Jin C, He L (2024) Atlantis: aesthetic-oriented multiple granularities fusion network for joint multimodal aspect-based sentiment analysis. Inf Fusion 106:102304. https:\/\/doi.org\/10.1016\/j.inffus.2024.102304","journal-title":"Inf Fusion"},{"key":"7252_CR46","doi-asserted-by":"publisher","unstructured":"Sun H, Wang H, Liu J, Chen Y, Lin L (2022) Cubemlp: a mlp-based model for multimodal sentiment analysis and depression estimation. CoRR arXiv:2207.14087https:\/\/doi.org\/10.48550\/arXiv.2207.14087","DOI":"10.48550\/arXiv.2207.14087"},{"key":"7252_CR47","doi-asserted-by":"publisher","unstructured":"Yu W, Xu H, Yuan Z, Wu J (2021) Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, February 2\u20139, 2021, pp 10790\u201310797. https:\/\/doi.org\/10.1609\/aaai.v35i12.17289","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"7252_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112331","volume":"301","author":"J Zhang","year":"2024","unstructured":"Zhang J, Qu J, Liu J, Wang Z (2024) Mcpl: multi-model co-guided progressive learning for multimodal aspect-based sentiment analysis. Knowl Based Syst 301:112331. https:\/\/doi.org\/10.1016\/j.knosys.2024.112331","journal-title":"Knowl Based Syst"},{"key":"7252_CR49","doi-asserted-by":"publisher","unstructured":"Liu Y, Zhou Y, Li Z, Zhang J, Shang Y, Zhang C, Hu S (2024) Rng: reducing multi-level noise and multi-grained semantic gap for joint multimodal aspect-sentiment analysis. In: 2024 IEEE International Conference on Multimedia and Expo (ICME), pp 1\u20136. https:\/\/doi.org\/10.1109\/ICME57554.2024.10687372","DOI":"10.1109\/ICME57554.2024.10687372"},{"key":"7252_CR50","unstructured":"Liu X, Li R, Ye S, Zhang G, Wang X (2025) Multimodal aspect-based sentiment analysis under conditional relation. In: Rambow O, Wanner L, Apidianaki M, Al-Khalifa H, Eugenio BD, Schockaert S (eds) Proceedings of the 31st International Conference on Computational Linguistics, pp 313\u2013323. Association for Computational Linguistics, Abu Dhabi, UAE. https:\/\/aclanthology.org\/2025.coling-main.22\/"},{"key":"7252_CR51","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1109\/TNNLS.2024.3415","volume":"1","author":"R Fan","year":"2024","unstructured":"Fan R, He T, Chen M, Zhang M, Tu X, Dong M (2024) Dual causes generation assisted model for multimodal aspect-based sentiment classification. IEEE Trans Neural Netw Learn Syst 1:15. https:\/\/doi.org\/10.1109\/TNNLS.2024.3415","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"7252_CR52","doi-asserted-by":"crossref","unstructured":"Sang EFTK, Veenstra J (1999) Representing text chunks. In: EACL 1999, 9th Conference of the European Chapter of the Association for Computational Linguistics, June 8\u201312, 1999, University of Bergen, Bergen, Norway, pp 173\u2013179. https:\/\/aclanthology.org\/E99-1023\/","DOI":"10.3115\/977035.977059"},{"key":"7252_CR53","unstructured":"Wu Y, Schuster M, Chen Z, Le QV, Norouzi M, Macherey W, Krikun M, Cao Y, Gao Q, Macherey K, Klingner J, Shah A, Johnson M, Liu X, Kaiser L, Gouws S, Kato Y, Kudo T, Kazawa H, Stevens K, Kurian G, Patil N, Wang W, Young C, Smith J, Riesa J, Rudnick A, Vinyals O, Corrado G, Hughes M, Dean J (2016) Google\u2019s neural machine translation system: Bridging the gap between human and machine translation. CoRR arXiv:1609.08144"},{"key":"7252_CR54","doi-asserted-by":"publisher","unstructured":"Li D, Li J, Le H, Wang G, Savarese S, Hoi SCH (2022) LAVIS: a library for language-vision intelligence. CoRR arXiv:2209.09019https:\/\/doi.org\/10.48550\/arXiv.2209.09019","DOI":"10.48550\/arXiv.2209.09019"},{"key":"7252_CR55","doi-asserted-by":"publisher","unstructured":"Manning CD, Surdeanu M, Bauer J, Finkel JR, Bethard S, McClosky D (2014) The stanford corenlp natural language processing toolkit. In: Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics, ACL 2014, June 22\u201327, 2014, Baltimore, MD, USA, System Demonstrations, pp 55\u201360. https:\/\/doi.org\/10.3115\/v1\/p14-5010","DOI":"10.3115\/v1\/p14-5010"},{"key":"7252_CR56","unstructured":"Chen T, Borth D, Darrell T, Chang S (2014) Deepsentibank: visual sentiment concept classification with deep convolutional neural networks. CoRR arXiv:1410.8586"},{"issue":"1","key":"7252_CR57","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1214\/aoms\/1177729694","volume":"22","author":"S Kullback","year":"1951","unstructured":"Kullback S, Leibler RA (1951) On information and sufficiency. Ann Math Stat 22(1):79\u201386","journal-title":"Ann Math Stat"},{"key":"7252_CR58","doi-asserted-by":"publisher","unstructured":"Hu M, Peng Y, Huang Z, Li D, Lv Y (2019) Open-domain targeted sentiment analysis via span-based extraction and classification. In: Proceedings of the 57th Conference of the Association for Computational Linguistics, ACL 2019, Florence, Italy, July 28\u2013August 2, 2019, Volume 1: Long Papers, pp 537\u2013546. https:\/\/doi.org\/10.18653\/v1\/p19-1051","DOI":"10.18653\/v1\/p19-1051"},{"key":"7252_CR59","doi-asserted-by":"publisher","unstructured":"Chen G, Tian Y, Song Y (2020) Joint aspect extraction and sentiment analysis with directional graph convolutional networks. In: Proceedings of the 28th International Conference on Computational Linguistics, COLING 2020, Barcelona, Spain (Online), December 8\u201313, 2020, pp 272\u2013279. https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.24","DOI":"10.18653\/v1\/2020.coling-main.24"},{"issue":"12","key":"7252_CR60","doi-asserted-by":"publisher","first-page":"8787","DOI":"10.1109\/TKDE.2023.3345022","volume":"36","author":"J Mu","year":"2024","unstructured":"Mu J, Nie F, Wang W, Xu J, Zhang J, Liu H (2024) MOCOLNet: a momentum contrastive learning network for multimodal aspect-level sentiment analysis. IEEE Trans Knowl Data Eng 36(12):8787\u20138800. https:\/\/doi.org\/10.1109\/TKDE.2023.3345022","journal-title":"IEEE Trans Knowl Data Eng"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07252-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07252-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07252-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,29]],"date-time":"2025-04-29T05:24:19Z","timestamp":1745904259000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07252-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,29]]},"references-count":60,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["7252"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07252-4","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,29]]},"assertion":[{"value":"26 March 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"791"}}