{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:21:38Z","timestamp":1765502498672,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"name":"Natural Science Foundation of China","award":["62302213"],"award-info":[{"award-number":["62302213"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761328","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T01:03:42Z","timestamp":1762563822000},"page":"2042-2051","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal Sentiment Analysis with Multi-Perspective Thinking via Large Multimodal Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7479-6959","authenticated-orcid":false,"given":"Juhao","family":"Ma","sequence":"first","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5734-3616","authenticated-orcid":false,"given":"Shuai","family":"Xu","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China and State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7905-4885","authenticated-orcid":false,"given":"Yicong","family":"Li","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8012-4753","authenticated-orcid":false,"given":"Xiaoming","family":"Fu","sequence":"additional","affiliation":[{"name":"University of G\u00f6ttingen, G\u00f6ttingen, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Jinze Bai Shuai Bai Shusheng Yang Shijie Wang Sinan Tan Peng Wang Junyang Lin Chang Zhou and Jingren Zhou. 2023. Qwen-VL: A Versatile Vision-Language Model for Understanding Localization Text Reading and Beyond. arXiv:2308.12966 [cs.CV] https:\/\/arxiv.org\/abs\/2308.12966"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502282"},{"key":"e_1_3_2_1_3_1","volume-title":"Janus-pro: Unified multimodal understanding and generation with data and model scaling. arXiv preprint arXiv:2501.17811","author":"Chen Xiaokang","year":"2025","unstructured":"Xiaokang Chen, Zhiyu Wu, Xingchao Liu, Zizheng Pan, Wen Liu, Zhenda Xie, Xingkai Yu, and Chong Ruan. 2025. Janus-pro: Unified multimodal understanding and generation with data and model scaling. arXiv preprint arXiv:2501.17811 (2025)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3518"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586075"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171-4186","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171-4186."},{"key":"e_1_3_2_1_7_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548137"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19781-9_26"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.360"},{"key":"e_1_3_2_1_11_1","first-page":"18661","article-title":"Supervised contrastive learning","volume":"33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised contrastive learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 18661-18673.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","volume-title":"Convolutional neural networks for sentence classification. arXiv preprint arXiv:1408.5882","author":"Kim Yoon","year":"2014","unstructured":"Yoon Kim. 2014. Convolutional neural networks for sentence classification. arXiv preprint arXiv:1408.5882 (2014)."},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International Conference on Machine Learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00060"},{"key":"e_1_3_2_1_15_1","first-page":"1","article-title":"Multimodal pear chain-of-thought reasoning for multimodal sentiment analysis","volume":"20","author":"Li Yan","year":"2025","unstructured":"Yan Li, Xiangyuan Lan, Haifeng Chen, Ke Lu, and Dongmei Jiang. 2025. Multimodal pear chain-of-thought reasoning for multimodal sentiment analysis. ACM Transactions on Multimedia Computing, Communications and Applications, Vol. 20, 9 (2025), 1-23.","journal-title":"ACM Transactions on Multimedia Computing, Communications and Applications"},{"key":"e_1_3_2_1_16_1","unstructured":"Zhen Li Bing Xu Conghui Zhu and Tiejun Zhao. 2022. CLMLF:A Contrastive Learning and Multi-Layer Fusion Method for Multimodal Sentiment Detection. arXiv:2204.05515 [cs.CL] https:\/\/arxiv.org\/abs\/2204.05515"},{"key":"e_1_3_2_1_17_1","volume-title":"CofiPara: A coarse-to-fine paradigm for multimodal sarcasm target identification with large multimodal models. arXiv preprint arXiv:2405.00390","author":"Lin Hongzhan","year":"2024","unstructured":"Hongzhan Lin, Zixin Chen, Ziyang Luo, Mingfei Cheng, Jing Ma, and Guang Chen. 2024. CofiPara: A coarse-to-fine paradigm for multimodal sarcasm target identification with large multimodal models. arXiv preprint arXiv:2405.00390 (2024)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_2_1_19_1","first-page":"34892","article-title":"Visual instruction tuning","volume":"36","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. Advances in Neural Information Processing Systems, Vol. 36 (2023), 34892-34916.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3696410.3714777"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679125"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-27674-8_2"},{"key":"e_1_3_2_1_23_1","volume-title":"SemEval-2020 Task 8: Memotion Analysis-The Visuo-Lingual Metaphor! arXiv preprint arXiv:2008.03781","author":"Sharma Chhavi","year":"2020","unstructured":"Chhavi Sharma, Deepesh Bhageria, William Scott, Srinivas Pykl, Amitava Das, Tanmoy Chakraborty, Viswanath Pulabaigari, and Bjorn Gamback. 2020. SemEval-2020 Task 8: Memotion Analysis-The Visuo-Lingual Metaphor! arXiv preprint arXiv:2008.03781 (2020)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3680020"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/3514061.3514064"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2016.7498228"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111848"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681403"},{"key":"e_1_3_2_1_29_1","volume-title":"Chain of Thought Guided Few-Shot Fine-Tuning of LLMs for Multimodal Aspect-Based Sentiment Classification. In International Conference on Multimedia Modeling. Springer, 182-194","author":"Wu Hao","year":"2025","unstructured":"Hao Wu, Danping Yang, Peng Liu, and Xianxian Li. 2025. Chain of Thought Guided Few-Shot Fine-Tuning of LLMs for Multimodal Aspect-Based Sentiment Classification. In International Conference on Multimedia Modeling. Springer, 182-194."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-022-11124-w"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133142"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210093"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547754"},{"key":"e_1_3_2_1_34_1","volume-title":"Large language models meet text-centric multimodal sentiment analysis: A survey. arXiv preprint arXiv:2406.08068","author":"Yang Hao","year":"2024","unstructured":"Hao Yang, Yanyan Zhao, Yang Wu, Shilong Wang, Tian Zheng, Hongbo Zhang, Zongyang Ma, Wanxiang Che, and Bing Qin. 2024. Large language models meet text-centric multimodal sentiment analysis: A survey. arXiv preprint arXiv:2406.08068 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103038"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3035277"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3035277"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.28"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.3390\/a9020041"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859654"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548306"},{"key":"e_1_3_2_1_42_1","volume-title":"A C-LSTM neural network for text classification. arXiv preprint arXiv:1511.08630","author":"Zhou Chunting","year":"2015","unstructured":"Chunting Zhou, Chonglin Sun, Zhiyuan Liu, and Francis Lau. 2015. A C-LSTM neural network for text classification. arXiv preprint arXiv:1511.08630 (2015)."},{"key":"e_1_3_2_1_43_1","volume-title":"Aom: Detecting aspect-oriented information for multimodal aspect-based sentiment analysis. arXiv preprint arXiv:2306.01004","author":"Zhou Ru","year":"2023","unstructured":"Ru Zhou, Wenya Guo, Xumeng Liu, Shenglong Yu, Ying Zhang, and Xiaojie Yuan. 2023. Aom: Detecting aspect-oriented information for multimodal aspect-based sentiment analysis. arXiv preprint arXiv:2306.01004 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3160060"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761328","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:18:37Z","timestamp":1765502317000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761328"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":44,"alternative-id":["10.1145\/3746252.3761328","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761328","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}