{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:09:12Z","timestamp":1765339752961,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3762091","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"14101-14106","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["The ACM Multimedia 2025 Grand Challenge of Multimodal Conversational Aspect-based Sentiment Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2274-5719","authenticated-orcid":false,"given":"Meng","family":"Luo","sequence":"first","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3026-6347","authenticated-orcid":false,"given":"Hao","family":"Fei","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0513-5540","authenticated-orcid":false,"given":"Bobo","family":"Li","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6192-1194","authenticated-orcid":false,"given":"Shengqiong","family":"Wu","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3162-935X","authenticated-orcid":false,"given":"Qian","family":"Liu","sequence":"additional","affiliation":[{"name":"The University of Auckland, Auckland, New Zealand"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6924-7931","authenticated-orcid":false,"given":"Soujanya","family":"Poria","sequence":"additional","affiliation":[{"name":"Singapore University of Technology and Design, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3030-1280","authenticated-orcid":false,"given":"Erik","family":"Cambria","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9636-388X","authenticated-orcid":false,"given":"Mong-Li","family":"Lee","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4142-8893","authenticated-orcid":false,"given":"Wynne","family":"Hsu","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Pad: Personalized alignment at decoding-time. arXiv e-prints","author":"Chen Ruizhe","year":"2024","unstructured":"Ruizhe Chen, Xiaotian Zhang, Meng Luo, Wenhao Chai, and Zuozhu Liu. 2024. Pad: Personalized alignment at decoding-time. arXiv e-prints (2024), arXiv-2410."},{"key":"e_1_3_2_1_3_1","unstructured":"Yunfei Chu Jin Xu Qian Yang Haojie Wei Xipin Wei Zhifang Guo Yichong Leng Yuanjun Lv Jinzheng He Junyang Lin et al. 2024. Qwen2-audio technical report. arXiv preprint arXiv:2407.10759 (2024)."},{"key":"e_1_3_2_1_4_1","unstructured":"Hyung Won Chung Le Hou Shayne Longpre Barret Zoph Yi Tay William Fedus Yunxuan Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma et al. 2022. Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)."},{"key":"e_1_3_2_1_5_1","unstructured":"Gheorghe Comanici Eric Bieber Mike Schaekermann Ice Pasupat Noveen Sachdeva Inderjit Dhillon Marcel Blistein Ori Ram Dan Zhang Evan Rosen et al. 2025. Gemini 2.5: Pushing the frontier with advanced reasoning multimodality long context and next generation agentic capabilities. arXiv preprint arXiv:2507.06261 (2025)."},{"key":"e_1_3_2_1_6_1","volume-title":"Audio retrieval with wavtext5k and clap training. arXiv preprint arXiv:2209.14275","author":"Deshmukh Soham","year":"2022","unstructured":"Soham Deshmukh, Benjamin Elizalde, and Huaming Wang. 2022. Audio retrieval with wavtext5k and clap training. arXiv preprint arXiv:2209.14275 (2022)."},{"key":"e_1_3_2_1_7_1","first-page":"2509","article-title":"Target-oriented Opinion Words Extraction with Target-fused Neural Sequence Labeling","author":"Fan Zhifang","year":"2019","unstructured":"Zhifang Fan, Zhen Wu, Xin-Yu Dai, Shujian Huang, and Jiajun Chen. 2019. Target-oriented Opinion Words Extraction with Target-fused Neural Sequence Labeling. In Proceedings of the ACL. 2509-2518.","journal-title":"Proceedings of the ACL."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3564281"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.101"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the International Conference on Machine Learning.","author":"Fei Hao","year":"2024","unstructured":"Hao Fei, Shengqiong Wu, Wei Ji, Hanwang Zhang, Meishan Zhang, Mong-Li Lee, and Wynne Hsu. 2024a. Video-of-thought: Step-by-step video reasoning from perception to cognition. In Proceedings of the International Conference on Machine Learning."},{"key":"e_1_3_2_1_11_1","volume-title":"Editing. Proceedings of the Advances in neural information processing systems.","author":"Fei Hao","year":"2024","unstructured":"Hao Fei, Shengqiong Wu, Hanwang Zhang, Tat-Seng Chua, and Shuicheng Yan. 2024b. VITRON: A Unified Pixel-level Vision LLM for Understanding, Generating, Segmenting, Editing. Proceedings of the Advances in neural information processing systems."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3393452"},{"key":"e_1_3_2_1_13_1","volume-title":"Empathyear: An open-source avatar multimodal empathetic chatbot. arXiv preprint arXiv:2406.15177","author":"Fei Hao","year":"2024","unstructured":"Hao Fei, Han Zhang, Bin Wang, Lizi Liao, Qian Liu, and Erik Cambria. 2024d. Empathyear: An open-source avatar multimodal empathetic chatbot. arXiv preprint arXiv:2406.15177 (2024)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6271"},{"key":"e_1_3_2_1_15_1","volume-title":"Forty-second International Conference on Machine Learning.","author":"Fei Hao","year":"2025","unstructured":"Hao Fei, Yuan Zhou, Juncheng Li, Xiangtai Li, Qingshan Xu, Bobo Li, Shengqiong Wu, Yaoting Wang, Junbao Zhou, Jiahao Meng, et al., 2025. On path to multimodal generalist: General-level and general-bench. In Forty-second International Conference on Machine Learning."},{"key":"e_1_3_2_1_16_1","first-page":"776","article-title":"Audio set: An ontology and human-labeled dataset for audio events","author":"Gemmeke Jort F","year":"2017","unstructured":"Jort F Gemmeke, Daniel PW Ellis, Dylan Freedman, Aren Jansen, Wade Lawrence, R Channing Moore, Manoj Plakal, and Marvin Ritter. 2017. Audio set: An ontology and human-labeled dataset for audio events. In IEEE ICASSP. IEEE, 776-780.","journal-title":"IEEE ICASSP. IEEE"},{"key":"e_1_3_2_1_17_1","first-page":"15180","article-title":"Imagebind: One embedding space to bind them all","author":"Girdhar Rohit","year":"2023","unstructured":"Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, and Ishan Misra. 2023. Imagebind: One embedding space to bind them all. In Proceedings of the CVPR. 15180-15190.","journal-title":"Proceedings of the CVPR."},{"key":"e_1_3_2_1_18_1","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)."},{"key":"e_1_3_2_1_19_1","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., 2022. Lora: Low-rank adaptation of large language models. ICLR, Vol. 1, 2 (2022), 3.","journal-title":"ICLR"},{"key":"e_1_3_2_1_20_1","unstructured":"Binyuan Hui Jian Yang Zeyu Cui Jiaxi Yang Dayiheng Liu Lei Zhang Tianyu Liu Jiajun Zhang Bowen Yu Keming Lu et al. 2024. Qwen2.5-coder technical report. arXiv preprint arXiv:2409.12186 (2024)."},{"key":"e_1_3_2_1_21_1","first-page":"18062","article-title":"Shopping mmlu: A massive multi-task online shopping benchmark for large language models","volume":"37","author":"Jin Yilun","year":"2024","unstructured":"Yilun Jin, Zheng Li, Chenwei Zhang, Tianyu Cao, Yifan Gao, Pratik Jayarao, Mao Li, Xin Liu, Ritesh Sarkhel, Xianfeng Tang, et al., 2024. Shopping mmlu: A massive multi-task online shopping benchmark for large language models. Advances in Neural Information Processing Systems, Vol. 37 (2024), 18062-18089.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3698191"},{"key":"e_1_3_2_1_23_1","first-page":"13449","article-title":"DiaASQ","author":"Li Bobo","year":"2023","unstructured":"Bobo Li, Hao Fei, Fei Li, Yuhan Wu, Jinsong Zhang, Shengqiong Wu, Jingye Li, Yijiang Liu, Lizi Liao, Tat-Seng Chua, and Donghong Ji. 2023a. DiaASQ: A Benchmark of Conversational Aspect-based Sentiment Quadruple Analysis. In Findings of the ACL. 13449-13467.","journal-title":"A Benchmark of Conversational Aspect-based Sentiment Quadruple Analysis. In Findings of the ACL."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29807"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612053"},{"key":"e_1_3_2_1_26_1","unstructured":"Jian Li Weiheng Lu Hao Fei Meng Luo Ming Dai Min Xia Yizhang Jin Zhenye Gan Ding Qi Chaoyou Fu et al. 2024b. A survey on benchmarks of multimodal large language models. arXiv preprint arXiv:2408.08632 (2024)."},{"key":"e_1_3_2_1_27_1","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Procedding of the ECCV. 740-755.","journal-title":"Procedding of the ECCV."},{"key":"e_1_3_2_1_28_1","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:2412.19437 (2024)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680705"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.semeval-1.226"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6383"},{"key":"e_1_3_2_1_32_1","first-page":"27","article-title":"SemEval-2014 Task 4","author":"Pontiki Maria","year":"2014","unstructured":"Maria Pontiki, Dimitris Galanis, John Pavlopoulos, Harris Papageorgiou, Ion Androutsopoulos, and Suresh Manandhar. 2014. SemEval-2014 Task 4: Aspect Based Sentiment Analysis. In Proceedings of the SemEval. 27-35.","journal-title":"Aspect Based Sentiment Analysis. In Proceedings of the SemEval."},{"key":"e_1_3_2_1_33_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research, Vol. 21, 140 (2020), 1-67.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_34_1","first-page":"527","article-title":"Multi-response Regression for Block-missing Multi-modal Data without Imputation","volume":"34","author":"Wang Haodong","year":"2024","unstructured":"Haodong Wang, Quefeng Li, and Yufeng Liu. 2024. Multi-response Regression for Block-missing Multi-modal Data without Imputation. Statistica Sinica, Vol. 34, 2 (2024), 527.","journal-title":"Statistica Sinica"},{"key":"e_1_3_2_1_35_1","volume-title":"Multimodal chain-of-thought reasoning: A comprehensive survey. arXiv preprint arXiv:2503.12605","author":"Wang Yaoting","year":"2025","unstructured":"Yaoting Wang, Shengqiong Wu, Yuecheng Zhang, Shuicheng Yan, Ziwei Liu, Jiebo Luo, and Hao Fei. 2025. Multimodal chain-of-thought reasoning: A comprehensive survey. arXiv preprint arXiv:2503.12605 (2025)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2923608"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413556"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.823"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01321"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.146"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21404"},{"key":"e_1_3_2_1_42_1","volume-title":"Towards Semantic Equivalence of Tokenization in Multimodal LLM. arXiv preprint arXiv:2406.05127","author":"Wu Shengqiong","year":"2024","unstructured":"Shengqiong Wu, Hao Fei, Xiangtai Li, Jiayi Ji, Hanwang Zhang, Tat-Seng Chua, and Shuicheng Yan. 2024a. Towards Semantic Equivalence of Tokenization in Multimodal LLM. arXiv preprint arXiv:2406.05127 (2024)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i8.32913"},{"key":"e_1_3_2_1_44_1","volume-title":"Proceedings of the International Conference on Machine Learning. 53366-53397","author":"Wu Shengqiong","year":"2024","unstructured":"Shengqiong Wu, Hao Fei, Leigang Qu, Wei Ji, and Tat-Seng Chua. 2024b. NExT-GPT: Any-to-Any Multimodal LLM. In Proceedings of the International Conference on Machine Learning. 53366-53397."},{"key":"e_1_3_2_1_45_1","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et al. 2025. Qwen3 technical report. arXiv preprint arXiv:2505.09388 (2025)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3696410.3714739"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Zhihan Zhang Shiyang Li Zixuan Zhang Xin Liu Haoming Jiang Xianfeng Tang Yifan Gao Zheng Li Haodong Wang Zhaoxuan Tan et al. 2025a. IHEval: Evaluating language models on following the instruction hierarchy. arXiv preprint arXiv:2502.08745 (2025).","DOI":"10.18653\/v1\/2025.naacl-long.425"},{"key":"e_1_3_2_1_49_1","unstructured":"Jinguo Zhu Weiyun Wang Zhe Chen Zhaoyang Liu Shenglong Ye Lixin Gu Hao Tian Yuchen Duan Weijie Su Jie Shao et al. 2025. Internvl3: Exploring advanced training and test-time recipes for open-source multimodal models. arXiv preprint arXiv:2504.10479 (2025)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3762091","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:22Z","timestamp":1765339462000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3762091"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":49,"alternative-id":["10.1145\/3746027.3762091","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3762091","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}