{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T01:09:17Z","timestamp":1761181757986,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","funder":[{"name":"Postdoctoral Fellowship Program of CPSF","award":["2025M773444, GZC20252271"],"award-info":[{"award-number":["2025M773444, GZC20252271"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746270.3760231","type":"proceedings-article","created":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T15:14:09Z","timestamp":1760973249000},"page":"95-99","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EmoSync: Multi-Stage Reasoning with Multimodal Large Language Models for Fine-Grained Emotion Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-1126-9249","authenticated-orcid":false,"given":"Jintao","family":"Tong","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7067-0275","authenticated-orcid":false,"given":"Shiwei","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3612-3090","authenticated-orcid":false,"given":"Zijian","family":"Zhuang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2074-5961","authenticated-orcid":false,"given":"Jinghan","family":"Hu","sequence":"additional","affiliation":[{"name":"Key Laboratory of Adolescent Cyberpsychology and Behavior (CCNU), Ministry of Education Key Laboratory of Human Development and Mental Health of Hubei Province, School of Psychology, Central China Normal University, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2125-9041","authenticated-orcid":false,"given":"Yixiong","family":"Zou","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,26]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.38094\/jastt20291"},{"key":"e_1_3_2_1_2_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_1_3_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877-1901."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01761-6"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106111"},{"key":"e_1_3_2_1_6_1","unstructured":"Qingxiu Dong Lei Li Damai Dai Ce Zheng Jingyuan Ma Rui Li Heming Xia Jingjing Xu Zhiyong Wu Tianyu Liu et al. 2022. A survey on in-context learning. arXiv preprint arXiv:2301.00234 (2022)."},{"key":"e_1_3_2_1_7_1","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., 2022b. Lora: Low-rank adaptation of large language models. ICLR, Vol. 1, 2 (2022), 3.","journal-title":"ICLR"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1177\/10497315231179097"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.beth.2024.03.001"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.beth.2022.04.003"},{"key":"e_1_3_2_1_11_1","volume-title":"Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa.","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. Advances in neural information processing systems, Vol. 35 (2022), 22199-22213."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680608"},{"key":"e_1_3_2_1_13_1","first-page":"4435","volume-title":"Adaptive Low-Precision Training for Embeddings in Click-Through Rate Prediction. In Thirty-Seventh AAAI Conference on Artificial Intelligence, AAAI","author":"Li Shiwei","year":"2023","unstructured":"Shiwei Li, Huifeng Guo, Lu Hou, Wei Zhang, Xing Tang, Ruiming Tang, Rui Zhang, and Ruixuan Li. 2023. Adaptive Low-Precision Training for Embeddings in Click-Through Rate Prediction. In Thirty-Seventh AAAI Conference on Artificial Intelligence, AAAI 2023. AAAI Press, Washington, DC, USA, 4435-4443."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637841"},{"key":"e_1_3_2_1_15_1","volume-title":"Beyond Zero Initialization: Investigating the Impact of Non-Zero Initialization on LoRA Fine-Tuning Dynamics. In The Forty-Second International Conference on Machine Learning, ICML 2025","author":"Li Shiwei","year":"2025","unstructured":"Shiwei Li, Xiandi Luo, Xing Tang, Haozhao Wang, Hao Chen, Weihong Luo, Yuhua Li, Xiuqiang He, and Ruixuan Li. 2025a. Beyond Zero Initialization: Investigating the Impact of Non-Zero Initialization on LoRA Fine-Tuning Dynamics. In The Forty-Second International Conference on Machine Learning, ICML 2025, Vancouver, Canada, 13th-19th July, 2025."},{"key":"e_1_3_2_1_16_1","volume-title":"The Forty-Second International Conference on Machine Learning, ICML 2025","author":"Li Shiwei","year":"2025","unstructured":"Shiwei Li, Xiandi Luo, Haozhao Wang, Xing Tang, Shijie Xu, Weihong Luo, Yuhua Li, Xiuqiang He, and Ruixuan Li. 2025b. The Panaceas for Improving Low-Rank Decomposition in Communication-Efficient Federated Learning.. In The Forty-Second International Conference on Machine Learning, ICML 2025, Vancouver, Canada, 13th-19th July, 2025."},{"key":"e_1_3_2_1_17_1","volume-title":"Forty-first International Conference on Machine Learning, ICML 2024","author":"Li Shiwei","year":"2024","unstructured":"Shiwei Li, Wenchao Xu, Haozhao Wang, Xing Tang, Yining Qi, Shijie Xu, Weihong Luo, Yuhua Li, Xiuqiang He, and Ruixuan Li. 2024c. FedBAT: Communication-Efficient Federated Learning via Learnable Binarization. In Forty-first International Conference on Machine Learning, ICML 2024, Vienna, Austria, July 21-27, 2024."},{"key":"e_1_3_2_1_18_1","volume-title":"Affectgpt: A new dataset, model, and benchmark for emotion understanding with multimodal large language models. arXiv preprint arXiv:2501.16566","author":"Lian Zheng","year":"2025","unstructured":"Zheng Lian, Haoyu Chen, Lan Chen, Haiyang Sun, Licai Sun, Yong Ren, Zebang Cheng, Bin Liu, Rui Liu, Xiaojiang Peng, et al., 2025a. Affectgpt: A new dataset, model, and benchmark for emotion understanding with multimodal large language models. arXiv preprint arXiv:2501.16566 (2025)."},{"key":"e_1_3_2_1_19_1","volume-title":"MER 2025: When Affective Computing Meets Large Language Models. arXiv preprint arXiv:2504","author":"Lian Zheng","year":"2025","unstructured":"Zheng Lian, Rui Liu, Kele Xu, Bin Liu, Xuefei Liu, Yazhou Zhang, Xin Liu, Yong Li, Zebang Cheng, Haolin Zuo, et al., 2025b. MER 2025: When Affective Computing Meets Large Language Models. arXiv preprint arXiv:2504.19423 (2025)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102367"},{"key":"e_1_3_2_1_21_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2023), 34892-34916."},{"key":"e_1_3_2_1_22_1","volume-title":"Multimodal emotion recognition in response to videos","author":"Soleymani Mohammad","year":"2011","unstructured":"Mohammad Soleymani, Maja Pantic, and Thierry Pun. 2011. Multimodal emotion recognition in response to videos. IEEE transactions on affective computing, Vol. 3, 2 (2011), 211-223."},{"key":"e_1_3_2_1_23_1","volume-title":"FlowCut: Rethinking Redundancy via Information Flow for Efficient Vision-Language Models. arXiv preprint arXiv:2505.19536","author":"Tong Jintao","year":"2025","unstructured":"Jintao Tong, Wenwei Jin, Pengda Qin, Anqi Li, Yixiong Zou, Yuhong Li, Yuhua Li, and Ruixuan Li. 2025a. FlowCut: Rethinking Redundancy via Information Flow for Efficient Vision-Language Models. arXiv preprint arXiv:2505.19536 (2025)."},{"key":"e_1_3_2_1_24_1","volume-title":"Adapter Naturally Serves as Decoupler for Cross-Domain Few-Shot Semantic Segmentation. arXiv preprint arXiv:2506.07376","author":"Tong Jintao","year":"2025","unstructured":"Jintao Tong, Ran Ma, Yixiong Zou, Guangyao Chen, Yuhua Li, and Ruixuan Li. 2025b. Adapter Naturally Serves as Decoupler for Cross-Domain Few-Shot Semantic Segmentation. arXiv preprint arXiv:2506.07376 (2025)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00284"},{"key":"e_1_3_2_1_26_1","volume-title":"Self-Disentanglement and Re-Composition for Cross-Domain Few-Shot Segmentation. arXiv preprint arXiv:2506.02677","author":"Tong Jintao","year":"2025","unstructured":"Jintao Tong, Yixiong Zou, Guangyao Chen, Yuhua Li, and Ruixuan Li. 2025c. Self-Disentanglement and Re-Composition for Cross-Domain Few-Shot Segmentation. arXiv preprint arXiv:2506.02677 (2025)."},{"key":"e_1_3_2_1_27_1","first-page":"96728","article-title":"Lightweight frequency masker for cross-domain few-shot semantic segmentation","volume":"37","author":"Tong Jintao","year":"2024","unstructured":"Jintao Tong, Yixiong Zou, Yuhua Li, and Ruixuan Li. 2024b. Lightweight frequency masker for cross-domain few-shot semantic segmentation. Advances in Neural Information Processing Systems, Vol. 37 (2024), 96728-96749.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2764438"},{"key":"e_1_3_2_1_29_1","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et al. 2024. Qwen2-vl: Enhancing vision-language model's perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02025"},{"key":"e_1_3_2_1_31_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al., 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems, Vol. 35 (2022), 24824-24837."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02511"},{"key":"e_1_3_2_1_33_1","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et al. 2025. Qwen3 technical report. arXiv preprint arXiv:2505.09388 (2025)."},{"key":"e_1_3_2_1_34_1","volume-title":"Emotion recognition using multi-modal data and machine learning techniques: A tutorial and review. Information fusion","author":"Zhang Jianhua","year":"2020","unstructured":"Jianhua Zhang, Zhong Yin, Peng Chen, and Stefano Nichele. 2020. Emotion recognition using multi-modal data and machine learning techniques: A tutorial and review. Information fusion, Vol. 59 (2020), 103-126."},{"key":"e_1_3_2_1_35_1","volume-title":"Adalora: Adaptive budget allocation for parameter-efficient fine-tuning. arXiv preprint arXiv:2303.10512","author":"Zhang Qingru","year":"2023","unstructured":"Qingru Zhang, Minshuo Chen, Alexander Bukharin, Nikos Karampatziakis, Pengcheng He, Yu Cheng, Weizhu Chen, and Tuo Zhao. 2023a. Adalora: Adaptive budget allocation for parameter-efficient fine-tuning. arXiv preprint arXiv:2303.10512 (2023)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121692"},{"key":"e_1_3_2_1_37_1","volume-title":"Multimodal chain-of-thought reasoning in language models. arXiv preprint arXiv:2302.00923","author":"Zhang Zhuosheng","year":"2023","unstructured":"Zhuosheng Zhang, Aston Zhang, Mu Li, Hai Zhao, George Karypis, and Alex Smola. 2023b. Multimodal chain-of-thought reasoning in language models. arXiv preprint arXiv:2302.00923 (2023)."},{"key":"e_1_3_2_1_38_1","unstructured":"Denny Zhou Nathanael Sch\u00e4rli Le Hou Jason Wei Nathan Scales Xuezhi Wang Dale Schuurmans Claire Cui Olivier Bousquet Quoc Le et al. 2022. Least-to-most prompting enables complex reasoning in large language models. arXiv preprint arXiv:2205.10625 (2022)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02225"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413849"},{"key":"e_1_3_2_1_41_1","volume-title":"Margin-based few-shot class-incremental learning with class-level overfitting mitigation. Advances in neural information processing systems","author":"Zou Yixiong","year":"2022","unstructured":"Yixiong Zou, Shanghang Zhang, Yuhua Li, and Ruixuan Li. 2022. Margin-based few-shot class-incremental learning with class-level overfitting mitigation. Advances in neural information processing systems, Vol. 35 (2022), 27267-27279."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3694678"}],"event":{"name":"MM '25:The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland"},"container-title":["Proceedings of the 3rd International Workshop on Multimodal and Responsible Affective Computing"],"original-title":[],"deposited":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T17:22:55Z","timestamp":1761153775000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746270.3760231"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,26]]},"references-count":42,"alternative-id":["10.1145\/3746270.3760231","10.1145\/3746270"],"URL":"https:\/\/doi.org\/10.1145\/3746270.3760231","relation":{},"subject":[],"published":{"date-parts":[[2025,10,26]]},"assertion":[{"value":"2025-10-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}