{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T16:18:13Z","timestamp":1772554693876,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755352","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"5677-5686","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["FEALLM: Advancing Facial Emotion Analysis in Multimodal Large Language Models with Emotional Synergy and Reasoning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5801-3238","authenticated-orcid":false,"given":"Zhuozhao","family":"Hu","sequence":"first","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2353-2436","authenticated-orcid":false,"given":"Kaishen","family":"Yuan","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2242-6139","authenticated-orcid":false,"given":"Xin","family":"Liu","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China and Lappeenranta-Lahti University of Technology, Lappeenranta, Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6505-3304","authenticated-orcid":false,"given":"Zitong","family":"Yu","sequence":"additional","affiliation":[{"name":"Great Bay University, Dongguan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0839-8792","authenticated-orcid":false,"given":"Yuan","family":"Zong","sequence":"additional","affiliation":[{"name":"Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7070-6365","authenticated-orcid":false,"given":"Jingang","family":"Shi","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2517-9783","authenticated-orcid":false,"given":"Huanjing","family":"Yue","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7521-7920","authenticated-orcid":false,"given":"Jingyu","family":"Yang","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et al. 2023. Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Improving image captioning descriptiveness by ranking and llm-based fusion. arXiv preprint arXiv:2306.11593","author":"Bianco Simone","year":"2023","unstructured":"Simone Bianco, Luigi Celona, Marco Donzella, and Paolo Napoletano. 2023. Improving image captioning descriptiveness by ranking and llm-based fusion. arXiv preprint arXiv:2306.11593 (2023)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00188"},{"key":"e_1_3_2_1_5_1","volume-title":"MMFuser: Multimodal Multi-Layer Feature Fuser for Fine-Grained Vision-Language Understanding. arXiv preprint arXiv:2410.11829","author":"Cao Yue","year":"2024","unstructured":"Yue Cao, Yangzhou Liu, Zhe Chen, Guangchen Shi, Wenhai Wang, Danhuai Zhao, and Tong Lu. 2024. MMFuser: Multimodal Multi-Layer Feature Fuser for Fine-Grained Vision-Language Understanding. arXiv preprint arXiv:2410.11829 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478","author":"Chen Jun","year":"2023","unstructured":"Jun Chen, Deyao Zhu, Xiaoqian Shen, Xiang Li, Zechun Liu, Pengchuan Zhang, Raghuraman Krishnamoorthi, Vikas Chandra, Yunyang Xiong, and Mohamed Elhoseiny. 2023. Minigpt-v2: large language model as a unified interface for vision-language multi-task learning. arXiv preprint arXiv:2310.09478 (2023)."},{"key":"e_1_3_2_1_7_1","unstructured":"Zhe Chen Weiyun Wang Yue Cao Yangzhou Liu Zhangwei Gao Erfei Cui Jinguo Zhu Shenglong Ye Hao Tian Zhaoyang Liu et al. 2024a. Expanding performance boundaries of open-source multimodal models with model data and test-time scaling. arXiv preprint arXiv:2412.05271 (2024)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Zhe Chen Weiyun Wang Hao Tian Shenglong Ye Zhangwei Gao Erfei Cui Wenwen Tong Kongzhi Hu Jiapeng Luo Zheng Ma et al. 2024b. How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites. arXiv preprint arXiv:2404.16821 (2024).","DOI":"10.1007\/s11432-024-4231-5"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3518"},{"key":"e_1_3_2_1_10_1","volume-title":"Xing","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"e_1_3_2_1_11_1","first-page":"49250","volume-title":"Levine (Eds.)","volume":"36","author":"Dai Wenliang","year":"2023","unstructured":"Wenliang Dai, Junnan Li, DONGXU LI, Anthony Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale N Fung, and Steven Hoi. 2023. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. In Advances in Neural Information Processing Systems, A. Oh, T. Naumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine (Eds.), Vol. 36. Curran Associates, Inc., 49250-49267. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/9a6a435e75419a836fe47ab6793623e6-Paper-Conference.pdf"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1196\/annals.1280.010"},{"key":"e_1_3_2_1_13_1","volume-title":"Facial action coding system. Environmental Psychology & Nonverbal Behavior","author":"Ekman Paul","year":"1978","unstructured":"Paul Ekman and Wallace V Friesen. 1978. Facial action coding system. Environmental Psychology & Nonverbal Behavior (1978)."},{"key":"e_1_3_2_1_14_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_15_1","volume-title":"Minicpm: Unveiling the potential of small language models with scalable training strategies. arXiv preprint arXiv:2404.06395","author":"Hu Shengding","year":"2024","unstructured":"Shengding Hu, Yuge Tu, Xu Han, Chaoqun He, Ganqu Cui, Xiang Long, Zhi Zheng, Yewei Fang, Yuxiang Huang, Weilin Zhao, et al., 2024a. Minicpm: Unveiling the potential of small language models with scalable training strategies. arXiv preprint arXiv:2404.06395 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.27999"},{"key":"e_1_3_2_1_17_1","first-page":"245","article-title":"Facing imbalanced data-recommendations for the use of performance metrics. In 2013 Humaine association conference on affective computing and intelligent interaction","author":"Jeni L\u00e1szl\u00f3 A","year":"2013","unstructured":"L\u00e1szl\u00f3 A Jeni, Jeffrey F Cohn, and Fernando De La Torre. 2013. Facing imbalanced data-recommendations for the use of performance metrics. In 2013 Humaine association conference on affective computing and intelligent interaction. IEEE, 245-251.","journal-title":"IEEE"},{"key":"e_1_3_2_1_18_1","volume-title":"Aff-wild2: Extending the aff-wild database for affect recognition. arXiv","author":"Kollias D","year":"2018","unstructured":"D Kollias and S Zafeiriou. 2018. Aff-wild2: Extending the aff-wild database for affect recognition. arXiv 2018. arXiv preprint arXiv:1811.07770, Vol. 2 (2018)."},{"key":"e_1_3_2_1_19_1","volume-title":"Expllm: Towards chain of thought for facial expression recognition. arXiv preprint arXiv:2409.02828","author":"Lan Xing","year":"2024","unstructured":"Xing Lan, Jian Xue, Ji Qi, Dongmei Jiang, Ke Lu, and Tat-Seng Chua. 2024. Expllm: Towards chain of thought for facial expression recognition. arXiv preprint arXiv:2409.02828 (2024)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5014\/ajot.2020.043463"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2024.3396656"},{"key":"e_1_3_2_1_22_1","volume-title":"Deemo: De-identity multimodal emotion recognition and reasoning. arXiv preprint arXiv:2504.19549","author":"Li Deng","year":"2025","unstructured":"Deng Li, Bohao Xing, Xin Liu, Baiqiang Xia, Bihan Wen, and Heikki K\u00e4lvi\u00e4inen. 2025c. Deemo: De-identity multimodal emotion recognition and reasoning. arXiv preprint arXiv:2504.19549 (2025)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018594"},{"key":"e_1_3_2_1_24_1","volume-title":"MVT: mask vision transformer for facial expression recognition in the wild. arXiv preprint arXiv:2106.04520","author":"Li Hanting","year":"2021","unstructured":"Hanting Li, Mingzhe Sui, Feng Zhao, Zhengjun Zha, and Feng Wu. 2021. MVT: mask vision transformer for facial expression recognition in the wild. arXiv preprint arXiv:2106.04520 (2021)."},{"key":"e_1_3_2_1_25_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_26_1","volume-title":"Deep facial expression recognition: A survey","author":"Li Shan","year":"2020","unstructured":"Shan Li and Weihong Deng. 2020. Deep facial expression recognition: A survey. IEEE transactions on affective computing, Vol. 13, 3 (2020), 1195-1215."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.277"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72649-1_10"},{"key":"e_1_3_2_1_29_1","volume-title":"Visual Large Language Models for Generalized and Specialized Applications. arXiv preprint arXiv:2501.02765","author":"Li Yifan","year":"2025","unstructured":"Yifan Li, Zhixin Lai, Wentao Bao, Zhen Tan, Anh Dao, Kewei Sui, Jiayi Shen, Dong Liu, Huan Liu, and Yu Kong. 2025b. Visual Large Language Models for Generalized and Specialized Applications. arXiv preprint arXiv:2501.02765 (2025)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102367"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00566"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01049"},{"key":"e_1_3_2_1_34_1","volume-title":"Multi-scale promoted self-adjusting correlation learning for facial action unit detection","author":"Liu Xin","year":"2024","unstructured":"Xin Liu, Kaishen Yuan, Xuesong Niu, Jingang Shi, Zitong Yu, Huanjing Yue, and Jingyu Yang. 2024b. Multi-scale promoted self-adjusting correlation learning for facial action unit detection. IEEE Transactions on Affective Computing (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3363660"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00037"},{"key":"e_1_3_2_1_37_1","volume-title":"Learning multi-dimensional edge feature-based au relation graph for facial action unit recognition. arXiv preprint arXiv:2205.01782","author":"Luo Cheng","year":"2022","unstructured":"Cheng Luo, Siyang Song, Weicheng Xie, Linlin Shen, and Hatice Gunes. 2022. Learning multi-dimensional edge feature-based au relation graph for facial action unit recognition. arXiv preprint arXiv:2205.01782 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2021.3122146"},{"key":"e_1_3_2_1_39_1","volume-title":"Automatic analysis of facial actions: A survey","author":"Martinez Brais","year":"2017","unstructured":"Brais Martinez, Michel F Valstar, Bihan Jiang, and Maja Pantic. 2017. Automatic analysis of facial actions: A survey. IEEE transactions on affective computing, Vol. 10, 3 (2017), 325-347."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2013.4"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2740923"},{"key":"e_1_3_2_1_42_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2019.102740"},{"key":"e_1_3_2_1_44_1","volume-title":"Few-Shot VQA with Frozen LLMs: A Tale of Two Approaches. arXiv preprint arXiv:2403.11317","author":"Sterner Igor","year":"2024","unstructured":"Igor Sterner, Weizhe Lin, Jinghong Chen, and Bill Byrne. 2024. Few-Shot VQA with Frozen LLMs: A Tale of Two Approaches. arXiv preprint arXiv:2403.11317 (2024)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.3390\/biomimetics8020199"},{"key":"e_1_3_2_1_46_1","volume-title":"European Conference on Computer Vision. Springer, 70-89","author":"Wu Hongtao","year":"2024","unstructured":"Hongtao Wu, Yijun Yang, Angelica I Aviles-Rivero, Jingjing Ren, Sixiang Chen, Haoyu Chen, and Lei Zhu. 2024a. Semi-supervised Video Desnowing Network via Temporal Decoupling Experts and Distribution-Driven Contrastive Regularization. In European Conference on Computer Vision. Springer, 70-89."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612001"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680916"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02408"},{"key":"e_1_3_2_1_50_1","volume-title":"Emotionhallucer: Evaluating emotion hallucinations in multimodal large language models. arXiv preprint arXiv:2505.11405","author":"Xing Bohao","year":"2025","unstructured":"Bohao Xing, Xin Liu, Guoying Zhao, Chengyu Liu, Xiaolan Fu, and Heikki K\u00e4lvi\u00e4inen. 2025a. Emotionhallucer: Evaluating emotion hallucinations in multimodal large language models. arXiv preprint arXiv:2505.11405 (2025)."},{"key":"e_1_3_2_1_51_1","volume-title":"Emo-llama: Enhancing facial emotion understanding with instruction tuning. arXiv preprint arXiv:2408.11424","author":"Xing Bohao","year":"2024","unstructured":"Bohao Xing, Zitong Yu, Xin Liu, Kaishen Yuan, Qilang Ye, Weicheng Xie, Huanjing Yue, Jingyu Yang, and Heikki K\u00e4lvi\u00e4inen. 2024. Emo-llama: Enhancing facial emotion understanding with instruction tuning. arXiv preprint arXiv:2408.11424 (2024)."},{"key":"e_1_3_2_1_52_1","volume-title":"AU-TTT: Vision Test-Time Training model for Facial Action Unit Detection. arXiv preprint arXiv:2503.23450","author":"Xing Bohao","year":"2025","unstructured":"Bohao Xing, Kaishen Yuan, Zitong Yu, Xin Liu, and Heikki K\u00e4lvi\u00e4inen. 2025b. AU-TTT: Vision Test-Time Training model for Facial Action Unit Detection. arXiv preprint arXiv:2503.23450 (2025)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00596"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3563775"},{"key":"e_1_3_2_1_55_1","volume-title":"Learning Adaptive Lighting via Channel-Aware Guidance. arXiv preprint arXiv:2412.01493","author":"Yang Qirui","year":"2024","unstructured":"Qirui Yang, Peng-Tao Jiang, Hao Zhang, Jinwei Chen, Bo Li, Huanjing Yue, and Jingyu Yang. 2024a. Learning Adaptive Lighting via Channel-Aware Guidance. arXiv preprint arXiv:2412.01493 (2024)."},{"key":"e_1_3_2_1_56_1","volume-title":"Efficient hdr reconstruction from real-world raw images. arXiv preprint arXiv:2306.10311","author":"Yang Qirui","year":"2023","unstructured":"Qirui Yang, Yihao Liu, Qihua Chen, Huanjing Yue, Kun Li, and Jingyu Yang. 2023b. Efficient hdr reconstruction from real-world raw images. arXiv preprint arXiv:2306.10311 (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102549"},{"key":"e_1_3_2_1_58_1","volume-title":"Emollm: Multimodal emotional understanding meets large language models. arXiv preprint arXiv:2406.16442","author":"Yang Qu","year":"2024","unstructured":"Qu Yang, Mang Ye, and Bo Du. 2024b. Emollm: Multimodal emotional understanding meets large language models. arXiv preprint arXiv:2406.16442 (2024)."},{"key":"e_1_3_2_1_59_1","volume-title":"DSDNet: Raw Domain Demoir\\eing via Dual Color-Space Synergy. arXiv preprint arXiv:2504.15756","author":"Yang Qirui","year":"2025","unstructured":"Qirui Yang, Fangpu Zhang, Yeying Jin, Qihua Cheng, Pengtao Jiang, Huanjing Yue, and Jingyu Yang. 2025b. DSDNet: Raw Domain Demoir\\eing via Dual Color-Space Synergy. arXiv preprint arXiv:2504.15756 (2025)."},{"key":"e_1_3_2_1_60_1","volume-title":"Dense Connector for MLLMs. arXiv preprint arXiv:2405.13800","author":"Yao Huanjin","year":"2024","unstructured":"Huanjin Yao, Wenhao Wu, Taojiannan Yang, YuXin Song, Mengxi Zhang, Haocheng Feng, Yifan Sun, Zhiheng Li, Wanli Ouyang, and Jingdong Wang. 2024. Dense Connector for MLLMs. arXiv preprint arXiv:2405.13800 (2024)."},{"key":"e_1_3_2_1_61_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 13040-13051","author":"Ye Qinghao","year":"2024","unstructured":"Qinghao Ye, Haiyang Xu, Jiabo Ye, Ming Yan, Anwen Hu, Haowei Liu, Qi Qian, Ji Zhang, and Fei Huang. 2024. mplug-owl2: Revolutionizing multi-modal large language model with modality collaboration. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 13040-13051."},{"key":"e_1_3_2_1_62_1","volume-title":"A survey on multimodal large language models. arXiv preprint arXiv:2306.13549","author":"Yin Shukang","year":"2023","unstructured":"Shukang Yin, Chaoyou Fu, Sirui Zhao, Ke Li, Xing Sun, Tong Xu, and Enhong Chen. 2023. A survey on multimodal large language models. arXiv preprint arXiv:2306.13549 (2023)."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72973-7_25"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01965"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2014.06.002"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02722"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3576490"},{"key":"e_1_3_2_1_68_1","first-page":"17616","article-title":"Relative uncertainty learning for facial expression recognition","volume":"34","author":"Zhang Yuhang","year":"2021","unstructured":"Yuhang Zhang, Chengrui Wang, and Weihong Deng. 2021. Relative uncertainty learning for facial expression recognition. Advances in Neural Information Processing Systems, Vol. 34 (2021), 17616-17627.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19809-0_24"},{"key":"e_1_3_2_1_70_1","volume-title":"MedTVT-R1: A Multimodal LLM Empowering Medical Reasoning and Diagnosis. arXiv preprint arXiv:2506.18512","author":"Zhang Yuting","year":"2025","unstructured":"Yuting Zhang, Kaishen Yuan, Hao Lu, Yutao Yue, Jintai Chen, and Kaishun Wu. 2025c. MedTVT-R1: A Multimodal LLM Empowering Medical Reasoning and Diagnosis. arXiv preprint arXiv:2506.18512 (2025)."},{"key":"e_1_3_2_1_71_1","volume-title":"Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159","author":"Zhu Xizhou","year":"2020","unstructured":"Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2020. Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755352","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:02:45Z","timestamp":1765339365000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755352"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":71,"alternative-id":["10.1145\/3746027.3755352","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755352","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}