{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:50:23Z","timestamp":1765309823570,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","funder":[{"name":"the project of Chongqing Science and Technology Bureau","award":["CSTB2023TIAD-STX0037"],"award-info":[{"award-number":["CSTB2023TIAD-STX0037"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3762066","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:17Z","timestamp":1761375257000},"page":"13972-13978","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Emotion-Qwen-VL: A Fully Fine-Tuned Multimodal Large Language Model for Micro-Expression Visual Question Answering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-8089-4676","authenticated-orcid":false,"given":"Yujing","family":"Wang","sequence":"first","affiliation":[{"name":"Lianxin Digital (Technology), Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3611-814X","authenticated-orcid":false,"given":"Ruotong","family":"Fang","sequence":"additional","affiliation":[{"name":"Lianxin Digital (Technology), Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2681-0614","authenticated-orcid":false,"given":"Xing","family":"Huang","sequence":"additional","affiliation":[{"name":"Lianxin Digital (Technology), Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5630-5344","authenticated-orcid":false,"given":"Zhiyuan","family":"Han","sequence":"additional","affiliation":[{"name":"Lianxin Digital (Technology), Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4793-2544","authenticated-orcid":false,"given":"Xiaoqing","family":"Lin","sequence":"additional","affiliation":[{"name":"Lianxin Digital (Technology), Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4821-326X","authenticated-orcid":false,"given":"Yuhao","family":"Shan","sequence":"additional","affiliation":[{"name":"Southwest University, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3805-4138","authenticated-orcid":false,"given":"Tong","family":"Chen","sequence":"additional","affiliation":[{"name":"Southwest University, Chongqing, China and Institute of Psychology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1359","article-title":"Research on micro-expressions and its applications","volume":"18","author":"Wu Qi","year":"2010","unstructured":"Qi Wu, Xunbing Shen, and Xiaolan Fu, 2010. Research on micro-expressions and its applications. Adv. Psychol. Sci. 18, 9 (2010), 1359--1368.","journal-title":"Adv. Psychol. Sci."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.3724\/sp.j.1042.2017.00211"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/app15126417"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.3390\/sym11101189"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/taffc.2025.3551773"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","unstructured":"Jinze Bai Shuai Bai Shusheng Yang Shijie Wang Sinan Tan Peng Wang Junyang Lin Chang Zhou and Jingren Zhou. 2023. Qwen-VL: A Versatile Vision-Language Model for Understanding Localization Text Reading and Beyond. https:\/\/doi.org\/10.48550\/ARXIV.2308.12966","DOI":"10.48550\/ARXIV.2308.12966"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2017.05.001"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2506.15298"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/fg47880.2020.00029"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2022.3174895"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2007.1110"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1049\/ic.2009.0244"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/taffc.2015.2485205"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","unstructured":"Huai-Qian Khor John See Raphael C. W. Phan and Weiyao Lin. 2018. Enriched Long-term Recurrent Convolutional Network for Facial Micro-Expression Recognition. https:\/\/doi.org\/10.48550\/ARXIV.1805.08417","DOI":"10.48550\/ARXIV.1805.08417"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2021.3064258"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","unstructured":"Hang Pan Lun Xie and Zhiliang Wang. 2020. Local Bilinear Convolutional Neural Network for Spotting Macro- and Micro-expression Intervals in Long Video Sequences. In 2020 15th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2020) November 2020. IEEE Buenos Aires Argentina 749--753. https:\/\/doi.org\/10.1109\/fg47880.2020.00052","DOI":"10.1109\/fg47880.2020.00052"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/fg.2011.5771451"},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. ICML","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, et al. 2021. Learning transferable visual models from natural language supervision. In Proc. ICML 2021. PMLR."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (ICML'23)","volume":"202","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In Proceedings of the 40th International Conference on Machine Learning (ICML'23), Vol. 202. JMLR.org, Article 814, 19730--19742."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Haotian Liu Chunyuan Li Yuheng Li and Yong Jae Lee. 2024. Improved Baselines with Visual Instruction Tuning. https:\/\/doi.org\/10.48550\/arXiv.2310.03744","DOI":"10.48550\/arXiv.2310.03744"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2025.111783"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Ayman Farahat. 2025. Quantifying Bias in Language Models Using Log-Likelihood Scores. https:\/\/doi.org\/10.2139\/ssrn.5291835","DOI":"10.2139\/ssrn.5291835"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--319--39513--5_5"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.02.022"},{"key":"e_1_3_2_1_25_1","first-page":"20","article-title":"MTL-LoRA: Low-Rank Adaptation for Multi-Task Learning","volume":"39","author":"Yang Yaming","year":"2025","unstructured":"Yaming Yang, Dilxat Muhtar, Yelong Shen, Yuefeng Zhan, Jianfeng Liu, Yujing Wang, Hao Sun, Weiwei Deng, Feng Sun, Qi Zhang, Weizhu Chen, and Yunhai Tong. 2025. MTL-LoRA: Low-Rank Adaptation for Multi-Task Learning. AAAI 39, 20 (April 2025), 22010--22018.","journal-title":"AAAI"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/fg.2019.8756611"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_28_1","first-page":"74","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out, pages 74--81, Barcelona, Spain. Association for Computational Linguistics."},{"key":"e_1_3_2_1_29_1","volume-title":"QVQ: To See the World with Wisdom.","author":"Team Qwen","year":"2024","unstructured":"Qwen Team. 2024. QVQ: To See the World with Wisdom. Retrieved from https:\/\/huggingface.co\/Qwen\/QVQ-72B-Preview"},{"key":"e_1_3_2_1_30_1","volume-title":"Friesen","author":"Ekman Paul","year":"1978","unstructured":"Paul Ekman and Wallace V. Friesen. 1978. Facial Action Coding System: A Technique for the Measurement of Facial Movement. Consulting Psychologists Press."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3762066","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:45:41Z","timestamp":1765309541000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3762066"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":30,"alternative-id":["10.1145\/3746027.3762066","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3762066","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}