{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:24Z","timestamp":1765339464414,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","funder":[{"name":"the Strategic Priority Research Program of Chinese Academy of Sciences","award":["XDB0500103"],"award-info":[{"award-number":["XDB0500103"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62276259,No.62201572,No.62271083, No.U21B2010"],"award-info":[{"award-number":["No.62276259,No.62201572,No.62271083, No.U21B2010"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3762019","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"13917-13923","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Multimodal Personality Assessment with LLM-Augmented Hierarchical Fusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-9543-833X","authenticated-orcid":false,"given":"Longjiang","family":"Yang","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China and The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7146-535X","authenticated-orcid":false,"given":"Cong","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China and The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0617-5526","authenticated-orcid":false,"given":"Chenxi","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7931-4754","authenticated-orcid":false,"given":"Fengyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8456-5976","authenticated-orcid":false,"given":"Ran","family":"Liu","sequence":"additional","affiliation":[{"name":"Tianjin Normal University, Tianjin, China and The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3978-9373","authenticated-orcid":false,"given":"Zhuofan","family":"Wen","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China and The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8448-5507","authenticated-orcid":false,"given":"Shun","family":"Chen","sequence":"additional","affiliation":[{"name":"The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2065-0405","authenticated-orcid":false,"given":"Hailiang","family":"Yao","sequence":"additional","affiliation":[{"name":"Tianjin Normal University, Tianjin, China and The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1529-1552","authenticated-orcid":false,"given":"Bin","family":"Liu","sequence":"additional","affiliation":[{"name":"The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9477-0599","authenticated-orcid":false,"given":"Zheng","family":"Lian","sequence":"additional","affiliation":[{"name":"The State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9344-6428","authenticated-orcid":false,"given":"Jianhua","family":"Tao","sequence":"additional","affiliation":[{"name":"Department of Automation, BNRist, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Relja Arandjelovic and Andrew Zisserman. 2017. Look Listen and Learn. arXiv:1705.08168 [cs.CV] https:\/\/arxiv.org\/abs\/1705.08168","DOI":"10.1109\/ICCV.2017.73"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Zhe Chen Weiyun Wang Hao Tian Shenglong Ye Zhangwei Gao Erfei Cui Wenwen Tong Kongzhi Hu Jiapeng Luo Zheng Ma et al. 2024. How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites. arXiv preprint arXiv:2404.16821 (2024).","DOI":"10.1007\/s11432-024-4231-5"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_5_1","volume-title":"Eva-02: A visual representation for neon genesis. Image and Vision Computing","author":"Fang Yuxin","year":"2024","unstructured":"Yuxin Fang, Quan Sun, Xinggang Wang, Tiejun Huang, Xinlong Wang, and Yue Cao. 2024. Eva-02: A visual representation for neon genesis. Image and Vision Computing (2024), 105171."},{"key":"e_1_3_2_1_6_1","volume-title":"Unsupervised multimodal learning for dependency-free personality recognition","author":"Ghassemi Sina","year":"2023","unstructured":"Sina Ghassemi, Tianyi Zhang, Ward van Breda, Antonis Koutsoumpis, Janneke K Oostrom, Djurre Holtrop, and Reinout E de Vries. 2023. Unsupervised multimodal learning for dependency-free personality recognition. IEEE transactions on affective computing 15, 3 (2023), 1053--1066."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1037\/\/0003-066x.48.1.26"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551876.3554811"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551876.3554811"},{"key":"e_1_3_2_1_10_1","volume-title":"Emotion-Qwen: Training Hybrid Experts for Unified Emotion and General Vision-Language Understanding. arXiv preprint arXiv:2505.06685","author":"Huang Dawei","year":"2025","unstructured":"Dawei Huang, Qing Li, Chuan Yan, Zebang Cheng, Yurong Huang, Xiang Li, Bin Li, Xiaohui Wang, Zheng Lian, and Xiaojiang Peng. 2025. Emotion-Qwen: Training Hybrid Experts for Unified Emotion and General Vision-Language Understanding. arXiv preprint arXiv:2505.06685 (2025)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.chb.2023.108128"},{"key":"e_1_3_2_1_12_1","unstructured":"Min Lin Qiang Chen and Shuicheng Yan. 2014. Network In Network. arXiv:1312.4400 [cs.NE] https:\/\/arxiv.org\/abs\/1312.4400"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00623"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3475957.3484457"},{"key":"e_1_3_2_1_15_1","first-page":"689","article-title":"Multimodal deep learning","volume":"11","author":"Ngiam Jiquan","year":"2011","unstructured":"Jiquan Ngiam, Aditya Khosla, Mingyu Kim, Juhan Nam, Honglak Lee, Andrew Y Ng, et al. 2011. Multimodal deep learning. In ICML, Vol. 11. 689--696.","journal-title":"ICML"},{"volume-title":"The development and psychometric properties of LIWC2015","author":"Pennebaker James W","key":"e_1_3_2_1_16_1","unstructured":"James W Pennebaker, Ryan L Boyd, Kayla Jordan, and Kate Blackburn. 2015. The development and psychometric properties of LIWC2015. University of Texas at Austin (2015)."},{"key":"e_1_3_2_1_17_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs.CV] https:\/\/arxiv.org\/ abs\/2103.00020"},{"key":"e_1_3_2_1_18_1","volume-title":"Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever.","author":"Radford Alec","year":"2022","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2022. Robust Speech Recognition via Large-Scale Weak Supervision. arXiv:2212.04356 [eess.AS] https:\/\/arxiv.org\/abs\/2212.04356"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3423327.3423672"},{"key":"e_1_3_2_1_20_1","volume-title":"Eva-clip: Improved training techniques for clip at scale. arXiv preprint arXiv:2303.15389","author":"Sun Quan","year":"2023","unstructured":"Quan Sun, Yuxin Fang, Ledell Wu, Xinlong Wang, and Yue Cao. 2023. Eva-clip: Improved training techniques for clip at scale. arXiv preprint arXiv:2303.15389 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1177\/0261927X09351676"},{"key":"e_1_3_2_1_22_1","volume-title":"J. Zico Kolter, Louis-Philippe Morency, and Ruslan Salakhutdinov.","author":"Hubert Tsai Yao-Hung","year":"2019","unstructured":"Yao-Hung Hubert Tsai, Shaojie Bai, Paul Pu Liang, J. Zico Kolter, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2019. Multimodal Transformer for Unaligned Multimodal Language Sequences. arXiv:1906.00295 [cs.CL] https:\/\/arxiv.org\/ abs\/1906.00295"},{"key":"e_1_3_2_1_23_1","first-page":"213","article-title":"Mult: Multi-task learning for multimodal sentiment analysis","volume":"13","author":"Hubert Tsai Yao-Hung","year":"2021","unstructured":"Yao-Hung Hubert Tsai, Paul Pu Liang, Amir Zadeh, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2021. Mult: Multi-task learning for multimodal sentiment analysis. IEEE Transactions on Affective Computing 13, 1 (2021), 213--226.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"e_1_3_2_1_24_1","unstructured":"Liang Wang Nan Yang Xiaolong Huang Linjun Yang Rangan Majumder and Furu Wei. 2024. Multilingual E5 Text Embeddings: A Technical Report. arXiv preprint arXiv:2402.05672 (2024)."},{"key":"e_1_3_2_1_25_1","volume-title":"Towards semantic equivalence of tokenization in multimodal llm. arXiv preprint arXiv:2406.05127","author":"Wu Shengqiong","year":"2024","unstructured":"Shengqiong Wu, Hao Fei, Xiangtai Li, Jiayi Ji, Hanwang Zhang, Tat-Seng Chua, and Shuicheng Yan. 2024. Towards semantic equivalence of tokenization in multimodal llm. arXiv preprint arXiv:2406.05127 (2024)."},{"key":"e_1_3_2_1_26_1","unstructured":"Zhiyu Wu Xiaokang Chen Zizheng Pan Xingchao Liu Wen Liu Damai Dai Huazuo Gao Yiyang Ma Chengyue Wu Bingxuan Wang et al. 2024. Deepseekvl2: Mixture-of-experts vision-language models for advanced multimodal understanding. arXiv preprint arXiv:2412.10302 (2024)."},{"key":"e_1_3_2_1_27_1","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv Chujie Zheng Dayiheng Liu Fan Zhou Fei Huang Feng Hu Hao Ge Haoran Wei Huan Lin Jialong Tang Jian Yang Jianhong Tu Jianwei Zhang Jianxin Yang Jiaxi Yang Jing Zhou Jingren Zhou Junyang Lin Kai Dang Keqin Bao Kexin Yang Le Yu Lianghao Deng Mei Li Mingfeng Xue Mingze Li Pei Zhang Peng Wang Qin Zhu Rui Men Ruize Gao Shixuan Liu Shuang Luo Tianhao Li Tianyi Tang Wenbiao Yin Xingzhang Ren Xinyu Wang Xinyu Zhang Xuancheng Ren Yang Fan Yang Su Yichang Zhang Yinger Zhang Yu Wan Yuqiong Liu Zekun Wang Zeyu Cui Zhenru Zhang Zhipeng Zhou and Zihan Qiu. 2025. Qwen3 Technical Report. arXiv:2505.09388 [cs.CL] https:\/\/arxiv.org\/abs\/2505.09388"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1322192.1322216"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2024.3374875"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3762019","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:00:04Z","timestamp":1765339204000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3762019"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":29,"alternative-id":["10.1145\/3746027.3762019","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3762019","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}