{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T15:13:16Z","timestamp":1777734796205,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T00:00:00Z","timestamp":1742774400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,24]]},"DOI":"10.1145\/3708359.3712106","type":"proceedings-article","created":{"date-parts":[[2025,3,19]],"date-time":"2025-03-19T12:50:34Z","timestamp":1742388634000},"page":"1232-1241","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["SAE: A Multimodal Sentiment Analysis Large Language Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-4849-3949","authenticated-orcid":false,"given":"Xiyin","family":"Zeng","sequence":"first","affiliation":[{"name":"South China Normal University, Aberdeen Institute of Data Science and Artificial Intelligence, Guangzhou, China,"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4941-0647","authenticated-orcid":false,"given":"Qianyi","family":"Zhou","sequence":"additional","affiliation":[{"name":"South China Normal University, Aberdeen Institute of Data Science and Artificial Intelligence, Guangzhou, China,"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2795-0685","authenticated-orcid":false,"given":"Shouqiang","family":"Liu","sequence":"additional","affiliation":[{"name":"South China Normal University, School of Artificial Intelligence, Foshan, Guangdong, China,"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,24]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Georg Ahnert Max Pellert David Garcia and Markus Strohmaier. 2024. Extracting Affect Aggregates from Longitudinal Social Media Data with Temporal Adapters for Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.17990 (2024)."},{"key":"e_1_3_3_2_3_2","unstructured":"Alexei Baevski Yuhao Zhou Abdelrahman Mohamed and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33 (2020) 12449\u201312460."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Chongqing Chen Dezhi Han and Jun Wang. 2020. Multimodal encoder-decoder attention networks for visual question answering. Ieee Access 8 (2020) 35662\u201335671.","DOI":"10.1109\/ACCESS.2020.2975093"},{"key":"e_1_3_3_2_5_2","unstructured":"Keqin Chen Zhao Zhang Weili Zeng Richong Zhang Feng Zhu and Rui Zhao. 2023. Shikra: Unleashing multimodal llm\u2019s referential dialogue magic. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.15195 (2023)."},{"key":"e_1_3_3_2_6_2","unstructured":"Qingkai Fang Shoutao Guo Yan Zhou Zhengrui Ma Shaolei Zhang and Yang Feng. [n. d.]. Llama-omni: Seamless speech interaction with large language models 2024. URL https:\/\/arxiv. org\/abs\/2409.06666 ([n. d.])."},{"key":"e_1_3_3_2_7_2","unstructured":"Mingze Gao Jingyu Liu Mingda Li Jiangtao Xie Qingbin Liu Bo Zhao Xi Chen and Hui Xiong. 2024. TC-LLaVA: Rethinking the Transfer from Image to Video Understanding with Temporal Considerations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.03206 (2024)."},{"key":"e_1_3_3_2_8_2","unstructured":"Xiaoxue Gao Chen Zhang Yiming Chen Huayun Zhang and Nancy\u00a0F Chen. 2024. Emo-dpo: Controllable emotional speech synthesis through direct preference optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.10157 (2024)."},{"key":"e_1_3_3_2_9_2","unstructured":"Team GLM Aohan Zeng Bin Xu Bowen Wang Chenhui Zhang Da Yin Dan Zhang Diego Rojas Guanyu Feng Hanlin Zhao et\u00a0al. 2024. Chatglm: A family of large language models from glm-130b to glm-4 all tools. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.12793 (2024)."},{"key":"e_1_3_3_2_10_2","unstructured":"Jiaming Han Renrui Zhang Wenqi Shao Peng Gao Peng Xu Han Xiao Kaipeng Zhang Chris Liu Song Wen Ziyu Guo et\u00a0al. 2023. Imagebind-llm: Multi-modality instruction tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.03905 (2023)."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-64302-6_5"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01353"},{"key":"e_1_3_3_2_13_2","unstructured":"Fan Ma Xiaojie Jin Heng Wang Yuchen Xian Jiashi Feng and Yi Yang. 2023. Vista-llama: Reliable video narrator via equal distance to visual tokens. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.08870 (2023)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"GS Nijaguna J\u00a0Ananda Babu BD Parameshachari Roc\u00edo\u00a0P\u00e9rez de Prado and Jaroslav Frnda. 2023. Quantum Fruit Fly algorithm and ResNet50-VGG16 for medical diagnosis. Applied Soft Computing 136 (2023) 110055.","DOI":"10.1016\/j.asoc.2023.110055"},{"key":"e_1_3_3_2_15_2","unstructured":"Xingjian Shi Zhourong Chen Hao Wang Dit-Yan Yeung Wai-Kin Wong and Wang-chun Woo. 2015. Convolutional LSTM network: A machine learning approach for precipitation nowcasting. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Pengyu Wang Dong Zhang Linyang Li Chenkun Tan Xinghao Wang Ke Ren Botian Jiang and Xipeng Qiu. 2024. Inferaligner: Inference-time alignment for harmlessness through cross-model guidance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.11206 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.585"},{"key":"e_1_3_3_2_17_2","unstructured":"Yuxuan Wang Cihang Xie Yang Liu and Zilong Zheng. 2024. Videollamb: Long-context video understanding with recurrent memory bridges. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.01071 (2024)."},{"key":"e_1_3_3_2_18_2","unstructured":"Shiwei Wu Joya Chen Kevin\u00a0Qinghong Lin Qimeng Wang Yan Gao Qianli Xu Tong Xu Yao Hu Enhong Chen and Mike\u00a0Zheng Shou. 2024. Videollm-mod: Efficient video-language streaming with mixture-of-depths vision computation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.16730 (2024)."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00965"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581754.3584136"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00171"},{"key":"e_1_3_3_2_22_2","unstructured":"Ce Zhang Taixi Lu Md\u00a0Mohaiminul Islam Ziyang Wang Shoubin Yu Mohit Bansal and Gedas Bertasius. 2023. A simple llm framework for long-range video question-answering. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.17235 (2023)."}],"event":{"name":"IUI '25: 30th International Conference on Intelligent User Interfaces","location":"Cagliari Italy","acronym":"IUI '25","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 30th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708359.3712106","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708359.3712106","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:46Z","timestamp":1750295386000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708359.3712106"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,24]]},"references-count":21,"alternative-id":["10.1145\/3708359.3712106","10.1145\/3708359"],"URL":"https:\/\/doi.org\/10.1145\/3708359.3712106","relation":{},"subject":[],"published":{"date-parts":[[2025,3,24]]},"assertion":[{"value":"2025-03-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}