{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:56:39Z","timestamp":1773932199199,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3716553.3750767","type":"proceedings-article","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T13:13:16Z","timestamp":1760188396000},"page":"326-336","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["USER-VLM 360: Personalized Vision Language Models with User-aware Tuning for Social Human-Robot Interactions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9179-8625","authenticated-orcid":false,"given":"Hamed","family":"Rahimi","sequence":"first","affiliation":[{"name":"Institut des Syst\u00e8mes Intelligents et de Robotique (ISIR), Sorbonne University, Paris, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7842-8726","authenticated-orcid":false,"given":"Adil","family":"Bahaj","sequence":"additional","affiliation":[{"name":"Mohammed VI Polytechnic University, Rabat, Morocco"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3727-4892","authenticated-orcid":false,"given":"Mouad","family":"Abrini","sequence":"additional","affiliation":[{"name":"Institut des Syst\u00e8mes Intelligents et de Robotique (ISIR), Sorbonne University, Paris, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9470-2561","authenticated-orcid":false,"given":"Mahdi","family":"Khoramshahi","sequence":"additional","affiliation":[{"name":"Institut des Syst\u00e8mes Intelligents et de Robotique (ISIR), Sorbonne University, Paris, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0055-7867","authenticated-orcid":false,"given":"Mounir","family":"Ghogho","sequence":"additional","affiliation":[{"name":"Mohammed VI Polytechnic University, Rabat, Morocco"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2920-4539","authenticated-orcid":false,"given":"Mohamed","family":"Chetouani","sequence":"additional","affiliation":[{"name":"Institut des Syst\u00e8mes Intelligents et de Robotique (ISIR), Sorbonne University, Paris, France"}]}],"member":"320","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Pravesh Agrawal Szymon Antoniak Emma\u00a0Bou Hanna Baptiste Bout Devendra Chaplot Jessica Chudnovsky Diogo Costa Baudouin De\u00a0Monicault Saurabh Garg Theophile Gervet et\u00a0al. 2024. Pixtral 12B. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.07073 (2024)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2016.7451870"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72624-8_5"},{"key":"e_1_3_3_1_5_2","unstructured":"Ahmed Allam. 2024. Biasdpo: Mitigating bias in language models through direct preference optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.13928 (2024)."},{"key":"e_1_3_3_1_6_2","unstructured":"Ruichuan An Sihan Yang Ming Lu Kai Zeng Yulin Luo Ying Chen Jiajun Cao Hao Liang Qi She Shanghang Zhang et\u00a0al. 2024. MC-LLaVA: Multi-Concept Personalized Vision-Language Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.11706 (2024)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Antonio Andriella Carme Torras and Guillem Alenya. 2020. Short-term human\u2013robot interaction adaptability in real-world environments. International Journal of Social Robotics 12 3 (2020) 639\u2013657.","DOI":"10.1007\/s12369-019-00606-y"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Andrea Bonci Francesco Gaudeni Maria\u00a0Cristina Giannini and Sauro Longhi. 2023. Robot Operating System 2 (ROS2)-based frameworks for increasing robot autonomy: A survey. applied sciences 13 23 (2023) 12796.","DOI":"10.3390\/app132312796"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Elena Cavallini Irene Ceccato Silvana Bertoglio Andrea Francescani Federico Vigato Aladar\u00a0Bruno Ianes and Serena Lecce. 2021. Can theory of mind of healthy older adults living in a nursing home be improved? A randomized controlled trial. Aging Clinical and Experimental Research 33 (2021) 3029\u20133037.","DOI":"10.1007\/s40520-021-01811-4"},{"key":"e_1_3_3_1_10_2","unstructured":"Alex Chen. 2024. VLM-DPO-Example Dataset. https:\/\/huggingface.co\/datasets\/alexchen4ai\/vlm-dpo-example Accessed: 2025-01-29."},{"key":"e_1_3_3_1_11_2","unstructured":"Lichang Chen Shiyang Li Jun Yan Hai Wang Kalpa Gunaratna Vikas Yadav Zheng Tang Vijay Srinivasan Tianyi Zhou Heng Huang et\u00a0al. 2023. Alpagasus: Training a better alpaca with fewer data. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.08701 (2023)."},{"key":"e_1_3_3_1_12_2","unstructured":"Pengan Chen Ziniu Wu Jiawei Sun Dong Wang Peng Zhou Nieqing Cao Yan Ding Bin Zhao Xuelong Li et\u00a0al. 2024. AlignBot: Aligning VLM-powered Customized Task Planning with User Reminders Through Fine-Tuning for Household Robots. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.11905 (2024)."},{"key":"e_1_3_3_1_13_2","unstructured":"Shaoxiang Chen Zequn Jie and Lin Ma. 2024. Llava-mole: Sparse mixture of lora experts for mitigating data conflicts in instruction finetuning mllms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.16160 (2024)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Thierry Deruyttere Simon Vandenhende Dusan Grujicic Luc Van\u00a0Gool and Marie-Francine Moens. 2019. Talk2car: Taking control of your self-driving car. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.10838 (2019).","DOI":"10.18653\/v1\/D19-1215"},{"key":"e_1_3_3_1_15_2","unstructured":"Zichao Dong Weikun Zhang Xufeng Huang Hang Ji Xin Zhan and Junbo Chen. 2023. HuBo-VLM: Unified Vision-Language Model designed for HUman roBOt interaction tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12537 (2023)."},{"key":"e_1_3_3_1_16_2","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et\u00a0al. 2024. The llama 3 herd of models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.21783 (2024)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Joel Eapen and VS Adhithyan. 2023. Personalization and customization of llm responses. International Journal of Research Publication and Reviews 4 12 (2023) 2617\u20132627.","DOI":"10.55248\/gengpi.4.1223.123512"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Chris Frith and Uta Frith. 2005. Theory of mind. Current biology 15 17 (2005) R644\u2013R645.","DOI":"10.1016\/j.cub.2005.08.041"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72998-0_18"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"e_1_3_3_1_21_2","unstructured":"Edward\u00a0J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.09685 (2021)."},{"key":"e_1_3_3_1_22_2","unstructured":"Patrick\u00a0Amadeus Irawan. 2024. VQA-NLE-LLaVA Dataset. https:\/\/huggingface.co\/datasets\/patrickamadeus\/vqa-nle-llava Accessed: 2025-01-29."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3434074.3444881"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3563359.3597383"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Aleksandar Jevti\u0107 Andr\u00e9s\u00a0Flores Valle Guillem Aleny\u00e0 Greg Chance Praminda Caleb-Solly Sanja Dogramadzi and Carme Torras. 2018. Personalized robot assistant for support in dressing. IEEE transactions on cognitive and developmental systems 11 3 (2018) 363\u2013374.","DOI":"10.1109\/TCDS.2018.2817283"},{"key":"e_1_3_3_1_26_2","unstructured":"Albert\u00a0Q Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier et\u00a0al. 2023. Mistral 7B. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.06825 (2023)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"crossref","unstructured":"Susanne Kristen and Beate Sodian. 2014. Theory of Mind (ToM) in early education. Contemporary perspectives on research in theory of mind in early childhood education (2014) 291\u2013320.","DOI":"10.1108\/978-1-62396-513-620251019"},{"key":"e_1_3_3_1_29_2","unstructured":"Hugo Lauren\u00e7on L\u00e9o Tronchon Matthieu Cord and Victor Sanh. 2024. What matters when building vision-language models? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.02246 (2024)."},{"key":"e_1_3_3_1_30_2","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et\u00a0al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_3_1_31_2","unstructured":"Bohao Li Rui Wang Guangzhi Wang Yuying Ge Yixiao Ge and Ying Shan. 2023. Seed-bench: Benchmarking multimodal llms with generative comprehension. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.16125 (2023)."},{"key":"e_1_3_3_1_32_2","unstructured":"Chenyi Li Guande Wu Gromit Yeuk-Yin Chan Dishita\u00a0G Turakhia Sonia\u00a0Castelo Quispe Dong Li Leslie Welch Claudio Silva and Jing Qian. 2024. Satori: Towards Proactive AR Assistant with Belief-Desire-Intention User Modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.16668 (2024)."},{"key":"e_1_3_3_1_33_2","unstructured":"Kevin\u00a0Y Li Sachin Goyal Joao\u00a0D Semedo and J\u00a0Zico Kolter. 2024. Inference optimal vlms need only one visual token but larger models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.03312 (2024)."},{"key":"e_1_3_3_1_34_2","first-page":"74","volume-title":"Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74\u201381. https:\/\/aclanthology.org\/W04-1013\/"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_3_1_36_2","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong\u00a0Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems 36 (2024)."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/CASE59546.2024.10711845"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01050"},{"key":"e_1_3_3_1_39_2","unstructured":"Paolo Magri Javad Amirian and Mohamed Chetouani. 2024. Upgrading Pepper Robot s Social Interaction with Advanced Hardware and Perception Enhancements. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.01036 (2024)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3616524"},{"key":"e_1_3_3_1_41_2","unstructured":"Lin Ning Luyang Liu Jiaxing Wu Neo Wu Devora Berlowitz Sushant Prakash Bradley Green Shawn O\u2019Banion and Jun Xie. 2024. User-LLM: Efficient LLM Contextualization with User Embeddings. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.13598 (2024)."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Olivia Nocentini Laura Fiorini Giorgia Acerbi Alessandra Sorrentino Gianmaria Mancioppi and Filippo Cavallo. 2019. A survey of behavioral models for social robots. Robotics 8 3 (2019) 54.","DOI":"10.3390\/robotics8030054"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"crossref","unstructured":"Catharine Oertel Ginevra Castellano Mohamed Chetouani Jauwairia Nasir Mohammad Obaid Catherine Pelachaud and Christopher Peters. 2020. Engagement in human-agent interaction: An overview. Frontiers in Robotics and AI 7 (2020) 92.","DOI":"10.3389\/frobt.2020.00092"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73027-6_17"},{"key":"e_1_3_3_1_45_2","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et\u00a0al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022) 27730\u201327744."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","unstructured":"Amit\u00a0Kumar Pandey and Rodolphe Gelin. 2018. A Mass-Produced Sociable Humanoid Robot: Pepper: The First Machine of Its Kind. IEEE Robotics & Automation Magazine 25 3 (2018) 40\u201348. 10.1109\/MRA.2018.2833157","DOI":"10.1109\/MRA.2018.2833157"},{"key":"e_1_3_3_1_47_2","unstructured":"Generated Photos. 2024. Generated Photos: Unique worry-free model photos. https:\/\/generated.photos\/ Accessed: 2025-01-29."},{"key":"e_1_3_3_1_48_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_49_2","first-page":"28492","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International conference on machine learning. PMLR, 28492\u201328518."},{"key":"e_1_3_3_1_50_2","unstructured":"Rafael Rafailov Archit Sharma Eric Mitchell Christopher\u00a0D Manning Stefano Ermon and Chelsea Finn. 2024. Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_1_51_2","unstructured":"Hamed Rahimi Mouad Abrini Mahdi Khoramshahi and Mohamed Chetouani. 2025. User-VLM: LLM Contextualization with Multimodal Pre-trained User Models. The 39th Annual AAAI Conference on Artificial Intelligence (2025)."},{"key":"e_1_3_3_1_52_2","unstructured":"Guruprasad\u00a0Viswanathan Ramesh. 2024. Face_Bench_Five_Task_Sample. https:\/\/huggingface.co\/datasets\/gp06aug\/Face_Bench_Five_Task_Sample Accessed: 2025-01-29."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"crossref","unstructured":"Nicole Robinson Brendan Tidd Dylan Campbell Dana Kuli\u0107 and Peter Corke. 2023. Robotic vision for human-robot interaction and collaboration: A survey and systematic review. ACM Transactions on Human-Robot Interaction 12 1 (2023) 1\u201366.","DOI":"10.1145\/3570731"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN53752.2022.9900550"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00828"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"crossref","unstructured":"Daeun Song Jing Liang Amirreza Payandeh Amir\u00a0Hossain Raj Xuesu Xiao and Dinesh Manocha. 2024. VLM-Social-Nav: Socially Aware Robot Navigation through Scoring using Vision-Language Models. IEEE Robotics and Automation Letters (2024).","DOI":"10.1109\/LRA.2024.3511409"},{"key":"e_1_3_3_1_58_2","unstructured":"Andreas Steiner Andr\u00e9\u00a0Susano Pinto Michael Tschannen Daniel Keysers Xiao Wang Yonatan Bitton Alexey Gritsenko Matthias Minderer Anthony Sherbondy Shangbang Long et\u00a0al. 2024. PaliGemma 2: A Family of Versatile VLMs for Transfer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.03555 (2024)."},{"key":"e_1_3_3_1_59_2","unstructured":"Zhi\u00a0Rui Tam. 2023. Alexa-QA Dataset. https:\/\/huggingface.co\/datasets\/theblackcat102\/alexa-qa Accessed: 2025-01-29."},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"crossref","unstructured":"Ana Tanevska Francesco Rea Giulio Sandini Lola Ca\u00f1amero and Alessandra Sciutti. 2020. A socially adaptable framework for human-robot interaction. Frontiers in Robotics and AI 7 (2020) 121.","DOI":"10.3389\/frobt.2020.00121"},{"key":"e_1_3_3_1_61_2","unstructured":"Gemma Team Morgane Riviere Shreya Pathak Pier\u00a0Giuseppe Sessa Cassidy Hardin Surya Bhupatiraju L\u00e9onard Hussenot Thomas Mesnard Bobak Shahriari Alexandre Ram\u00e9 et\u00a0al. 2024. Gemma 2: Improving open language models at a practical size. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.00118 (2024)."},{"key":"e_1_3_3_1_62_2","unstructured":"Mistral\u00a0AI team. 2024. Mistral NeMo. https:\/\/mistral.ai\/news\/mistral-nemo Accessed: 2024-01-29."},{"key":"e_1_3_3_1_63_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_1_64_2","unstructured":"Yuanjie\u00a0(Tukey) Tu. 2024. Human Face Emotions Dataset. https:\/\/huggingface.co\/datasets\/tukey\/human_face_emotions_roboflow. Accessed: 2025-01-29."},{"key":"e_1_3_3_1_65_2","unstructured":"Beichen Wang Juexiao Zhang Shuwen Dong Irving Fang and Chen Feng. 2024. Vlm see robot do: Human demo video to robot action plan via vision language model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.08792 (2024)."},{"key":"e_1_3_3_1_66_2","unstructured":"Xun Wu Shaohan Huang and Furu Wei. 2024. Mixture of lora experts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.13628 (2024)."},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01833"},{"key":"e_1_3_3_1_68_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-96530-3"},{"key":"e_1_3_3_1_69_2","doi-asserted-by":"crossref","unstructured":"Jingyi Zhang Jiaxing Huang Sheng Jin and Shijian Lu. 2024. Vision-language models for vision tasks: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024).","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"e_1_3_3_1_70_2","unstructured":"Qizhe Zhang Aosong Cheng Ming Lu Zhiyong Zhuo Minqi Wang Jiajun Cao Shaobo Guo Qi She and Shanghang Zhang. 2024. [CLS] Attention is All You Need for Training-Free Visual Token Pruning: Make VLM Inference Faster. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.01818 (2024)."},{"key":"e_1_3_3_1_71_2","unstructured":"Tianyi Zhang Varsha Kishore Felix Wu Kilian\u00a0Q Weinberger and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.09675 (2019)."},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"crossref","unstructured":"Kaiyang Zhou Jingkang Yang Chen\u00a0Change Loy and Ziwei Liu. 2022. Learning to prompt for vision-language models. International Journal of Computer Vision 130 9 (2022) 2337\u20132348.","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_3_1_73_2","unstructured":"Yuchen Zhuang Haotian Sun Yue Yu Rushi Qiang Qifan Wang Chao Zhang and Bo Dai. 2024. Hydra: Model factorization framework for black-box llm personalization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.02888 (2024)."}],"event":{"name":"ICMI '25: International Conference on Multimodal Interaction","location":"Canberra Australia","acronym":"ICMI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 27th International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3716553.3750767","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T22:28:20Z","timestamp":1769466500000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3716553.3750767"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":72,"alternative-id":["10.1145\/3716553.3750767","10.1145\/3716553"],"URL":"https:\/\/doi.org\/10.1145\/3716553.3750767","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}