{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:31:59Z","timestamp":1773376319608,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","funder":[{"name":"HWUM James Watt Scholarship","award":["JWS 2022\/01"],"award-info":[{"award-number":["JWS 2022\/01"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,16]]},"DOI":"10.1145\/3776734.3794476","type":"proceedings-article","created":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:05:48Z","timestamp":1773345948000},"page":"650-654","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Stable or Stuck? Understanding MLLM Engagement Prediction in Uncontrolled and Controlled HRI"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-5709-8493","authenticated-orcid":false,"given":"Jia Yap","family":"Lim","sequence":"first","affiliation":[{"name":"Heriot-Watt University Malaysia, Putrajaya, Malaysia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3005-4109","authenticated-orcid":false,"given":"John","family":"See","sequence":"additional","affiliation":[{"name":"Heriot-Watt University Malaysia, Putrajaya, Malaysia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0720-7250","authenticated-orcid":false,"given":"William Weimin","family":"Yoo","sequence":"additional","affiliation":[{"name":"Heriot-Watt University Malaysia, Putrajaya, Malaysia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4821-5871","authenticated-orcid":false,"given":"Christian","family":"Dondrup","sequence":"additional","affiliation":[{"name":"Heriot-Watt University, Edinburgh, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,3,16]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et al. 2023. Qwen technical report. arXiv preprint arXiv:2309.16609."},{"key":"e_1_3_2_1_2_1","volume-title":"Human-robot interaction: An introduction","author":"Bartneck Christoph","unstructured":"Christoph Bartneck, Tony Belpaeme, Friederike Eyssel, Takayuki Kanda, Merel Keijsers, and Selma \u0160abanovi\u0107. 2024. Human-robot interaction: An introduction. Cambridge University Press."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136814"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-019-00591-2"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN47096.2020.9223340"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2737019"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2020.00116"},{"key":"e_1_3_2_1_8_1","unstructured":"Pengcheng He Jianfeng Gao and Weizhu Chen. 2021. DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing. arxiv:2111.09543."},{"key":"e_1_3_2_1_9_1","volume-title":"Deberta: Decoding-enhanced bert with disentangled attention. arXiv preprint arXiv:2006.03654.","author":"He Pengcheng","year":"2020","unstructured":"Pengcheng He, Xiaodong Liu, Jianfeng Gao, and Weizhu Chen. 2020. Deberta: Decoding-enhanced bert with disentangled attention. arXiv preprint arXiv:2006.03654."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-acl.837"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI61500.2025.10973944"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2013.6483545"},{"key":"e_1_3_2_1_13_1","volume-title":"International conference on machine learning. 4904\u20134916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. 4904\u20134916."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-022-09632-1"},{"key":"e_1_3_2_1_15_1","volume-title":"Multimodal Engagement Prediction in Human-Robot Interaction Using Transformer Neural Networks. In International Conference on Multimedia Modeling. 3\u201317","author":"Dondrup Christian","year":"2025","unstructured":"Jia Yap Lim, John See, and Christian Dondrup. 2025. Multimodal Engagement Prediction in Human-Robot Interaction Using Transformer Neural Networks. In International Conference on Multimedia Modeling. 3\u201317."},{"key":"e_1_3_2_1_16_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems, 36","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. Advances in neural information processing systems, 36 (2023), 34892\u201334916."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3678957.3685729"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.3390\/publications9010012"},{"key":"e_1_3_2_1_19_1","volume-title":"International conference on machine learning. 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. 8748\u20138763."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3434074.3447157"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMI.2002.1166980"},{"key":"e_1_3_2_1_22_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth Katie Millican et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805."},{"key":"e_1_3_2_1_23_1","volume-title":"Mm-llms: Recent advances in multimodal large language models. arXiv preprint arXiv:2401.13601.","author":"Zhang Duzhen","year":"2024","unstructured":"Duzhen Zhang, Yahan Yu, Jiahua Dong, Chenxing Li, Dan Su, Chenhui Chu, and Dong Yu. 2024. Mm-llms: Recent advances in multimodal large language models. arXiv preprint arXiv:2401.13601."}],"event":{"name":"HRI '26: 21st ACM\/IEEE International Conference on Human-Robot Interaction","location":"Edinburgh Scotland UK","acronym":"HRI '26","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction","IEEE RAS"]},"container-title":["Companion Proceedings of the 21st ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:09:41Z","timestamp":1773346181000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3776734.3794476"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,16]]},"references-count":23,"alternative-id":["10.1145\/3776734.3794476","10.1145\/3776734"],"URL":"https:\/\/doi.org\/10.1145\/3776734.3794476","relation":{},"subject":[],"published":{"date-parts":[[2026,3,16]]},"assertion":[{"value":"2026-03-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}