{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T00:54:44Z","timestamp":1773536084594,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","funder":[{"name":"JST Moonshot","award":["JPMJMS2011"],"award-info":[{"award-number":["JPMJMS2011"]}]},{"name":"JSPS KAKENHI","award":["24H00722"],"award-info":[{"award-number":["24H00722"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,16]]},"DOI":"10.1145\/3757279.3785619","type":"proceedings-article","created":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T00:27:38Z","timestamp":1773102458000},"page":"934-942","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Welcome to the Shop! Field Trial of a VLM-Powered Autonomous Shop Worker Robot"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9898-9918","authenticated-orcid":false,"given":"Sachi","family":"Edirisinghe","sequence":"first","affiliation":[{"name":"Kyoto University, Kyoto, Japan"},{"name":"ATR, Kyoto, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6482-1877","authenticated-orcid":false,"given":"Satoru","family":"Satake","sequence":"additional","affiliation":[{"name":"ATR, Kyoto, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9546-5825","authenticated-orcid":false,"given":"Takayuki","family":"Kanda","sequence":"additional","affiliation":[{"name":"Kyoto University, Kyoto, Japan"},{"name":"ATR, Kyoto, Japan"}]}],"member":"320","published-online":{"date-parts":[[2026,3,16]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI61500.2025.10973830"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-demo.8"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300188"},{"key":"e_1_3_2_2_4_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368554"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2891668"},{"key":"e_1_3_2_2_7_1","volume-title":"2020 29th IEEE international conference on robot and human interactive communication (RO-MAN). 573\u2013579","author":"Donnermann Melissa","year":"2020","unstructured":"Melissa Donnermann, Philipp Schaper, and Birgit Lugrin. 2020. Integrating a social robot in higher education\u2013a field study. In 2020 29th IEEE international conference on robot and human interactive communication (RO-MAN). 573\u2013579."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3635007"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680292"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/895623"},{"key":"e_1_3_2_2_11_1","volume-title":"Meetings and Conferencing. https:\/\/meet.google.com\/landing Accessed: 2025-09-30","author":"Google","year":"2025","unstructured":"Google LLC. 2025. Google Meet: Online Video Calls, Meetings and Conferencing. https:\/\/meet.google.com\/landing Accessed: 2025-09-30"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2006.889486"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2016.2616343"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3708359.3712145"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.3390\/s23156997"},{"key":"e_1_3_2_2_16_1","unstructured":"Ruben Janssens and Tony Belpaeme. 2025. Towards Multimodal Social Conversations with Robots: Using Vision-Language Models. arXiv preprint arXiv:2507.19196."},{"key":"e_1_3_2_2_17_1","volume-title":"Generating Visually Grounded Conversation Starters for Human-Robot Dialogue. In 2022 17th ACM\/IEEE International Conference on Human-Robot Interaction (HRI). 821\u2013825","author":"Janssens Ruben","year":"2022","unstructured":"Ruben Janssens, Pieter Wolfert, Thomas Demeester, and Tony Belpaeme. 2022. \u2018Cool glasses, where did you get them?\u201d Generating Visually Grounded Conversation Starters for Human-Robot Dialogue. In 2022 17th ACM\/IEEE International Conference on Human-Robot Interaction (HRI). 821\u2013825."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2024.3428704"},{"key":"e_1_3_2_2_19_1","volume-title":"Guillem Aleny\u00e0, Greg Chance, Praminda Caleb-Solly, Sanja Dogramadzi, and Carme Torras.","author":"Jevti\u0107 Aleksandar","year":"2018","unstructured":"Aleksandar Jevti\u0107, Andr\u00e9s Flores Valle, Guillem Aleny\u00e0, Greg Chance, Praminda Caleb-Solly, Sanja Dogramadzi, and Carme Torras. 2018. Personalized robot assistant for support in dressing. IEEE transactions on cognitive and developmental systems, 11, 3 (2018), 363\u2013374."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2010.2062550"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3434073.3444679"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/UR49135.2020.9144932"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2019.1599727"},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific","author":"Mach\u00e1\u010dek Dominik","year":"2023","unstructured":"Dominik Mach\u00e1\u010dek, Raj Dabre, and Ond\u0159ej Bojar. 2023. Turning Whisper into Real-Time Transcription System. In Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics: System Demonstrations, Sriparna Saha and Herry Sujaini (Eds.). Association for Computational Linguistics, Bali, Indonesia. 17\u201324. https:\/\/aclanthology.org\/2023.ijcnlp-demo.3"},{"key":"e_1_3_2_2_25_1","volume-title":"Social robots: Technological, societal and ethical aspects of human-robot interaction","author":"Niemel\u00e4 Marketta","unstructured":"Marketta Niemel\u00e4, P\u00e4ivi Heikkil\u00e4, Hanna Lammi, and Virpi Oksman. 2019. A social robot in a shopping mall: studies on acceptance and stakeholder expectations. In Social robots: Technological, societal and ethical aspects of human-robot interaction. Springer, 119\u2013144."},{"key":"e_1_3_2_2_26_1","unstructured":"OpenAI. 2024. Introducing Structured Outputs in the API. https:\/\/openai.com\/index\/introducing-structured-outputs-in-the-api\/ Accessed: 2025-09-25"},{"key":"e_1_3_2_2_27_1","unstructured":"OpenAI. 2025. Introducing GPT-4.1 in the API. https:\/\/openai.com\/index\/gpt-4-1\/ Accessed: 2025-09-07"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"crossref","unstructured":"Hamed Rahimi Adil Bahaj Mouad Abrini Mahdi Khoramshahi Mounir Ghogho and Mohamed Chetouani. 2025. User-vlm 360: Personalized vision language models with user-aware tuning for social human-robot interactions. arXiv preprint arXiv:2502.10636.","DOI":"10.1145\/3716553.3750767"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-015-0332-9"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Kazuhiro Sasabuchi Naoki Wake Atsushi Kanehira Jun Takamatsu and Katsushi Ikeuchi. 2025. Agreeing to interact in human-robot interaction using large language models and vision language models. arXiv preprint arXiv:2503.15491.","DOI":"10.1109\/RO-MAN63969.2025.11217646"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-013-0180-4"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1349822.1349843"},{"key":"e_1_3_2_2_33_1","volume-title":"Francisco Javier Rodr\u00edguez-Lera, and Vicente Matell\u00e1n-Olivera","author":"Sobr\u00edn-Hidalgo David","year":"2024","unstructured":"David Sobr\u00edn-Hidalgo, Miguel \u00c1ngel Gonz\u00e1lez-Santamarta, \u00c1ngel Manuel Guerrero-Higueras, Francisco Javier Rodr\u00edguez-Lera, and Vicente Matell\u00e1n-Olivera. 2024. Enhancing robot explanation capabilities through vision-language models: a preliminary study by interpreting visual inputs for improved human-robot interaction. arXiv preprint arXiv:2404.09705."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbusres.2022.03.087"},{"key":"e_1_3_2_2_35_1","volume-title":"Xuesu Xiao, and Dinesh Manocha.","author":"Song Daeun","year":"2024","unstructured":"Daeun Song, Jing Liang, Amirreza Payandeh, Amir Hossain Raj, Xuesu Xiao, and Dinesh Manocha. 2024. Vlm-social-nav: Socially aware robot navigation through scoring using vision-language models. IEEE Robotics and Automation Letters."},{"key":"e_1_3_2_2_36_1","unstructured":"SYSTRAN. 2025. faster-whisper: a reimplementation of OpenAI\u2019s Whisper using CTranslate2. https:\/\/github.com\/SYSTRAN\/faster-whisper Accessed: 2025-08-30"},{"key":"e_1_3_2_2_37_1","volume-title":"Janome: Japanese morphological analysis engine in pure Python. https:\/\/github.com\/mocobeta\/janome Accessed: 2025-09-30","author":"Uchida Tomoko","year":"2025","unstructured":"Tomoko Uchida and contributors. 2025. Janome: Japanese morphological analysis engine in pure Python. https:\/\/github.com\/mocobeta\/janome Accessed: 2025-09-30"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22239545"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3369699"}],"event":{"name":"HRI '26: 21st ACM\/IEEE International Conference on Human-Robot Interaction","location":"Edinburgh Scotland UK","acronym":"HRI '26","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction","IEEE RAS"]},"container-title":["Proceedings of the 21st ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T00:31:58Z","timestamp":1773534718000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757279.3785619"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,16]]},"references-count":39,"alternative-id":["10.1145\/3757279.3785619","10.1145\/3757279"],"URL":"https:\/\/doi.org\/10.1145\/3757279.3785619","relation":{},"subject":[],"published":{"date-parts":[[2026,3,16]]},"assertion":[{"value":"2026-03-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}