{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T22:37:53Z","timestamp":1777070273171,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,2]],"date-time":"2024-05-02T00:00:00Z","timestamp":1714608000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,2]]},"DOI":"10.1145\/3613905.3651029","type":"proceedings-article","created":{"date-parts":[[2024,5,11]],"date-time":"2024-05-11T08:15:21Z","timestamp":1715415321000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":66,"title":["LaMI: Large Language Models for Multi-Modal Human-Robot Interaction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1913-2524","authenticated-orcid":false,"given":"Chao","family":"Wang","sequence":"first","affiliation":[{"name":"Honda Research Institute Europe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3690-3223","authenticated-orcid":false,"given":"Stephan","family":"Hasler","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1363-7970","authenticated-orcid":false,"given":"Daniel","family":"Tanneberg","sequence":"additional","affiliation":[{"name":"Honda Research Institute EU, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5320-2525","authenticated-orcid":false,"given":"Felix","family":"Ocker","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe GmbH, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4421-1737","authenticated-orcid":false,"given":"Frank","family":"Joublin","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1075-459X","authenticated-orcid":false,"given":"Antonello","family":"Ceravola","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5931-6973","authenticated-orcid":false,"given":"Joerg","family":"Deigmoeller","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8036-2519","authenticated-orcid":false,"given":"Michael","family":"Gienger","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,11]]},"reference":[{"key":"e_1_3_3_3_1_1","volume-title":"Do as i can, not as i say: Grounding language in robotic affordances. arXiv preprint arXiv:2204.01691","author":"Ahn Michael","year":"2022","unstructured":"Michael Ahn, Anthony Brohan, Noah Brown, Yevgen Chebotar, Omar Cortes, Byron David, Chelsea Finn, Chuyuan Fu, Keerthana Gopalakrishnan, Karol Hausman, 2022. Do as i can, not as i say: Grounding language in robotic affordances. arXiv preprint arXiv:2204.01691 (2022)."},{"key":"e_1_3_3_3_2_1","volume-title":"Incremental Learning of Humanoid Robot Behavior from Natural Interaction and Large Language Models. arXiv preprint arXiv:2309.04316","author":"B\u00e4rmann Leonard","year":"2023","unstructured":"Leonard B\u00e4rmann, Rainer Kartmann, Fabian Peller-Konrad, Alex Waibel, and Tamim Asfour. 2023. Incremental Learning of Humanoid Robot Behavior from Natural Interaction and Large Language Models. arXiv preprint arXiv:2309.04316 (2023)."},{"key":"e_1_3_3_3_3_1","volume-title":"Human-robot interaction: An introduction","author":"Bartneck Christoph","unstructured":"Christoph Bartneck, Tony Belpaeme, Friederike Eyssel, Takayuki Kanda, Merel Keijsers, and Selma \u0160abanovi\u0107. 2020. Human-robot interaction: An introduction. Cambridge University Press."},{"key":"e_1_3_3_3_4_1","volume-title":"Graph of thoughts: Solving elaborate problems with large language models. arXiv preprint arXiv:2308.09687","author":"Besta Maciej","year":"2023","unstructured":"Maciej Besta, Nils Blach, Ales Kubicek, Robert Gerstenberger, Lukas Gianinazzi, Joanna Gajda, Tomasz Lehmann, Michal Podstawski, Hubert Niewiadomski, Piotr Nyczyk, 2023. Graph of thoughts: Solving elaborate problems with large language models. arXiv preprint arXiv:2308.09687 (2023)."},{"key":"e_1_3_3_3_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5652230"},{"key":"e_1_3_3_3_6_1","volume-title":"Meta-in-context learning in large language models. arXiv preprint arXiv:2305.12907","author":"Coda-Forno Julian","year":"2023","unstructured":"Julian Coda-Forno, Marcel Binz, Zeynep Akata, Matthew Botvinick, Jane\u00a0X Wang, and Eric Schulz. 2023. Meta-in-context learning in large language models. arXiv preprint arXiv:2305.12907 (2023)."},{"key":"e_1_3_3_3_7_1","volume-title":"PaLM-E: An Embodied Multimodal Language Model. (3","author":"Driess Danny","year":"2023","unstructured":"Danny Driess, Fei Xia, Mehdi S.\u00a0M. Sajjadi, Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, Wenlong Huang, Yevgen Chebotar, Pierre Sermanet, Daniel Duckworth, Sergey Levine, Vincent Vanhoucke, Karol Hausman, Marc Toussaint, Klaus Greff, Andy Zeng, Igor Mordatch, and Pete Florence. 2023. PaLM-E: An Embodied Multimodal Language Model. (3 2023). http:\/\/arxiv.org\/abs\/2303.03378"},{"key":"e_1_3_3_3_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2492999"},{"key":"e_1_3_3_3_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/11678816_19"},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2974688"},{"key":"e_1_3_3_3_11_1","volume-title":"A multimodal emotional human\u2013robot interaction architecture for social robots engaged in bidirectional communication","author":"Hong Alexander","year":"2020","unstructured":"Alexander Hong, Nolan Lunscher, Tianhao Hu, Yuma Tsuboi, Xinyi Zhang, Silas\u00a0Franco dos Reis\u00a0Alves, Goldie Nejat, and Beno Benhabib. 2020. A multimodal emotional human\u2013robot interaction architecture for social robots engaged in bidirectional communication. IEEE transactions on cybernetics 51, 12 (2020), 5954\u20135968."},{"key":"e_1_3_3_3_12_1","volume-title":"CoPAL: Corrective Planning of Robot Actions with Large Language Models. arXiv preprint arXiv:2310.07263","author":"Joublin Frank","year":"2023","unstructured":"Frank Joublin, Antonello Ceravola, Pavel Smirnov, Felix Ocker, Joerg Deigmoeller, Anna Belardinelli, Chao Wang, Stephan Hasler, Daniel Tanneberg, and Michael Gienger. 2023. CoPAL: Corrective Planning of Robot Actions with Large Language Models. arXiv preprint arXiv:2310.07263 (2023)."},{"key":"e_1_3_3_3_13_1","volume-title":"International Conference on Machine Learning. PMLR, 22137\u201322176","author":"Liu Zichang","year":"2023","unstructured":"Zichang Liu, Jue Wang, Tri Dao, Tianyi Zhou, Binhang Yuan, Zhao Song, Anshumali Shrivastava, Ce Zhang, Yuandong Tian, Christopher Re, 2023. Deja vu: Contextual sparsity for efficient llms at inference time. In International Conference on Machine Learning. PMLR, 22137\u201322176."},{"key":"e_1_3_3_3_14_1","volume-title":"Exploring Large Language Models as a Source of Common-Sense Knowledge for Robots. arXiv preprint arXiv:2311.08412","author":"Ocker Felix","year":"2023","unstructured":"Felix Ocker, J\u00f6rg Deigm\u00f6ller, and Julian Eggert. 2023. Exploring Large Language Models as a Source of Common-Sense Knowledge for Robots. arXiv preprint arXiv:2311.08412 (2023)."},{"key":"e_1_3_3_3_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-03413-3_43"},{"key":"e_1_3_3_3_16_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2021.577107"},{"key":"e_1_3_3_3_17_1","doi-asserted-by":"publisher","unstructured":"Hang Su Wen Qi Jiahao Chen Chenguang Yang Juan Sandoval and Med\u00a0Amine Laribi. 2023. Recent advancements in multimodal human\u2013robot interaction. https:\/\/doi.org\/10.3389\/fnbot.2023.1084000","DOI":"10.3389\/fnbot.2023.1084000"},{"key":"e_1_3_3_3_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2559636.2559663"},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2015.1031697"},{"key":"e_1_3_3_3_20_1","first-page":"20","article-title":"Chatgpt for robotics: Design principles and model abilities","volume":"2","author":"Vemprala Sai","year":"2023","unstructured":"Sai Vemprala, Rogerio Bonatti, Arthur Bucker, and Ashish Kapoor. 2023. Chatgpt for robotics: Design principles and model abilities. Microsoft Auton. Syst. Robot. Res 2 (2023), 20.","journal-title":"Microsoft Auton. Syst. Robot. Res"},{"key":"e_1_3_3_3_21_1","volume-title":"Chatgpt empowered long-step robot control in various environments: A case application. arXiv preprint arXiv:2304.03893","author":"Wake Naoki","year":"2023","unstructured":"Naoki Wake, Atsushi Kanehira, Kazuhiro Sasabuchi, Jun Takamatsu, and Katsushi Ikeuchi. 2023. Chatgpt empowered long-step robot control in various environments: A case application. arXiv preprint arXiv:2304.03893 (2023)."},{"key":"e_1_3_3_3_22_1","volume-title":"Efficient Large Language Models: A Survey. arXiv preprint arXiv:2312.03863","author":"Wan Zhongwei","year":"2023","unstructured":"Zhongwei Wan, Xin Wang, Che Liu, Samiul Alam, Yu Zheng, Zhongnan Qu, Shen Yan, Yi Zhu, Quanlu Zhang, Mosharaf Chowdhury, 2023. Efficient Large Language Models: A Survey. arXiv preprint arXiv:2312.03863 (2023)."},{"key":"e_1_3_3_3_23_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc\u00a0V Le, Denny Zhou, 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems 35 (2022), 24824\u201324837.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_3_3_24_1","volume-title":"A prompt pattern catalog to enhance prompt engineering with chatgpt. arXiv preprint arXiv:2302.11382","author":"White Jules","year":"2023","unstructured":"Jules White, Quchen Fu, Sam Hays, Michael Sandborn, Carlos Olea, Henry Gilbert, Ashraf Elnashar, Jesse Spencer-Smith, and Douglas\u00a0C Schmidt. 2023. A prompt pattern catalog to enhance prompt engineering with chatgpt. arXiv preprint arXiv:2302.11382 (2023)."},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/URAI.2014.7057497"},{"key":"e_1_3_3_3_26_1","volume-title":"Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Thomas\u00a0L Griffiths, Yuan Cao, and Karthik Narasimhan. 2023. Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601 (2023)."},{"key":"e_1_3_3_3_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3282111"},{"key":"e_1_3_3_3_28_1","volume-title":"Statler: State-maintaining language models for embodied reasoning. arXiv preprint arXiv:2306.17840","author":"Yoneda Takuma","year":"2023","unstructured":"Takuma Yoneda, Jiading Fang, Peng Li, Huanyu Zhang, Tianchong Jiang, Shengjie Lin, Ben Picker, David Yunis, Hongyuan Mei, and Matthew\u00a0R Walter. 2023. Statler: State-maintaining language models for embodied reasoning. arXiv preprint arXiv:2306.17840 (2023)."},{"key":"e_1_3_3_3_29_1","volume-title":"From Text to Motion: Grounding GPT-4 in a Humanoid Robot \"Alter3\". (12","author":"Yoshida Takahide","year":"2023","unstructured":"Takahide Yoshida, Atsushi Masumori, and Takashi Ikegami. 2023. From Text to Motion: Grounding GPT-4 in a Humanoid Robot \"Alter3\". (12 2023). http:\/\/arxiv.org\/abs\/2312.06571"},{"key":"e_1_3_3_3_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341488"},{"key":"e_1_3_3_3_31_1","volume-title":"TreePrompt: Learning to Compose Tree Prompts for Explainable Visual Grounding. arXiv preprint arXiv:2305.11497","author":"Zhang Chenchi","year":"2023","unstructured":"Chenchi Zhang, Jun Xiao, Lei Chen, Jian Shao, and Long Chen. 2023. TreePrompt: Learning to Compose Tree Prompts for Explainable Visual Grounding. arXiv preprint arXiv:2305.11497 (2023)."}],"event":{"name":"CHI '24: CHI Conference on Human Factors in Computing Systems","location":"Honolulu HI USA","acronym":"CHI '24","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGACCESS ACM Special Interest Group on Accessible Computing"]},"container-title":["Extended Abstracts of the CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3613905.3651029","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3613905.3651029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:56:53Z","timestamp":1750291013000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3613905.3651029"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,2]]},"references-count":31,"alternative-id":["10.1145\/3613905.3651029","10.1145\/3613905"],"URL":"https:\/\/doi.org\/10.1145\/3613905.3651029","relation":{},"subject":[],"published":{"date-parts":[[2024,5,2]]},"assertion":[{"value":"2024-05-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}