{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:32:22Z","timestamp":1773376342709,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","funder":[{"name":"Carl-Zeiss-Stiftung for the project Co-Presence of Humans and Interactive Companions for Seniors (CO-HUMANICS, 06\\\/2021\u201305\\\/2026) http:\\\/\\\/www.co-humanics. de","award":["-"],"award-info":[{"award-number":["-"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,16]]},"DOI":"10.1145\/3776734.3794452","type":"proceedings-article","created":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:05:48Z","timestamp":1773345948000},"page":"531-535","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Flexible LLM-Based Voice Assistance for Mobile Robots"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8447-0584","authenticated-orcid":false,"given":"S\u00f6hnke Benedikt","family":"Fischedick","sequence":"first","affiliation":[{"name":"TU Ilmenau, Ilmenau, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9263-213X","authenticated-orcid":false,"given":"Robin","family":"Schmidt","sequence":"additional","affiliation":[{"name":"TU Ilmenau, Ilmenau, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5445-3935","authenticated-orcid":false,"given":"Benedict","family":"Stephan","sequence":"additional","affiliation":[{"name":"TU Ilmenau, Ilmenau, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9712-0225","authenticated-orcid":false,"given":"Horst-Michael","family":"Gross","sequence":"additional","affiliation":[{"name":"TU Ilmenau, Ilmenau, Germany"}]}],"member":"320","published-online":{"date-parts":[[2026,3,16]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Anthropic. 2024. Introducing the Model Context Protocol \u2014 anthropic.com. https:\/\/www.anthropic.com\/news\/model-context-protocol [Accessed 24-11-2025]"},{"key":"e_1_3_2_2_2_1","unstructured":"Anthropic. 2025. Claude Sonnet 4.5. https:\/\/www.anthropic.com\/news\/claude-sonnet-4-5"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN60168.2024.10731330"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3568294.3580040"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-024-01143-z"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/mhs63891.2024.10856297"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385959"},{"key":"e_1_3_2_2_8_1","volume-title":"Proc. of ISR.","author":"Fischedick S\u00f6hnke Benedikt","year":"2023","unstructured":"S\u00f6hnke Benedikt Fischedick, Kay Richter, Tim Wengefeld, et al. 2023. Bridging Distance with a Collaborative Telepresence Robot for Older Adults \u2013 Report on Progress in the CO-HUMANICS Project. In Proc. of ISR."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.3390\/s25072024"},{"key":"e_1_3_2_2_10_1","unstructured":"Dhruv Jain Harshit Shukla Gautam Rajeev Ashish Kulkarni Chandra Khatri and Shubham Agarwal. 2025. VoiceAgentBench: Are Voice Assistants ready for agentic tasks? arXiv preprint arXiv:2510.07978."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.3377"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_2_13_1","unstructured":"Alexander H. Liu Andy Ehrenberg Andy Lo et al. 2025. Voxtral. arXiv preprint arXiv:2507.13264."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abm6074"},{"key":"e_1_3_2_2_15_1","unstructured":"Christopher E Mower Yuhui Wan Hongzhan Yu Antoine Grosnit Jonas Gonzalez-Billandon Matthieu Zimmer Jinlong Wang Xinyu Zhang Yao Zhao Anbang Zhai et al. 2024. ROS-LLM: A ROS Framework for Embodied AI with Task Feedback and Structured Reasoning. arXiv preprint arXiv:2406.19741."},{"key":"e_1_3_2_2_16_1","unstructured":"OpenAI. 2024. GPT-4o System Card. arxiv:2410.21276. arxiv:2410.21276"},{"key":"e_1_3_2_2_17_1","unstructured":"OpenAI. 2025. GPT-5 System Card. https:\/\/cdn.openai.com\/gpt-5-system-card.pdf [Accessed 01-12-2025]"},{"key":"e_1_3_2_2_18_1","unstructured":"Sihyeong Park Sungryeol Jeon Chaelyn Lee et al. 2025. A Survey on Inference Engines for Large Language Models: Perspectives on Optimization and Efficiency. arXiv preprint arXiv:2505.01658."},{"key":"e_1_3_2_2_19_1","volume-title":"ICRA Workshop on Open Source Software.","author":"Quigley Morgan","year":"2009","unstructured":"Morgan Quigley, Ken Conley, and Brian Gerkey. 2009. ROS: an open-source Robot Operating System. In ICRA Workshop on Open Source Software."},{"key":"e_1_3_2_2_20_1","unstructured":"Stanislau Stankevich and Wojciech Dudek. 2024. Interpreting and learning voice commands with a Large Language Model for a robot system. arxiv:2407.21512. arxiv:2407.21512"},{"key":"e_1_3_2_2_21_1","unstructured":"Sidharth Surapaneni Hoang Nguyen Jash Mehta Aman Tiwari Oluwanifemi Bamgbose Akshay Kalkunte Sai Rajeswar and Sathwik Tejaswi Madhusudhan. 2025. AU-Harness: An Open-Source Toolkit for Holistic Evaluation of Audio LLMs. arXiv preprint arXiv:2509.08031."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801517"},{"key":"e_1_3_2_2_23_1","unstructured":"Gemma Team. 2025. Gemma 3n. https:\/\/ai.google.dev\/gemma\/docs\/gemma-3n"},{"key":"e_1_3_2_2_24_1","unstructured":"Meituan LongCat Team Bairui Wang Bayan and Bin Xiao. 2025. LongCat-Flash-Omni Technical Report. arxiv:2511.00279. arxiv:2511.00279"},{"key":"e_1_3_2_2_25_1","unstructured":"Gijs Wijngaard Elia Formisano Michel Dumontier et al. 2025. AudioToolAgent: An Agentic Framework for Audio-Language Models. arxiv:2510.02995. arxiv:2510.02995"},{"key":"e_1_3_2_2_26_1","unstructured":"Jin Xu Zhifang Guo Jinzheng He et al. 2025. Qwen2.5-omni technical report. arXiv preprint arXiv:2503.20215."},{"key":"e_1_3_2_2_27_1","unstructured":"Jin Xu Zhifang Guo Hangrui Hu et al. 2025. Qwen3-Omni: An Audio-Visual Language Model with Function Calling. arXiv preprint arXiv:2509.17765."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3672539.3686759"},{"key":"e_1_3_2_2_29_1","unstructured":"Yuchong Zhang Bastian Orthmann and Shichen Ji. 2025. Multimodal \"Puppeteer\": An Exploration of Robot Teleoperation Via Virtual Counterpart with LLM-Driven Voice and Gesture Interaction in Augmented Reality. arxiv:2506.13189. arxiv:2506.13189"}],"event":{"name":"HRI '26: 21st ACM\/IEEE International Conference on Human-Robot Interaction","location":"Edinburgh Scotland UK","acronym":"HRI '26","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction","IEEE RAS"]},"container-title":["Companion Proceedings of the 21st ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:10:52Z","timestamp":1773346252000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3776734.3794452"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,16]]},"references-count":29,"alternative-id":["10.1145\/3776734.3794452","10.1145\/3776734"],"URL":"https:\/\/doi.org\/10.1145\/3776734.3794452","relation":{},"subject":[],"published":{"date-parts":[[2026,3,16]]},"assertion":[{"value":"2026-03-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}