{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T09:23:06Z","timestamp":1771924986103,"version":"3.50.1"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,3]]},"DOI":"10.1109\/robio66223.2025.11375936","type":"proceedings-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T20:43:52Z","timestamp":1771879432000},"page":"1642-1647","source":"Crossref","is-referenced-by-count":0,"title":["From Speech to Action: Design and Implementation of a Multimodal Robotic Grasping System Driven by Large Language Models"],"prefix":"10.1109","author":[{"given":"Songji","family":"Chen","sequence":"first","affiliation":[{"name":"School of Electromechanical Engineering, Guangdong University of Technology,Biomimetic and Intelligent Robotics Lab (BIRL),Guangzhou,China,510006"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiaowen","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Electromechanical Engineering, Guangdong University of Technology,Biomimetic and Intelligent Robotics Lab (BIRL),Guangzhou,China,510006"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Electromechanical Engineering, Guangdong University of Technology,Biomimetic and Intelligent Robotics Lab 
(BIRL),Guangzhou,China,510006"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huan","family":"Peng","sequence":"additional","affiliation":[{"name":"School of Electromechanical Engineering, Guangdong University of Technology,Biomimetic and Intelligent Robotics Lab (BIRL),Guangzhou,China,510006"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yisheng","family":"Guan","sequence":"additional","affiliation":[{"name":"School of Electromechanical Engineering, Guangdong University of Technology,Biomimetic and Intelligent Robotics Lab (BIRL),Guangzhou,China,510006"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/02783649241281508"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.20517\/ir.2025.13"},{"key":"ref3","first-page":"2165","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","volume-title":"Conference on Robot Learning. 
PMLR","author":"Zitkovich","year":"2023"},{"key":"ref4","author":"Driess","year":"2023","journal-title":"Palm-e: An embodied multimodal language model"},{"key":"ref5","author":"Huang","year":"2024","journal-title":"Rekep: Spatio-temporal reasoning of relational keypoint constraints for robotic manipulation"},{"key":"ref6","author":"Kim","year":"2024","journal-title":"Openvla: An open-source vision-language-action model"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340777"},{"key":"ref8","author":"Zeng","year":"2023","journal-title":"Large language models for robotics: A survey"},{"key":"ref9","author":"Stankevich","year":"2024","journal-title":"Interpreting and learning voice commands with a large language model for a robot system"},{"key":"ref10","author":"Huang","year":"2024","journal-title":"Understanding the planning of LLM agents: A survey"},{"key":"ref11","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1142\/S2972335324500029"},{"issue":"3","key":"ref13","first-page":"6","volume":"2","author":"Jiang","year":"2022","journal-title":"Vima: General robot manipulation with multimodal prompts"},{"key":"ref14","first-page":"894","article-title":"Cliport: What and where pathways for robotic manipulation","volume-title":"Conference on robot learning. 
PMLR","author":"Shridhar","year":"2022"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00280"},{"key":"ref16","author":"Shen","year":"2021","journal-title":"How much can clip benefit vision-and-language tasks?"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01146"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917735594"}],"event":{"name":"2025 IEEE International Conference on Robotics and Biomimetics (ROBIO)","location":"Chengdu, China","start":{"date-parts":[[2025,12,3]]},"end":{"date-parts":[[2025,12,7]]}},"container-title":["2025 IEEE International Conference on Robotics and Biomimetics (ROBIO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11373909\/11375854\/11375936.pdf?arnumber=11375936","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T08:34:08Z","timestamp":1771922048000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11375936\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,3]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/robio66223.2025.11375936","relation":{},"subject":[],"published":{"date-parts":[[2025,12,3]]}}}