{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T08:20:15Z","timestamp":1766132415963,"version":"3.48.0"},"reference-count":58,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246863","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"1914-1921","source":"Crossref","is-referenced-by-count":0,"title":["On the Vulnerability of LLM\/VLM-Controlled Robotics"],"prefix":"10.1109","author":[{"given":"Xiyang","family":"Wu","sequence":"first","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Souradip","family":"Chakraborty","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruiqi","family":"Xian","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jing","family":"Liang","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianrui","family":"Guan","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fuxiao","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brian M.","family":"Sadler","sequence":"additional","affiliation":[{"name":"DEVCOM Army Research Laboratory,Adelphi,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dinesh","family":"Manocha","sequence":"additional","affiliation":[{"name":"University of Maryland,College Park,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amrit Singh","family":"Bedi","sequence":"additional","affiliation":[{"name":"University of Central Florida,Orlando,FL,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2025.102963"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2023.04.001"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4375268"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.01363"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43458-7_34"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.375"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.548"},{"article-title":"Universal and transferable adversarial attacks on aligned language models","year":"2023","author":"Zou","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/icra55743.2025.11128119"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/icra55743.2025.11128436"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-023-02294-y"},{"key":"ref12","article-title":"Can an embodied agent find your","author":"Dorbala","year":"2023","journal-title":"IEEE Robotics and Automation Letters"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3511409"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/icra55743.2025.11127890"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2025.3559822"},{"article-title":"Awesome-llm-robotics","year":"2022","author":"Kira","key":"ref16"},{"article-title":"Everything-llms-and-robotics","year":"2023","author":"Rintamaki","key":"ref17"},{"key":"ref18","first-page":"17359","article-title":"The unsurprising effectiveness of pre-trained vision models for control","volume-title":"International Conference on Machine Learning","author":"Parisi"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160969"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_16"},{"article-title":"Social-llava: Enhancing robot navigation through human-language reasoning in social spaces","year":"2024","author":"Payandeh","key":"ref21"},{"article-title":"Vima: General robot manipulation with multimodal prompts","year":"2023","author":"Jiang","key":"ref22"},{"key":"ref23","first-page":"785","article-title":"Perceiver-actor: A multi-task transformer for robotic manipulation","volume-title":"Conference on Robot Learning","author":"Shridhar"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161068"},{"article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","year":"2023","author":"Brohan","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.70"},{"article-title":"Openvla: An open-source vision-language-action model","year":"2024","author":"Kim","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2025.xxi.018"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10611319"},{"article-title":"Open x-embodiment: Robotic learning datasets and rt-x models","year":"2023","author":"Padalkar","key":"ref30"},{"article-title":"Do as i can, not as i say: Grounding language in robotic affordances","year":"2022","author":"Ahn","key":"ref31"},{"article-title":"Language to rewards for robotic skill synthesis","year":"2023","author":"Yu","key":"ref32"},{"article-title":"Mutex: Learning unified policies from multimodal task specifications","year":"2023","author":"Shah","key":"ref33"},{"article-title":"Aha: A vision-language-model for detecting and reasoning over failures in robotic manipulation","year":"2024","author":"Duan","key":"ref34"},{"article-title":"Intriguing properties of neural networks","year":"2013","author":"Szegedy","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eacl-main.14"},{"article-title":"Automatically auditing large language models via discrete optimization","year":"2023","author":"Jones","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3605764.3623985"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.461"},{"article-title":"Aligning large multi-modal model with robust instruction tuning","year":"2023","author":"Liu","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.birob.2023.100131"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.jai.2024.12.003"},{"article-title":"Embodied red teaming for auditing robotic foundation models","year":"2024","author":"Karnik","key":"ref43"},{"article-title":"Trojanrobot: Physical-world backdoor attacks against vlm-based robotic manipulation","year":"2024","author":"Wang","key":"ref44"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801576"},{"article-title":"Baseline defenses for adversarial attacks against aligned language models","year":"2023","author":"Jain","key":"ref46"},{"article-title":"A survey on vision-language-action models for embodied ai","year":"2024","author":"Ma","key":"ref47"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.32388\/ob1z2a"},{"key":"ref49","first-page":"34892","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2023","journal-title":"Advances in neural information processing systems"},{"key":"ref50","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac","year":"2022","journal-title":"Advances in neural information processing systems"},{"article-title":"Instruct2act: Mapping multi-modality instructions to robotic actions with large language model","year":"2023","author":"Huang","key":"ref51"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"article-title":"Aligning modalities in vision large language models via preference fine-tuning","year":"2024","author":"Zhou","key":"ref53"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref54"},{"article-title":"Gpt-4 technical report","year":"2023","author":"Achiam","key":"ref55"},{"article-title":"The llama 3 herd of models","year":"2024","author":"Dubey","key":"ref56"},{"article-title":"A survey on large language models for code generation","year":"2024","author":"Jiang","key":"ref57"},{"issue":"1","key":"ref58","first-page":"1","article-title":"The dawn of lmms: Preliminary explorations with gpt-4v (ision)","volume":"9","author":"Yang","year":"2023"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246863.pdf?arnumber=11246863","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T08:17:17Z","timestamp":1766132237000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246863\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":58,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246863","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}