{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T20:55:08Z","timestamp":1779310508775,"version":"3.51.4"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11247043","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"14923-14930","source":"Crossref","is-referenced-by-count":1,"title":["Robotic Programmer: Video Instructed Policy Code Generation for Robotic Manipulation"],"prefix":"10.1109","author":[{"given":"Senwei","family":"Xie","sequence":"first","affiliation":[{"name":"Chinese Academy of Sciences (CAS),Key Laboratory of AI Safety of CAS, Institute of Computing Technology,Beijing,China,100190"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences (CAS),Key Laboratory of AI Safety of CAS, Institute of Computing Technology,Beijing,China,100190"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhanqi","family":"Xiao","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences (CAS),Key Laboratory of AI Safety of CAS, Institute of Computing Technology,Beijing,China,100190"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruiping","family":"Wang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences (CAS),Key Laboratory of AI Safety of CAS, Institute of Computing Technology,Beijing,China,100190"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xilin","family":"Chen","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences (CAS),Key Laboratory of AI Safety of CAS, Institute of Computing Technology,Beijing,China,100190"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023"},{"key":"ref2","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref3","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","author":"Brohan","year":"2023"},{"key":"ref4","article-title":"Open x-embodiment: Robotic learning datasets and rt-x models","author":"Padalkar","year":"2023"},{"key":"ref5","article-title":"Openvla: An open-source vision-language-action model","author":"Kim","year":"2024"},{"key":"ref6","article-title":"Robocodex: Multimodal code generation for robotic behavior synthesis","author":"Mu","year":"2024"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01324"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.120"},{"key":"ref9","first-page":"785","article-title":"Perceiver-actor: A multitask transformer for robotic manipulation","volume-title":"Conference on Robot Learning","author":"Shridhar"},{"key":"ref10","article-title":"Procedures as a representation for data in a computer program for understanding natural language","author":"Winograd","year":"1971"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref12","article-title":"SAM-E: leveraging visual foundation model with sequence imitation for embodied manipulation","volume-title":"International Conference on Machine Learning","author":"Zhang"},{"key":"ref13","article-title":"Llarva: Vision-action instruction tuning enhances robot learning","author":"Niu","year":"2024"},{"key":"ref14","first-page":"287","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"Conference on robot learning","author":"Brohan"},{"key":"ref15","first-page":"8469","article-title":"Palm-e: An embodied multimodal language model","volume-title":"International Conference on Machine Learning","author":"Driess"},{"key":"ref16","first-page":"1769","article-title":"Inner monologue: Embodied reasoning through planning with language models","volume-title":"Conference on Robot Learning","author":"Huang"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref19","article-title":"Instruct2act: Mapping multi-modality instructions to robotic actions with large language model","author":"Huang","year":"2023"},{"key":"ref20","first-page":"540","article-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","volume-title":"Conference on Robot Learning","author":"Huang"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/JRA.1987.1087068"},{"key":"ref22","article-title":"Gemini: a family of highly capable multimodal models","author":"Anil","year":"2023"},{"key":"ref23","article-title":"Deepseek-coder-v2: Breaking the barrier of closed-source models in code intelligence","author":"Zhu","year":"2024"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3281153"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"ref27","article-title":"Qwen technical report","author":"Bai","year":"2023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72643-9_22"},{"key":"ref30","article-title":"Hello gpt-4o","year":"2024"},{"key":"ref31","first-page":"27 730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974707"},{"key":"ref33","article-title":"Libero: Benchmarking knowledge transfer for lifelong robot learning","volume":"36","author":"Liu","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref34","article-title":"Deepseek-coder: When the large language model meets programming\u2013the rise of code intelligence","author":"Guo","year":"2024"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Hangzhou, China","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11247043.pdf?arnumber=11247043","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:37:40Z","timestamp":1766061460000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11247043\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11247043","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}