{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,29]],"date-time":"2026-07-29T19:34:54Z","timestamp":1785353694715,"version":"3.55.0"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610455","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"15172-15179","source":"Crossref","is-referenced-by-count":24,"title":["Distilling and Retrieving Generalizable Knowledge for Robot Manipulation via Language Corrections"],"prefix":"10.1109","author":[{"given":"Lihan","family":"Zha","sequence":"first","affiliation":[{"name":"Stanford University,Computer Science Department,Stanford,CA,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuchen","family":"Cui","sequence":"additional","affiliation":[{"name":"Stanford University,Computer Science Department,Stanford,CA,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Li-Heng","family":"Lin","sequence":"additional","affiliation":[{"name":"Stanford University,Computer Science Department,Stanford,CA,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Minae","family":"Kwon","sequence":"additional","affiliation":[{"name":"Stanford University,Computer Science Department,Stanford,CA,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Montserrat Gonzalez","family":"Arenas","sequence":"additional","affiliation":[{"name":"Google Deepmind,Moutain View,CA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andy","family":"Zeng","sequence":"additional","affiliation":[{"name":"Google Deepmind,Moutain View,CA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fei","family":"Xia","sequence":"additional","affiliation":[{"name":"Google Deepmind,Moutain View,CA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dorsa","family":"Sadigh","sequence":"additional","affiliation":[{"name":"Stanford University,Computer Science Department,Stanford,CA,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10160591"},{"key":"ref2","article-title":"Inner monologue: Embodied reasoning through planning with language models","author":"Huang","year":"2022"},{"key":"ref3","article-title":"React: Synergizing reasoning and acting in language models","author":"Yao","year":"2022"},{"key":"ref4","article-title":"Language to rewards for robotic skill synthesis","author":"Yu","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-101119-071628"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.065"},{"key":"ref7","article-title":"Robots that ask for help: Uncertainty alignment for large language model planners","author":"Ren","year":"2023"},{"key":"ref8","article-title":"Toward grounded social reasoning","author":"Kwon","year":"2023"},{"key":"ref9","first-page":"9118","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"International Conference on Machine Learning","author":"Huang"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341577"},{"key":"ref11","article-title":"Do as i can and not as i say: Grounding language in robotic affordances","author":"Ahn","year":"2022"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10131-7"},{"key":"ref13","article-title":"Progprompt: Generating situated robot task plans using large language models","author":"Singh","year":"2022"},{"key":"ref14","article-title":"Reward design with language models","author":"Kwon","year":"2023"},{"key":"ref15","article-title":"Language instructed reinforcement learning for human-ai coordination","volume-title":"40th International Conference on Machine Learning (ICML)","author":"Hu"},{"key":"ref16","article-title":"Large language models as general pattern machines","author":"Mirchandani","year":"2023"},{"key":"ref17","article-title":"Gesture-informed robot assistance via foundation models","volume-title":"7th Annual Conference on Robot Learning","author":"Lin"},{"key":"ref18","article-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","author":"Huang","year":"2023"},{"key":"ref19","article-title":"Perceiver-actor: A multitask transformer for robotic manipulation","volume-title":"Proceedings of the 6th Conference on Robot Learning (CoRL)","author":"Shridhar"},{"key":"ref20","article-title":"Kite: Keypoint-conditioned policies for semantic manipulation","author":"Sundaresan","year":"2023"},{"key":"ref21","article-title":"Reflect: Summarizing robot experiences for failure explanation and correction","author":"Liu","year":"2023"},{"key":"ref22","article-title":"Doremi: Grounding language model by detecting and recovering from plan-execution misalignment","author":"Guo","year":"2023"},{"issue":"8","key":"ref23","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref24","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref25","article-title":"Few-shot learning with retrieval augmented language models","author":"Izacard","year":"2022"},{"key":"ref26","first-page":"8483","article-title":"Language models with image descriptors are strong few-shot video-language learners","volume":"35","author":"Wang","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref27","article-title":"Socratic models: Composing zero-shot multimodal reasoning with language","author":"Zeng","year":"2022"},{"key":"ref28","article-title":"Transformers generalize differently from information stored in context vs in weights","author":"Chan","year":"2022"},{"key":"ref29","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412102"},{"key":"ref31","article-title":"Albert: A lite bert for self-supervised learning of language representations","author":"Lan","year":"2019"},{"key":"ref32","article-title":"Voyager: An open-ended embodied agent with large language models","author":"Wang","year":"2023"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1093\/oed\/3308720859"},{"key":"ref34","article-title":"Guiding policies with language via meta-learning","volume-title":"International Conference on Learning Representations","author":"Co-Reyes"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981810"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10161068"},{"key":"ref37","article-title":"Towards real-time natural language corrections for assistive robots","volume-title":"RSS Workshop on Model Learning for Human-Robot Communication","author":"Broad"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3568162.3578623"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3295255"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref41","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"ref42","article-title":"Chain-of-thought prompting elicits reasoning in large language models","author":"Wei","year":"2023"},{"key":"ref43","article-title":"Dino2: Learning robust visual features without supervision","author":"Oquab","year":"2023"},{"key":"ref44","article-title":"Gpt-4 technical report","year":"2023"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610455.pdf?arnumber=10610455","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:20:25Z","timestamp":1723267225000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610455\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610455","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}