{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T19:17:58Z","timestamp":1776367078663,"version":"3.51.2"},"reference-count":64,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610634","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"15083-15091","source":"Crossref","is-referenced-by-count":21,"title":["Statler: State-Maintaining Language Models for Embodied Reasoning"],"prefix":"10.1109","author":[{"given":"Takuma","family":"Yoneda","sequence":"first","affiliation":[{"name":"Toyota Technological Institute at Chicago"}]},{"given":"Jiading","family":"Fang","sequence":"additional","affiliation":[{"name":"Toyota Technological Institute at Chicago"}]},{"given":"Peng","family":"Li","sequence":"additional","affiliation":[{"name":"Fudan University"}]},{"given":"Huanyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Chicago"}]},{"given":"Tianchong","family":"Jiang","sequence":"additional","affiliation":[{"name":"University of Chicago"}]},{"given":"Shengjie","family":"Lin","sequence":"additional","affiliation":[{"name":"Toyota Technological Institute at Chicago"}]},{"given":"Ben","family":"Picker","sequence":"additional","affiliation":[{"name":"University of Chicago"}]},{"given":"David","family":"Yunis","sequence":"additional","affiliation":[{"name":"Toyota Technological Institute at Chicago"}]},{"given":"Hongyuan","family":"Mei","sequence":"additional","affiliation":[{"name":"Toyota Technological Institute at Chicago"}]},{"given":"Matthew R.","family":"Walter","sequence":"additional","affiliation":[{"name":"Toyota Technological Institute at Chicago"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Large language models are zero-shot reasoners","author":"Kojima","year":"2022"},{"key":"ref2","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref3","article-title":"GPT-4 technical report","year":"2023"},{"key":"ref4","article-title":"Do as I can, not as I say: Grounding language in robotic affordances","author":"Ahn","year":"2022"},{"key":"ref5","article-title":"Code as policies: Language model programs for embodied control","author":"Liang","year":"2022"},{"key":"ref6","article-title":"Inner monologue: Embodied reasoning through planning with language models","volume-title":"Proceedings of the Conference on Robot Learning (CoRL)","author":"Huang"},{"key":"ref7","article-title":"Foundation models for decision making: Problems, methods, and opportunities","author":"Yang","year":"2023"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11900-7_17"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref10","author":"Koller","year":"2009","journal-title":"Probabilistic graphical models: principles and techniques"},{"key":"ref11","article-title":"Anthropic introducing 100k Context windows"},{"key":"ref12","article-title":"Lost in the middle: How language models use long contexts","author":"Liu","year":"2023"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.62"},{"key":"ref14","article-title":"On the planning abilities of large language models (a critical investigation with a proposed benchmark)","author":"Valmeekam","year":"2023"},{"key":"ref15","article-title":"Leveraging pre-trained large language models to construct and utilize world models for model-based task planning","author":"Guan","year":"2023"},{"key":"ref16","article-title":"Llm+ p: Empowering large language models with optimal planning proficiency","author":"Liu","year":"2023"},{"key":"ref17","article-title":"Generalized planning in PDDL domains with pretrained large language models","author":"Silver","year":"2023"},{"key":"ref18","article-title":"Chain-of- thought prompting elicits reasoning in large language models","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Wei"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"ref20","author":"Ouyang","year":"2022","journal-title":"Training language models to follow instructions with human feedback"},{"key":"ref21","article-title":"A survey for in-context learning","author":"Dong","year":"2022"},{"key":"ref22","first-page":"12697","article-title":"Calibrate before use: Improving few-shot performance of language models","volume-title":"International Conference on Machine Learning","author":"Zhao"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/0167-2789(90)90087-6"},{"key":"ref24","article-title":"Procedures as a representation for data in a computer program for understanding natural language","volume-title":"Ph.D. dissertation, Massachusetts Institute of Technology","author":"Winograd","year":"1971"},{"key":"ref25","article-title":"Walk the talk: Connecting language, knowledge, and action in route instructions","volume-title":"Proceedings of the National Conference on Artificial Intelligence (AAAI)","author":"MacMahon"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2010.5453186"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2010.5453189"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v25i1.7974"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v25i1.7979"},{"key":"ref30","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-00065-7_28","article-title":"Learning to parse natural language commands to a robot control system","volume-title":"Proceedings of the International Symposium on Experimental Robotics (ISER)","author":"Matuszek"},{"key":"ref31","article-title":"Learning to interpret natural language commands through human-robot dialog","volume-title":"Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)","author":"Thomason"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907841"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1177\/0278364915602060"},{"key":"ref34","article-title":"Learning multi-modal grounded linguistic semantics by playing \u201dI spy\"","volume-title":"Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)","author":"Thomason"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11966"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.028"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1177\/0278364918777627"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10364"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00387"},{"key":"ref40","article-title":"Speaker-follower models for vision-and- language navigation","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Fried"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01003"},{"key":"ref42","article-title":"FILM: Following instructions in language with modular methods","author":"Min","year":"2021"},{"key":"ref43","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Huang"},{"key":"ref44","article-title":"Programmatically grounded, compositionally generalizable robotic manipulation","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Wang"},{"key":"ref45","article-title":"Leveraging language for accelerated learning of tool manipulation","volume-title":"Proceedings of the Conference on Robot Learning (CoRL)","author":"Ren"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_16"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02219"},{"key":"ref48","article-title":"LM-Nav: Robotic navigation with large pre-trained models of language, vision, and action","volume-title":"Proceedings of the Conference on Robot Learning (CoRL)","author":"Shah"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10160969"},{"key":"ref50","article-title":"CLIPort: What and where pathways for robotic manipulation","author":"Shridhar","year":"2021"},{"key":"ref51","article-title":"Socratic models: Composing zero-shot multimodal reasoning with language","author":"Zeng","year":"2022"},{"key":"ref52","article-title":"PaLM-E: An embodied multimodal language model","author":"Driess","year":"2023"},{"key":"ref53","article-title":"Measuring coding challenge competence with APPS","author":"Hendrycks","year":"2021"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1126\/science.abq1158"},{"key":"ref55","article-title":"CodeT: Code generation with generated tests","author":"Chen","year":"2022"},{"key":"ref56","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"ref57","article-title":"Toolformer: Language models can teach themselves to use tools","author":"Schick","year":"2023"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01092"},{"key":"ref59","article-title":"Gorilla: Large language model connected with massive APIs","author":"Patil","year":"2023"},{"key":"ref60","article-title":"Show your work: Scratchpads for intermediate computation with language models","author":"Nye","year":"2021"},{"key":"ref61","article-title":"Learning to reason with relational abstractions","author":"Nam","year":"2022"},{"key":"ref62","article-title":"Autoregressive modeling with lookahead attention","author":"Du","year":"2023"},{"key":"ref63","article-title":"From word models to world models: Translating from natural language to the probabilistic language of thought","author":"Wong","year":"2023"},{"key":"ref64","article-title":"Language models meet world models: Embodied experiences enhance language models","author":"Xiang","year":"2023"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610634.pdf?arnumber=10610634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:52:12Z","timestamp":1723269132000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610634\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610634","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}