{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T08:56:27Z","timestamp":1767084987220,"version":"3.48.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246127","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"21825-21832","source":"Crossref","is-referenced-by-count":2,"title":["Embodied Instruction Following in Unknown Environments"],"prefix":"10.1109","author":[{"given":"Zhenyu","family":"Wu","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Automation"}]},{"given":"Ziwei","family":"Wang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,School of Electrical and Electronic Engineering"}]},{"given":"Xiuwei","family":"Xu","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation"}]},{"given":"Hang","family":"Yin","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation"}]},{"given":"Yinan","family":"Liang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation"}]},{"given":"Angyuan","family":"Ma","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation"}]},{"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Automation"}]},{"given":"Haibin","family":"Yan","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications,School of Automation"}]}],"member":"263","reference":[{"article-title":"Objectnav revisited: On evaluation of embodied agents navigating to objects","year":"2020","author":"Batra","key":"ref1"},{"key":"ref2","first-page":"706","article-title":"A persistent spatial semantic representation for high-level natural language instruction execution","volume-title":"CoRL","author":"Blukis","year":"2022"},{"key":"ref3","first-page":"5982","article-title":"Procthor: Large-scale embodied ai using procedural generation","volume-title":"NeurIPS","volume":"35","author":"Deitke"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref4"},{"key":"ref5","first-page":"1743","article-title":"Embodied concept learner: Self-supervised learning of concepts and mapping through instruction following","volume-title":"CoRL","author":"Ding","year":"2023"},{"key":"ref6","first-page":"540","article-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","volume-title":"CoRL","author":"Huang","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.066"},{"article-title":"Open-world task and motion planning via vision-language model inferred constraints","year":"2024","author":"Kumar","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"article-title":"Film: Following instructions in language with modular methods","volume-title":"ICLR","author":"Min","key":"ref10"},{"key":"ref11","article-title":"Embodiedgpt: Vision-language pre-training via embodied chain of thought","volume-title":"NIPS","volume":"36","author":"Mu"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01564"},{"key":"ref13","article-title":"Sayplan: Grounding large language models using 3d scene graphs for scalable robot task planning","author":"Rana","year":"2023","journal-title":"CoRL"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01504"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00280"},{"article-title":"Embodied task planning with large language models","year":"2023","author":"Wu","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.77"},{"key":"ref19","first-page":"5285","article-title":"Sgnav: Online 3d scene graph prompting for llm-based zero-shot object navigation","volume-title":"NeurIPS","volume":"37","author":"Yin"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01775"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342512"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246127.pdf?arnumber=11246127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T06:09:59Z","timestamp":1765519799000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246127\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246127","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}