{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T14:54:57Z","timestamp":1781621697958,"version":"3.54.5"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62173324"],"award-info":[{"award-number":["62173324"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"International Partnership Program of the Chinese Academy of Sciences","award":["104GJHZ2022013GC"],"award-info":[{"award-number":["104GJHZ2022013GC"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcds.2025.3543364","type":"journal-article","created":{"date-parts":[[2025,2,18]],"date-time":"2025-02-18T13:26:46Z","timestamp":1739885206000},"page":"1163-1174","source":"Crossref","is-referenced-by-count":14,"title":["RoboGPT: An LLM-Based Long-Term Decision-Making Embodied Agent for Instruction Following Tasks"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9356-0610","authenticated-orcid":false,"given":"Yaran","family":"Chen","sequence":"first","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6966-6538","authenticated-orcid":false,"given":"Wenbo","family":"Cui","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0537-7074","authenticated-orcid":false,"given":"Yuanwen","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1504-6169","authenticated-orcid":false,"given":"Mining","family":"Tan","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9276-5792","authenticated-orcid":false,"given":"Xinyao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2797-5408","authenticated-orcid":false,"given":"Jinrui","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2559-9585","authenticated-orcid":false,"given":"Haoran","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8218-9633","authenticated-orcid":false,"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3365-4620","authenticated-orcid":false,"given":"He","family":"Wang","sequence":"additional","affiliation":[{"name":"Center on Frontiers of Computing Studies, Peking University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"2165","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","volume-title":"Proc. 7th Conf. Robot Learn.","author":"Zitkovich","year":"2023"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.07.091"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3387401"},{"key":"ref4","article-title":"Prompter: Utilizing large language model prompting for a data efficient embodied instruction following","author":"Inoue","year":"2022"},{"key":"ref5","article-title":"FILM: Following instructions in language with modular methods","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Min","year":"2022"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01564"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25094"},{"key":"ref8","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00280"},{"key":"ref10","first-page":"287","article-title":"Do as I can, not as I say: Grounding language in robotic affordances","volume-title":"Proc. 6th Conf. Robot Learn.","author":"Brohan","year":"2023"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10131-7"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2021.3139543"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.80"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2025.XXI.019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2043174.2043195"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v31i1.16005"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3379969"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342169"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3387941"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2022.3225200"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3028964"},{"key":"ref26","article-title":"Fast segment anything","author":"Zhao","year":"2023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.93.4.1591"},{"issue":"2","key":"ref29","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. NAACL-HLT","volume":"1","author":"Kenton","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01004"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.37"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref33","first-page":"706","article-title":"A persistent spatial semantic representation for high-level natural language instruction execution","volume-title":"Proc. 5th Conf. Robot Learn.","volume":"164","author":"Blukis","year":"2022"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3178804"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01504"},{"key":"ref36","article-title":"LEBP\u2013Language expectation & binding policy: A two-stream framework for embodied vision-and-language interaction task learning agents","author":"Liu","year":"2022"},{"issue":"8","key":"ref37","first-page":"17","article-title":"A planning based neural-symbolic approach for embodied instruction following","volume":"9","author":"Liu","year":"2022","journal-title":"Interactions"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274989\/11197738\/10891883.pdf?arnumber=10891883","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T18:33:20Z","timestamp":1765910000000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10891883\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":37,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2025.3543364","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"value":"2379-8920","type":"print"},{"value":"2379-8939","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}