{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T15:31:49Z","timestamp":1777735909496,"version":"3.51.4"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001381","name":"National Research Foundation, Singapore","doi-asserted-by":"publisher","award":["A-0009428-08-00"],"award-info":[{"award-number":["A-0009428-08-00"]}],"id":[{"id":"10.13039\/501100001381","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128715","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"10744-10750","source":"Crossref","is-referenced-by-count":3,"title":["Goal-Guided Reinforcement Learning: Leveraging Large Language Models for Long-Horizon Task Decomposition"],"prefix":"10.1109","author":[{"given":"Ceng","family":"Zhang","sequence":"first","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhanhong","family":"Sun","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gregory S.","family":"Chirikjian","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Reinforcement learning: an introduction","volume-title":"A Bradford Book","author":"Sutton","year":"2018"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref6","article-title":"Sayplan: Grounding large language models using 3d scene graphs for scalable robot task planning","volume-title":"7th Annual Conference on Robot Learning","author":"Rana","year":"2023"},{"key":"ref7","first-page":"12888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"International Conference on Machine Learning","author":"Li","year":"2022"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10131-7"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref10","article-title":"Exploration by random network distillation","author":"Burda","year":"2018","journal-title":"arXiv preprint"},{"key":"ref11","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv preprint"},{"key":"ref12","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023","journal-title":"arXiv preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/02783649241281508"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.jai.2024.12.003"},{"key":"ref15","article-title":"Foundation models for decision making: Problems, methods, and opportunities","author":"Yang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref16","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Fine-grained human feedback gives better rewards for language model training","volume":"36","author":"Wu","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10610784"},{"key":"ref19","article-title":"Speak foreign languages with your own voice: Cross-lingual neural codec language modeling","author":"Zhang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref20","article-title":"True knowledge comes from practice: Aligning llms with embodied environments via reinforcement learning","author":"Tan","year":"2024","journal-title":"arXiv preprint"},{"key":"ref21","first-page":"3676","article-title":"Grounding large language models in interactive environments with online reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Carta","year":"2023"},{"key":"ref22","article-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv preprint"},{"key":"ref23","first-page":"8657","article-title":"Guiding pretraining in reinforcement learning with large language models","volume-title":"International Conference on Machine Learning","author":"Du","year":"2023"},{"key":"ref24","article-title":"Intrinsic language-guided exploration for complex long-horizon robotic manipulation tasks","author":"Triantafyllidis","year":"2023","journal-title":"arXiv preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/627"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref27","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref28","article-title":"Large language models as commonsense knowledge for large-scale task planning","volume":"36","author":"Zhao","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref29","first-page":"5776","article-title":"Minilm: Deep self-attention distillation for task-agnostic compression of pretrained transformers","volume":"33","author":"Wang","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref30","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref31","article-title":"Reinforcement learning from demonstration through shaping","volume-title":"Twenty-fourth international joint conference on artificial intelligence","author":"Brys","year":"2015"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201311"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00709-2"},{"key":"ref35","article-title":"Final ik plugin for unity","volume-title":"Unity Asset Store","year":"2024"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128715.pdf?arnumber=11128715","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:16:02Z","timestamp":1756880162000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128715\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128715","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}