{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T20:53:45Z","timestamp":1775163225671,"version":"3.50.1"},"reference-count":30,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100031839","name":"Korea Research Institute for Defense Technology Planning and Advancement","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100031839","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003626","name":"Defense Acquisition Program Administration","doi-asserted-by":"publisher","award":["KRIT-CT-23-003\/20%"],"award-info":[{"award-number":["KRIT-CT-23-003\/20%"]}],"id":[{"id":"10.13039\/501100003626","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003626","name":"Defense Acquisition Program Administration","doi-asserted-by":"publisher","award":["RS-2022-I1220951-LBA\/10%"],"award-info":[{"award-number":["RS-2022-I1220951-LBA\/10%"]}],"id":[{"id":"10.13039\/501100003626","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NRF","award":["RS-2024-00353991-SPARC\/10%"],"award-info":[{"award-number":["RS-2024-00353991-SPARC\/10%"]}]},{"name":"NRF","award":["RS-2023-00274280-HEI\/10%"],"award-info":[{"award-number":["RS-2023-00274280-HEI\/10%"]}]},{"name":"NRF","award":["RS-2024-00358416-AutoRL\/20%"],"award-info":[{"award-number":["RS-2024-00358416-AutoRL\/20%"]}]},{"DOI":"10.13039\/501100003662","name":"Korea Evaluation Institute of Industrial Technology","doi-asserted-by":"publisher","award":["RS-2025-25453780\/10%"],"award-info":[{"award-number":["RS-2025-25453780\/10%"]}],"id":[{"id":"10.13039\/501100003662","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003661","name":"Korea Institute for Advancement of Technology","doi-asserted-by":"publisher","award":["RS-2025-25460896\/10%"],"award-info":[{"award-number":["RS-2025-25460896\/10%"]}],"id":[{"id":"10.13039\/501100003661","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Korean government"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1109\/lra.2026.3673900","type":"journal-article","created":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T19:55:25Z","timestamp":1773431725000},"page":"5883-5890","source":"Crossref","is-referenced-by-count":0,"title":["Climb With SHERPA: Heuristic-Guided Reinforcement Learning via Segmented Experience Relay"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-0615-4305","authenticated-orcid":false,"given":"Minji","family":"Kim","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Seoul National University, Seoul, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5575-6441","authenticated-orcid":false,"given":"Ganghun","family":"Lee","sequence":"additional","affiliation":[{"name":"Interdisciplinary Program in Artificial Intelligence and AIIS, Seoul National University, Seoul, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9601-3863","authenticated-orcid":false,"given":"Minsu","family":"Lee","sequence":"additional","affiliation":[{"name":"School of AI Convergence, Sungshin Women&#x2019;s University, Seoul, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9890-0389","authenticated-orcid":false,"given":"Byoung-Tak","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Seoul National University, Seoul, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00227"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.tre.2022.102712"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref5","article-title":"Diffusion-reinforcement learning hierarchical motion planning in multi-agent adversarial games","author":"Wu","year":"2025"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636226"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i6.16638"},{"key":"ref8","first-page":"8657","article-title":"Guiding pretraining in reinforcement learning with large language models","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","volume":"202","author":"Du","year":"2023"},{"key":"ref9","first-page":"13550","article-title":"Heuristic-guided reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Cheng","year":"2021"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-020-09459-6"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3400189"},{"key":"ref13","first-page":"305","article-title":"ALVINN: An autonomous land vehicle in a neural network","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"1","author":"Pomerleau","year":"1988"},{"key":"ref14","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist. JMLR Workshop Conf. Proc.","author":"Ross","year":"2011"},{"key":"ref15","first-page":"4572","article-title":"Generative adversarial imitation learning","volume-title":"Proc. 30th Int. Conf. Neural Inf. Process. Syst.","author":"Ho","year":"2016"},{"key":"ref16","article-title":"Learning robust rewards with adversarial inverse reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fu","year":"2018"},{"key":"ref17","article-title":"Imitation learning via off-policy distribution matching","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kostrikov","year":"2020"},{"key":"ref18","first-page":"4028","article-title":"IQ-Learn: Inverse soft-Q learning for imitation","volume-title":"Proc. 35th Int. Conf. Neural Inf. Process. Syst.","author":"Garg","year":"2021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/687"},{"key":"ref20","article-title":"Offline reinforcement learning with implicit Q-learning","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Kostrikov","year":"2022"},{"key":"ref21","article-title":"DemoDICE: Offline imitation learning with supplementary imperfect demonstrations","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Kim","year":"2022"},{"key":"ref22","first-page":"18404","article-title":"Imitation learning from imperfection: Theoretical justifications and algorithms","volume-title":"Proc. 37th Conf. Neural Inf. Process. Syst.","author":"Li","year":"2023"},{"key":"ref23","article-title":"Eureka: Human-level reward design via coding large language models","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Ma","year":"2024"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01554"},{"key":"ref25","article-title":"Vision-language models are zero-shot reward models for reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representation","author":"Rocamonde","year":"2024"},{"key":"ref26","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"Plappert","year":"2018"},{"key":"ref27","article-title":"panda-gym: Open-source goal-conditioned environments for robotic learning","volume-title":"Proc. 4th Robot Learn. Workshop: Self-Supervised Lifelong Learn. NeurIPS","author":"Galloudec","year":"2021"},{"key":"ref28","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref29","first-page":"5048","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz","year":"2017"},{"key":"ref30","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2018"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11435997\/11434514.pdf?arnumber=11434514","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:52:37Z","timestamp":1775159557000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434514\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":30,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/lra.2026.3673900","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5]]}}}