{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T03:34:24Z","timestamp":1771558464397,"version":"3.50.1"},"reference-count":63,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"\u201cNew Generation Artificial Intelligence\u201d Key Field Research and Development Plan of Guangdong Province","award":["2021B0101410002"],"award-info":[{"award-number":["2021B0101410002"]}]},{"DOI":"10.13039\/501100002855","name":"National Science and Technology Major Project of the Ministry of Science and Technology of China","doi-asserted-by":"publisher","award":["2018AAA0102900"],"award-info":[{"award-number":["2018AAA0102900"]}],"id":[{"id":"10.13039\/501100002855","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1109\/tnnls.2024.3354061","type":"journal-article","created":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T13:47:15Z","timestamp":1706795235000},"page":"2705-2719","source":"Crossref","is-referenced-by-count":10,"title":["Goal-Conditioned Hierarchical Reinforcement Learning With High-Level Model Approximation"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6229-4639","authenticated-orcid":false,"given":"Yu","family":"Luo","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"given":"Tianying","family":"Ji","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3546-6305","authenticated-orcid":false,"given":"Fuchun","family":"Sun","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"given":"Huaping","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7856-5760","authenticated-orcid":false,"given":"Jianwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Informatics, University of Hamburg, Hamburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4335-9455","authenticated-orcid":false,"given":"Mingxuan","family":"Jing","sequence":"additional","affiliation":[{"name":"Science and Technology on Integrated Information System Laboratory, Institute of Software, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2566-4159","authenticated-orcid":false,"given":"Wenbing","family":"Huang","sequence":"additional","affiliation":[{"name":"Gaoling School 
of Artificial Intelligence, Renmin University of China, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref4","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Bellemare"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2890773"},{"key":"ref6","first-page":"1496","article-title":"Solving challenging dexterous manipulation tasks with trajectory optimisation and reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Charlesworth"},{"key":"ref7","first-page":"521","article-title":"Skill discovery for exploration and planning using deep skill graphs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bagaria"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref9","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Vezhnevets"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3089834"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3190379"},{"key":"ref13","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Nachum"},{"key":"ref14","first-page":"21732","article-title":"Hierarchical reinforcement learning with timed subgoals","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"G\u00fcrtler"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref16","first-page":"1","article-title":"Learning subgoal representations with slow dynamics","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Li"},{"key":"ref17","first-page":"1","article-title":"Learning multi-level hierarchies with hindsight","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Levy"},{"key":"ref18","first-page":"1430","article-title":"Goal-conditioned reinforcement learning with imagined subgoals","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chane-Sane"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3190100"},{"key":"ref20","first-page":"15246","article-title":"Search on the replay buffer: Bridging planning and reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Eysenbach"},{"key":"ref21","first-page":"5020","article-title":"Sub-goal trees a framework for goal-based reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jurgenson"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3059912"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2023.3305983"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref26","first-page":"271","article-title":"Feudal reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Dayan"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3192418"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3087733"},{"key":"ref29","first-page":"9419","article-title":"Language as an abstraction for hierarchical deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jiang"},{"key":"ref30","first-page":"1","article-title":"Semi-parametric topological memory for navigation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Savinov"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460730"},{"key":"ref32","first-page":"1","article-title":"Latent skill planning for exploration and transfer","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Xie"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref34","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref35","first-page":"1","article-title":"Hierarchical foresight: Self-supervised learning of long-horizon tasks via visual subgoal generation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Nair"},{"key":"ref36","first-page":"9190","article-title":"Model-based reinforcement learning via latent-space collocation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rybkin"},{"issue":"51","key":"ref37","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"Jaksch","year":"2010","journal-title":"J. Mach. Learn. Res."},{"key":"ref38","first-page":"10746","article-title":"Reinforcement learning in feature space: Matrix bandit, kernels, and regret bound","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref39","first-page":"6708","article-title":"On efficiency in hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wen"},{"key":"ref40","first-page":"271","article-title":"Feudal reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"5","author":"Dayan"},{"key":"ref41","first-page":"1","article-title":"Model-ensemble trust-region policy optimization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kurutach"},{"key":"ref42","first-page":"1","article-title":"Guided policy search","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Levine"},{"key":"ref43","first-page":"8289","article-title":"Differentiable MPC for end-to-end planning and control","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Amos"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.2514\/1.G001921"},{"key":"ref45","first-page":"3003","article-title":"(More) efficient reinforcement learning via posterior sampling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Osband"},{"key":"ref46","first-page":"21579","article-title":"Generating adjacency-constrained subgoals in hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref47","first-page":"28336","article-title":"Landmark-guided subgoal generation in hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Kim"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00452"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2019.2921336"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(01)00191-8"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2012.2185697"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2019.108675"},{"key":"ref53","first-page":"10170","article-title":"Model-free reinforcement learning in infinite-horizon average-reward Markov decision processes","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wei"},{"key":"ref54","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref55","first-page":"573","article-title":"Decentralized cooperative reinforcement learning with hierarchical information structure","volume-title":"Proc. Int. Conf. Algorithmic Learn. Theory","author":"Kao"},{"key":"ref56","first-page":"8833","article-title":"Deep hierarchy in bandits","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hong"},{"key":"ref57","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref58","first-page":"1300","article-title":"ROBEL: Robotics benchmarks for learning with low-cost robots","volume-title":"Proc. Conf. Robot Learn.","author":"Ahn"},{"key":"ref59","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Duan"},{"key":"ref60","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref61","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref62","first-page":"1","article-title":"Temporal difference models: Model-free deep RL for model-based control","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Pong"},{"key":"ref63","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10877690\/10418512.pdf?arnumber=10418512","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:08Z","timestamp":1764959948000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10418512\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":63,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2024.3354061","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2]]}}}