{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T12:50:32Z","timestamp":1774702232240,"version":"3.50.1"},"reference-count":60,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/tnnls.2024.3425809","type":"journal-article","created":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T13:35:07Z","timestamp":1723469707000},"page":"8455-8469","source":"Crossref","is-referenced-by-count":3,"title":["Guided Cooperation in Hierarchical Reinforcement Learning via Model-Based Rollout"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4622-0119","authenticated-orcid":false,"given":"Haoran","family":"Wang","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8765-6464","authenticated-orcid":false,"given":"Zeshen","family":"Tang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6052-2781","authenticated-orcid":false,"given":"Yaoru","family":"Sun","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1987-9150","authenticated-orcid":false,"given":"Fang","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Brunel University London, Uxbridge, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0001-0204","authenticated-orcid":false,"given":"Siyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5515-1943","authenticated-orcid":false,"given":"Yeming","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Vezhnevets"},{"key":"ref2","first-page":"21579","article-title":"Generating adjacency-constrained subgoals in hierarchical reinforcement learning","volume-title":"Proc. NIPS","author":"Zhang"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3192418"},{"key":"ref4","first-page":"1","article-title":"Learning subgoal representations with slow dynamics","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Li"},{"key":"ref5","first-page":"1","article-title":"DHRL: A graph-based approach for long-horizon and sparse hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lee"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3069005"},{"key":"ref7","first-page":"1942","article-title":"Mapping state space using landmarks for universal goal reaching","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Huang"},{"key":"ref8","first-page":"15246","article-title":"Search on the replay buffer: Bridging planning and reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Eysenbach"},{"key":"ref9","first-page":"12611","article-title":"World model as a graph: Learning latent landmarks for planning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102454"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3165941"},{"key":"ref12","first-page":"1","article-title":"Exploration by random network distillation","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Burda"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3080521"},{"key":"ref14","first-page":"1","article-title":"Accelerating reinforcement learning with value-conditional state entropy exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Kim"},{"key":"ref15","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Nachum"},{"key":"ref16","first-page":"1","article-title":"Learning multi-level hierarchies with hindsight","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Levy"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/480"},{"key":"ref18","first-page":"5049","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz"},{"key":"ref19","first-page":"5713","article-title":"Context-aware dynamics model for generalization in model-based reinforcement learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref20","first-page":"12968","article-title":"Trajectory-wise multiple choice learning for dynamics generalization in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Seo"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3057023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3060404"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487175"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487140"},{"key":"ref25","first-page":"9348","article-title":"Hierarchical foresight: Self-supervised learning of long-horizon tasks via visual subgoal generation","volume-title":"Proc. Int. Conf. Learn. Represent.","volume":"12","author":"Nair"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487172"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.14569\/ijacsa.2022.0130904"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3568231.3568258"},{"issue":"2","key":"ref29","first-page":"447","article-title":"Combinations of micro-macro states and subgoals discovery in hierarchical reinforcement learning for path finding","volume":"18","author":"Setyawan","year":"2022","journal-title":"Int. J. Innov. Comput. Inf. Control"},{"key":"ref30","first-page":"28336","article-title":"Landmark-guided subgoal generation in hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Kim"},{"key":"ref31","first-page":"2379","article-title":"Stochastic neural networks for hierarchical reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","volume":"4","author":"Florensa"},{"key":"ref32","first-page":"7586","article-title":"Near-optimal representation learning for hierarchical reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","volume":"10","author":"Nachum"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3059912"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2023.3296765"},{"key":"ref35","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref36","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref37","first-page":"5362","article-title":"Semi-parametric topological memory for navigation","volume-title":"Proc. Int. Conf. Learn. Represent.","volume":"7","author":"Savinov"},{"key":"ref38","first-page":"5251","article-title":"Sparse graphical memory for robust planning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Emmons"},{"key":"ref39","first-page":"935","article-title":"Plan2Vec: Unsupervised representation learning by latent plans","volume-title":"Proc. Mach. Learn. Res.","author":"Yang"},{"key":"ref40","article-title":"Imitating graph-based planning with goal-conditioned policies","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kim"},{"key":"ref41","first-page":"6215","article-title":"Hallucinative topological memory for zero-shot visual planning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Liu"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1561\/2200000086"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/122344.122377"},{"key":"ref44","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Chua"},{"key":"ref45","first-page":"1288","article-title":"Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees","volume-title":"Proc. Int. Conf. Learn. Represent.","volume":"2","author":"Luo"},{"key":"ref46","first-page":"12487","article-title":"When to trust your model: Model-based policy optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"16","author":"Janner"},{"key":"ref47","article-title":"Model-based value expansion for efficient model-free reinforcement learning","author":"Feinberg","year":"2018","journal-title":"arXiv:1803.00101"},{"key":"ref48","first-page":"8224","article-title":"Sample-efficient reinforcement learning with stochastic ensemble value expansion","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Buckman"},{"key":"ref49","first-page":"1","article-title":"Guided policy search","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Levine"},{"key":"ref50","first-page":"566","article-title":"Imagined value gradients: Model-based policy optimization with tranferable latent dynamics models","volume-title":"Proc. Mach. Learn. Res.","author":"Byravan"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3696-5"},{"key":"ref52","first-page":"3219","article-title":"Model-ensemble trust-region policy optimization","volume-title":"Proc. Int. Conf. Learn. Represent.","volume":"4","author":"Kurutach"},{"key":"ref53","first-page":"2823","article-title":"Model-based policy optimization with unsupervised model adaptation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Shen"},{"key":"ref54","first-page":"14129","article-title":"MOPO: Model-based offline policy optimization","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Yu"},{"key":"ref55","first-page":"28954","article-title":"COMBO: Conservative offline model-based policy optimization","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Yu"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-022-06144-5"},{"key":"ref57","first-page":"11752","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"15","author":"Kumar"},{"key":"ref58","article-title":"Robust offline reinforcement learning with gradient penalty and constraint relaxation","author":"Gao","year":"2022","journal-title":"arXiv:2210.10469"},{"key":"ref59","first-page":"5768","article-title":"Improved training of Wasserstein GANs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Gulrajani"},{"key":"ref60","first-page":"1613","article-title":"Weight uncertainty in neural network","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Blundell"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/10982361\/10633748.pdf?arnumber=10633748","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:37Z","timestamp":1764959977000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10633748\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":60,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2024.3425809","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}