{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T12:09:07Z","timestamp":1771330147326,"version":"3.50.1"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10190993","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"1-8","source":"Crossref","is-referenced-by-count":7,"title":["Balancing Exploration and Exploitation in Hierarchical Reinforcement Learning via Latent Landmark Graphs"],"prefix":"10.1109","author":[{"given":"Qingyang","family":"Zhang","sequence":"first","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiming","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingqing","family":"Ruan","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuantang","family":"Xiong","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dengpeng","family":"Xing","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref35","article-title":"Hindsight experience replay","volume":"30","author":"andrychowicz","year":"2017","journal-title":"Advances in Neural Information Pro-cessing Systems"},{"key":"ref12","first-page":"5251","article-title":"Sparse graphical memory for robust planning","volume":"33","author":"emmons","year":"2020","journal-title":"ADVANCES IN NEURAL IN-FORMATION PROCESSING SYSTEMS"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/0376-5075(77)90014-9"},{"key":"ref15","article-title":"Learning multi-level hierarchies with hindsight","author":"levy","year":"2019","journal-title":"International Con-ference on Learning Representations"},{"key":"ref14","author":"nachum","year":"2019","journal-title":"Why does hierarchy (sometimes) work so well in reinforcement learning?"},{"key":"ref31","first-page":"1027","article-title":"k-means++: The ad-vantages of careful seeding","author":"vassilvitskii","year":"2006","journal-title":"Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms"},{"key":"ref30","first-page":"1312","article-title":"Universal value function approximators","author":"schaul","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref11","article-title":"Search on the replay buffer: Bridging planning and reinforcement learning","volume":"32","author":"eysenbach","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/509907.509965"},{"key":"ref10","article-title":"Active hierarchical ex-ploration with stable subgoal representation learning","author":"li","year":"2021","journal-title":"International Conference on Learning Representations"},{"key":"ref32","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","volume":"30","author":"tang","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref1","article-title":"Feudal reinforcement learning","volume":"5","author":"dayan","year":"1992","journal-title":"Advances in neural information processing systems"},{"key":"ref17","author":"nair","year":"2019","journal-title":"Hierarchical foresight Self-supervised learning of long-horizon tasks via visual sub-goal generation"},{"key":"ref16","author":"pere","year":"2018","journal-title":"Unsupervised learning of goal spaces for intrinsically motivated goal exploration"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/089976602317318938"},{"key":"ref18","article-title":"Learning actionable representations with goal-conditioned policies","author":"ghosh","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref24","article-title":"Hierarchical reinforcement learning with abductive planning","author":"yamamoto","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61616-8_33"},{"key":"ref26","author":"shang","year":"2019","journal-title":"Learning world graphs to accelerate hierarchical rein-forcement learning"},{"key":"ref25","author":"li","year":"2022","journal-title":"Hierarchical planning through goal-conditioned offline reinforcement learning"},{"key":"ref20","article-title":"Focal: Efficient fully-offline meta-reinforcement learning via distance metric learning and behavior regularization","author":"li","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"5125","DOI":"10.1609\/aaai.v34i04.5955","article-title":"Count-based exploration with the successor represen-tation","volume":"34","author":"machado","year":"2020","journal-title":"Proceedings of the AAAI Conference on Arti-ficial Intelligence"},{"key":"ref21","author":"zhang","year":"2019","journal-title":"Scheduled intrin-sic drive A hierarchical take on intrinsically motivated exploration"},{"key":"ref28","first-page":"12611","article-title":"World model as a graph: Learning latent landmarks for planning","author":"zhang","year":"2021","journal-title":"International Conference on Machine Learning"},{"key":"ref27","author":"jin","year":"2021","journal-title":"Graph-enhanced exploration for goal-oriented reinforcement learning"},{"key":"ref29","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref8","first-page":"21 579","article-title":"Generating adjacency-constrained subgoals in hierarchical reinforcement learning","volume":"33","author":"zhang","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref9","article-title":"Learning subgoal representations with slow dynamics","author":"li","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref4","article-title":"Hierar-chical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","volume":"29","author":"kulkarni","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref6","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","author":"vezhnevets","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref5","article-title":"Data-efficient hierarchical reinforcement learning","volume":"31","author":"nachum","year":"2018","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","location":"Gold Coast, Australia","start":{"date-parts":[[2023,6,18]]},"end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10190993.pdf?arnumber=10190993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:46:19Z","timestamp":1692639979000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10190993\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10190993","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}