{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T18:19:20Z","timestamp":1780337960792,"version":"3.54.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T00:00:00Z","timestamp":1696550400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T00:00:00Z","timestamp":1696550400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key R &D Program","award":["2022YFC3301800"],"award-info":[{"award-number":["2022YFC3301800"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10489-023-05022-4","type":"journal-article","created":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T09:02:05Z","timestamp":1696582925000},"page":"28555-28569","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Uncertainty-aware hierarchical reinforcement learning for long-horizon tasks"],"prefix":"10.1007","volume":"53","author":[{"given":"Wenning","family":"Hu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongbin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ming","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nianbin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,10,6]]},"reference":[{"key":"5022_CR1","doi-asserted-by":"crossref","unstructured":"Song Y, Wang J, Lukasiewicz T, et al (2019) Diversity-driven extensible hierarchical reinforcement learning. In: Proceedings of the AAAI conference on artificial intelligence, pp 4992\u20134999","DOI":"10.1609\/aaai.v33i01.33014992"},{"key":"5022_CR2","unstructured":"Mai V, Mani K, Paull L (2022) Sample efficient deep reinforcement learning via uncertainty estimation. In: The 10th international conference on learning representations"},{"key":"5022_CR3","unstructured":"Kendall A, Gal Y (2017) What uncertainties do we need in bayesian deep learning for computer vision. In: Advances in neural information processing systems, pp 5574\u20135584"},{"key":"5022_CR4","doi-asserted-by":"publisher","unstructured":"Padakandla S, J. PK, Bhatnagar S (2020) Reinforcement learning algorithm for non-stationary environments. Appl Intell 50(11):3590\u20133606. https:\/\/doi.org\/10.1007\/s10489-020-01758-5","DOI":"10.1007\/s10489-020-01758-5"},{"key":"5022_CR5","unstructured":"Ghosh D, Rahme J, Kumar A, et al (2021) Why generalization in RL is difficult: Epistemic pomdps and implicit partial observability. In: Advances in neural information processing systems, pp 25,502\u201325,515"},{"key":"5022_CR6","unstructured":"Lakshminarayanan B, Pritzel A, Blundell C (2017) Simple and scalable predictive uncertainty estimation using deep ensembles. In: Advances in neural information processing systems, pp 6402\u20136413"},{"key":"5022_CR7","unstructured":"Chua K, Calandra R, McAllister R, et al (2018) Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: Advances in neural information processing systems, pp 4759\u20134770"},{"key":"5022_CR8","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems, pp 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"5022_CR9","doi-asserted-by":"publisher","first-page":"1353","DOI":"10.1016\/j.ins.2022.07.156","volume":"609","author":"D Li","year":"2022","unstructured":"Li D, Meng L, Li J et al (2022) Domain adaptive state representation alignment for reinforcement learning. Inf Sci 609:1353\u20131368. https:\/\/doi.org\/10.1016\/j.ins.2022.07.156","journal-title":"Inf Sci"},{"key":"5022_CR10","unstructured":"Hoang C, Sohn S, Choi J, et al (2021) Successor feature landmarks for long-horizon goal-conditioned reinforcement learning. In: Advances in neural information processing systems, pp 26,963\u201326,975"},{"issue":"110","key":"5022_CR11","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1016\/j.knosys.2023.110401","volume":"267","author":"N Tasfi","year":"2023","unstructured":"Tasfi N, Santana E, Liboni L et al (2023) Dynamic successor features for transfer learning and guided exploration. Knowl-Based Syst 267(110):401. https:\/\/doi.org\/10.1016\/j.knosys.2023.110401","journal-title":"Knowl-Based Syst"},{"issue":"9","key":"5022_CR12","doi-asserted-by":"publisher","first-page":"9701","DOI":"10.1007\/s10489-021-02873-7","volume":"52","author":"H Ge","year":"2022","unstructured":"Ge H, Ge Z, Sun L et al (2022) Enhancing cooperation by cognition differences and consistent representation in multi-agent reinforcement learning. Appl Intell 52(9):9701\u20139716. https:\/\/doi.org\/10.1007\/s10489-021-02873-7","journal-title":"Appl Intell"},{"key":"5022_CR13","unstructured":"Tian S, Nair S, Ebert F, et al (2021) Model-based visual planning with self-supervised functional distances. In: The 9th international conference on learning representations"},{"key":"5022_CR14","doi-asserted-by":"crossref","unstructured":"Zhu M, Liu M, Shen J, et al (2021) Mapgo: Model-assisted policy optimization for goal-oriented tasks. In: Proceedings of the thirtieth international joint conference on artificial intelligence, pp 3484\u20133491","DOI":"10.24963\/ijcai.2021\/480"},{"key":"5022_CR15","unstructured":"Kipf TN, van der Pol E, Welling M (2020) Contrastive learning of structured world models. In: The 8th international conference on learning representations"},{"issue":"1","key":"5022_CR16","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1109\/TIV.2022.3185159","volume":"8","author":"J Wu","year":"2022","unstructured":"Wu J, Huang Z, Lv C (2022) Uncertainty-aware model-based reinforcement learning: Methodology and application in autonomous driving. IEEE Trans Intell Veh 8(1):194\u2013203","journal-title":"IEEE Trans Intell Veh"},{"key":"5022_CR17","unstructured":"Eysenbach B, Salakhutdinov R, Levine S (2019) Search on the replay buffer: Bridging planning and reinforcement learning. In: Advances in neural information processing systems, pp 15,220\u201315,231"},{"key":"5022_CR18","unstructured":"Zhang L, Yang G, Stadie BC (2021) World model as a graph: Learning latent landmarks for planning. In: Proceedings of the 38th international conference on machine learning, ICML 2021, pp 12,611\u201312,620"},{"key":"5022_CR19","unstructured":"Emmons S, Jain A, Laskin M, et al (2020) Sparse graphical memory for robust planning. In: Advances in neural information processing systems"},{"key":"5022_CR20","unstructured":"Li S, Zheng L, Wang J, et al (2021) Learning subgoal representations with slow dynamics. In: The 9th international conference on learning representations"},{"key":"5022_CR21","unstructured":"Sharma A, Gu S, Levine S, et al (2020) Dynamics-aware unsupervised discovery of skills. In: The 8th international conference on learning representations"},{"issue":"120","key":"5022_CR22","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1016\/j.eswa.2023.120625","volume":"230","author":"W Song","year":"2023","unstructured":"Song W, Jeon S, Choi H et al (2023) Learning disentangled skills for hierarchical reinforcement learning through trajectory autoencoder with weak labels. Expert Syst Appl 230(120):625. https:\/\/doi.org\/10.1016\/j.eswa.2023.120625","journal-title":"Expert Syst Appl"},{"key":"5022_CR23","doi-asserted-by":"crossref","unstructured":"Bacon P, Harb J, Precup D (2017) The option-critic architecture. In: Proceedings of the AAAI conference on artificial intelligence, pp 1726\u20131734","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"5022_CR24","unstructured":"Kulkarni TD, Narasimhan K, Saeedi A, et al (2016) Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. In: Advances in neural information processing systems, pp 3675\u20133683"},{"key":"5022_CR25","unstructured":"Chane-Sane E, Schmid C, Laptev I (2021) Goal-conditioned reinforcement learning with imagined subgoals. In: Proceedings of the 38th international conference on machine learning, pp 1430\u20131440"},{"key":"5022_CR26","unstructured":"Li S, Zhang J, Wang J, et al (2022) Active hierarchical exploration with stable subgoal representation learning. In: The tenth international conference on learning representations"},{"issue":"105","key":"5022_CR27","first-page":"152","volume":"114","author":"Y Zhou","year":"2022","unstructured":"Zhou Y, Ho HW (2022) Online robot guidance and navigation in non-stationary environment with hybrid hierarchical reinforcement learning. Eng Appl Artif Intell 114(105):152","journal-title":"Eng Appl Artif Intell"},{"key":"5022_CR28","unstructured":"Kim J, Seo Y, Shin J (2021) Landmark-guided subgoal generation in hierarchical reinforcement learning. In: Advances in neural information processing systems, pp 28,336\u201328,349"},{"key":"5022_CR29","unstructured":"Zhang T, Guo S, Tan T, et al (2020) Generating adjacency-constrained subgoals in hierarchical reinforcement learning. In: Advances in neural information processing systems, pp 21,579\u201321,590"},{"key":"5022_CR30","unstructured":"Levy A, Konidaris GD, Jr. RP, et al (2019) Learning multi-level hierarchies with hindsight. In: The 7th international conference on learning representations"},{"key":"5022_CR31","unstructured":"Nachum O, Gu SS, Lee H, et al (2018) Data-efficient hierarchical reinforcement learning. Adv Neural Inf Process 31"},{"key":"5022_CR32","unstructured":"Andrychowicz M, Crow D, Ray A, et al (2017) Hindsight experience replay. In: Advances in neural information processing systems, pp 5048\u20135058"},{"key":"5022_CR33","unstructured":"Jiao Y, Tsuruoka Y (2022) Hirl: Dealing with non-stationarity in hierarchical reinforcement learning via high-level relearning. Paper presented at the AAAI-22 Workshop on Reinforcement Learning in Games"},{"key":"5022_CR34","unstructured":"Wu Y, Zhai S, Srivastava N, et al (2021) Uncertainty weighted actor-critic for offline reinforcement learning. In: Proceedings of the 38th international conference on machine learning, pp 11,319\u201311,328"},{"key":"5022_CR35","unstructured":"Kumar A, Fu J, Soh M, et al (2019) Stabilizing off-policy q-learning via bootstrapping error reduction. In: Advances in neural information processing systems, pp 11,761\u201311,771"},{"issue":"119","key":"5022_CR36","doi-asserted-by":"publisher","first-page":"011","DOI":"10.1016\/j.ins.2023.119011","volume":"640","author":"F Huang","year":"2023","unstructured":"Huang F, Deng X, He Y et al (2023) A novel policy based on action confidence limit to improve exploration efficiency in reinforcement learning. Inf Sci 640(119):011. https:\/\/doi.org\/10.1016\/j.ins.2023.119011","journal-title":"Inf Sci"},{"key":"5022_CR37","doi-asserted-by":"crossref","unstructured":"Lockwood O, Si M (2022) A review of uncertainty for deep reinforcement learning. In: Proceedings of the eighteenth AAAI conference on artificial intelligence and interactive digital entertainment, pp 155\u2013162","DOI":"10.1609\/aiide.v18i1.21959"},{"key":"5022_CR38","unstructured":"Osband I, Roy BV, Russo DJ, et al (2019) Deep exploration via randomized value functions. J Mach Learn Res 20:124:1\u2013124:62"},{"key":"5022_CR39","unstructured":"Fujimoto S, van Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: Proceedings of the 35th international conference on machine learning, pp 1582\u20131591"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-05022-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-05022-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-05022-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T14:17:13Z","timestamp":1701267433000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-05022-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,6]]},"references-count":39,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["5022"],"URL":"https:\/\/doi.org\/10.1007\/s10489-023-05022-4","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,6]]},"assertion":[{"value":"18 September 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing of interest"}}]}}