{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T14:33:24Z","timestamp":1745850804489,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031184604"},{"type":"electronic","value":"9783031184611"}],"license":[{"start":{"date-parts":[[2022,10,13]],"date-time":"2022-10-13T00:00:00Z","timestamp":1665619200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,10,13]],"date-time":"2022-10-13T00:00:00Z","timestamp":1665619200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-18461-1_35","type":"book-chapter","created":{"date-parts":[[2022,10,12]],"date-time":"2022-10-12T07:15:14Z","timestamp":1665558914000},"page":"538-547","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Uncertainty-Aware Hierarchical Reinforcement Learning Robust to\u00a0Noisy Observations"],"prefix":"10.1007","author":[{"given":"Felippe Schmoeller","family":"Roza","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,13]]},"reference":[{"key":"35_CR1","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/j.inffus.2021.05.008","volume":"76","author":"M Abdar","year":"2021","unstructured":"Abdar, M., et al.: A review of uncertainty quantification in deep learning: techniques, applications and challenges. Inf. Fusion 76, 243\u2013297 (2021)","journal-title":"Inf. Fusion"},{"doi-asserted-by":"crossref","unstructured":"Bacon, P.-L., Harb, J., Precup, D.: The option-critic architecture. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a031 (2017)","key":"35_CR2","DOI":"10.1609\/aaai.v31i1.10916"},{"issue":"4","key":"35_CR3","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1038\/nn.2277","volume":"12","author":"D Badre","year":"2009","unstructured":"Badre, D., Hoffman, J., Cooney, J.W., D\u2019esposito, M.: Hierarchical cognitive control deficits following damage to the human frontal lobe. Nat. Neurosci. 12(4), 515\u2013522 (2009)","journal-title":"Nat. Neurosci."},{"issue":"5","key":"35_CR4","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1016\/j.tics.2019.02.006","volume":"23","author":"M Botvinick","year":"2019","unstructured":"Botvinick, M., Ritter, S., Wang, J.X., Kurth-Nelson, Z., Blundell, C., Hassabis, D.: Reinforcement learning, fast and slow. Trends Cogn. Sci. 23(5), 408\u2013422 (2019)","journal-title":"Trends Cogn. Sci."},{"issue":"3","key":"35_CR5","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1016\/j.cognition.2008.08.011","volume":"113","author":"MM Botvinick","year":"2009","unstructured":"Botvinick, M.M., Niv, Y., Barto, A.G.: Hierarchically organized behavior and its neural foundations: a reinforcement learning perspective. Cognition 113(3), 262\u2013280 (2009)","journal-title":"Cognition"},{"issue":"6","key":"35_CR6","doi-asserted-by":"publisher","first-page":"956","DOI":"10.1016\/j.conb.2012.05.008","volume":"22","author":"MM Botvinick","year":"2012","unstructured":"Botvinick, M.M.: Hierarchical reinforcement learning and decision making. Curr. Opinion Neurobiol. 22(6), 956\u2013962 (2012)","journal-title":"Curr. Opinion Neurobiol."},{"unstructured":"Fort, S., Hu, H., Lakshminarayanan, B.: Deep ensembles: a loss landscape perspective. arXiv preprint arXiv:1912.02757 (2019)","key":"35_CR7"},{"unstructured":"Gal, Y., Ghahramani, Z.: Dropout as a Bayesian approximation: representing model uncertainty in deep learning. In: International Conference on Machine Learning, pp. 1050\u20131059. PMLR (2016)","key":"35_CR8"},{"unstructured":"Haider, T., Roza, F.S., Eilers, D., Roscher, K., G\u00fcnnemann, S.: Domain shifts in reinforcement learning: identifying disturbances in environments (2021)","key":"35_CR9"},{"unstructured":"Henne, M., Schwaiger, A., Roscher, K., Weiss, G.: Benchmarking uncertainty estimation methods for deep learning with safety-related metrics. In: SafeAI@ AAAI, pp. 83\u201390 (2020)","key":"35_CR10"},{"unstructured":"Henne, M., Schwaiger, A., Weiss, G.: Managing uncertainty of AI-based perception for autonomous systems. In: AISafety@ IJCAI (2019)","key":"35_CR11"},{"doi-asserted-by":"crossref","unstructured":"Hoel, C.-J., Wolff, K., Laine, L.: Tactical decision-making in autonomous driving by reinforcement learning with uncertainty estimation. In: 2020 IEEE Intelligent Vehicles Symposium (IV), pp. 1563\u20131569. IEEE (2020)","key":"35_CR12","DOI":"10.1109\/IV47402.2020.9304614"},{"unstructured":"Jong, N.K., Hester, T., Stone, P.: The utility of temporal abstraction in reinforcement learning. In: AAMAS (1), pp. 299\u2013306. Citeseer (2008)","key":"35_CR13"},{"unstructured":"Kahn, G., Villaflor, A., Pong, V., Abbeel, P., Levine, S.: Uncertainty-aware reinforcement learning for collision avoidance. arXiv preprint arXiv:1702.01182 (2017)","key":"35_CR14"},{"unstructured":"Kulkarni, T.D., Narasimhan, K., Saeedi, A., Tenenbaum, J.: Hierarchical deep reinforcement learning: integrating temporal abstraction and intrinsic motivation. In: Advances in Neural Information Processing Systems, vol. 29 (2016)","key":"35_CR15"},{"unstructured":"Liaw, R., Liang, E., Nishihara, R., Moritz, P., Gonzalez, J.E., Stoica, I.: Tune: a research platform for distributed model selection and training. arXiv preprint arXiv:1807.05118 (2018)","key":"35_CR16"},{"doi-asserted-by":"crossref","unstructured":"L\u00fctjens, B., Everett, M., How, J.P.: Safe reinforcement learning with model uncertainty estimates. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 8662\u20138668. IEEE (2019)","key":"35_CR17","DOI":"10.1109\/ICRA.2019.8793611"},{"unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)","key":"35_CR18"},{"unstructured":"Nachum, O., Gu, S.S., Lee, H., Levine, S.: Data-efficient hierarchical reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 31 (2018)","key":"35_CR19"},{"unstructured":"Pertsch, K., Lee, Y., Lim, J.J.: Accelerating reinforcement learning with learned skill priors. arXiv preprint arXiv:2010.11944 (2020)","key":"35_CR20"},{"issue":"2","key":"35_CR21","doi-asserted-by":"publisher","first-page":"370","DOI":"10.1016\/j.neuron.2011.05.042","volume":"71","author":"JJF Ribas-Fernandes","year":"2011","unstructured":"Ribas-Fernandes, J.J.F., et al.: A neural signature of hierarchical reinforcement learningd. Neuron 71(2), 370\u2013379 (2011)","journal-title":"Neuron"},{"issue":"7839","key":"35_CR22","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser, J., et al.: Mastering Atari, go, chess and shogi by planning with a learned model. Nature 588(7839), 604\u2013609 (2020)","journal-title":"Nature"},{"unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)","key":"35_CR23"},{"unstructured":"Schwaiger, A., Sinhamahapatra, P., Gansloser, J., Roscher, K.: Is uncertainty quantification in deep learning sufficient for out-of-distribution detection? In: AISafety@ IJCAI (2020)","key":"35_CR24"},{"unstructured":"Schwaiger, F., et al.: From black-box to white-box: examining confidence calibration under different conditions. arXiv preprint arXiv:2101.02971 (2021)","key":"35_CR25"},{"doi-asserted-by":"crossref","unstructured":"Sedlmeier, A., Gabor, T., Phan, T., Belzner, L., Linnhoff-Popien, C.: Uncertainty-based out-of-distribution detection in deep reinforcement learning. arXiv preprint arXiv:1901.02219 (2019)","key":"35_CR26","DOI":"10.5220\/0008949905220529"},{"issue":"7587","key":"35_CR27","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"7676","key":"35_CR28","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","journal-title":"Nature"},{"issue":"1\u20132","key":"35_CR29","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPS and semi-MDPS: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"unstructured":"Van\u00a0Amersfoort, J., Smith, L., Teh, Y.W., Gal, Y.: Uncertainty estimation using a single deep deterministic neural network. In: International Conference on Machine Learning, pp. 9690\u20139700. PMLR (2020)","key":"35_CR30"},{"unstructured":"Vezhnevets, A.S., et al.: Feudal networks for hierarchical reinforcement learning. In: International Conference on Machine Learning, pp. 3540\u20133549. PMLR (2017)","key":"35_CR31"},{"issue":"11","key":"35_CR32","doi-asserted-by":"publisher","first-page":"5174","DOI":"10.1109\/TNNLS.2018.2805379","volume":"29","author":"Z Yang","year":"2018","unstructured":"Yang, Z., Merrick, K., Jin, L., Abbass, H.A.: Hierarchical deep reinforcement learning for continuous action control. IEEE Trans. Neural Netw. Learn. Syst. 29(11), 5174\u20135184 (2018)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."}],"container-title":["Lecture Notes in Networks and Systems","Proceedings of the Future Technologies Conference (FTC) 2022, Volume 1"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-18461-1_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,12]],"date-time":"2022-10-12T07:25:23Z","timestamp":1665559523000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-18461-1_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,13]]},"ISBN":["9783031184604","9783031184611"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-18461-1_35","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2022,10,13]]},"assertion":[{"value":"13 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"FTC 2022","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Proceedings of the Future Technologies Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vancouver, BC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ftc2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/saiconference.com\/FTC","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}