{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T10:11:33Z","timestamp":1760609493623,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031210891"},{"type":"electronic","value":"9783031210907"}],"license":[{"start":{"date-parts":[[2022,12,15]],"date-time":"2022-12-15T00:00:00Z","timestamp":1671062400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,15]],"date-time":"2022-12-15T00:00:00Z","timestamp":1671062400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-21090-7_12","type":"book-chapter","created":{"date-parts":[[2022,12,14]],"date-time":"2022-12-14T18:11:35Z","timestamp":1671041495000},"page":"188-204","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Hierarchical Reinforcement Learning Under Mixed Observability"],"prefix":"10.1007","author":[{"given":"Hai","family":"Nguyen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhihan","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrea","family":"Baisero","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Platt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher","family":"Amato","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,12,15]]},"reference":[{"unstructured":"Andrychowicz, M., Wolski, F., Ray, A., Schneider, J., Fong, R., Welinder, P., McGrew, B., Tobin, J., Abbeel, P., Zaremba, W.: Hindsight experience replay (2017). arXiv:1707.01495","key":"12_CR1"},{"key":"12_CR2","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1016\/0022-247X(65)90154-X","volume":"10","author":"KJ Astrom","year":"1965","unstructured":"Astrom, K.J.: Optimal control of markov decision processes with incomplete state estimation. J. Math. Anal. Appl. 10, 174\u2013205 (1965)","journal-title":"J. Math. Anal. Appl."},{"doi-asserted-by":"crossref","unstructured":"Chen, M., Nikolaidis, S., Soh, H., Hsu, D., Srinivasa, S.: Planning with trust for human-robot collaboration. In: Proceedings of the 2018 ACM\/IEEE International Conference on Human-Robot Interaction, pp. 307\u2013315 (2018)","key":"12_CR3","DOI":"10.1145\/3171221.3171264"},{"doi-asserted-by":"crossref","unstructured":"Cho, K., Van\u00a0Merri\u00ebnboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., Bengio, Y.: Learning phrase representations using rnn encoder-decoder for statistical machine translation (2014). arXiv:1406.1078","key":"12_CR4","DOI":"10.3115\/v1\/D14-1179"},{"issue":"4","key":"12_CR5","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1007\/s10514-011-9241-4","volume":"31","author":"TH Chung","year":"2011","unstructured":"Chung, T.H., Hollinger, G.A., Isler, V.: Search and pursuit-evasion in mobile robotics. Auton. Robots 31(4), 299\u2013316 (2011)","journal-title":"Auton. Robots"},{"key":"12_CR6","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the maxq value function decomposition. J. Artif. Intell. Res. 13, 227\u2013303 (2000)","journal-title":"J. Artif. Intell. Res."},{"unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR (2018)","key":"12_CR7"},{"unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)","key":"12_CR8"},{"unstructured":"Han, D., Doya, K., Tani, J.: Variational recurrent models for solving partially observable control tasks. In: 8th International Conference on Learning Representations, ICLR (2020)","key":"12_CR9"},{"unstructured":"Heess, N., Hunt, J.J., Lillicrap, T.P., Silver, D.: Memory-based control with recurrent neural networks (2015). arXiv:1512.04455","key":"12_CR10"},{"unstructured":"Hernandez-Gardiol, N., Mahadevan, S.: Hierarchical memory-based reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 1047\u20131053 (2001)","key":"12_CR11"},{"issue":"8","key":"12_CR12","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"doi-asserted-by":"crossref","unstructured":"Hsiao, K., Kaelbling, L.P., Lozano-Perez, T.: Grasping pomdps. In: Proceedings 2007 IEEE International Conference on Robotics and Automation, pp. 4685\u20134692. IEEE (2007)","key":"12_CR13","DOI":"10.1109\/ROBOT.2007.364201"},{"unstructured":"Kulkarni, T.D., Narasimhan, K., Saeedi, A., Tenenbaum, J.: Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. Advances in Neural Information Processing Systems, vol. 29, pp. 3675\u20133683 (2016)","key":"12_CR14"},{"key":"12_CR15","doi-asserted-by":"publisher","first-page":"49089","DOI":"10.1109\/ACCESS.2018.2854283","volume":"6","author":"TP Le","year":"2018","unstructured":"Le, T.P., Vien, N.A., Chung, T.: A deep hierarchical reinforcement learning algorithm in partially observable markov decision processes. Ieee Access 6, 49089\u201349102 (2018)","journal-title":"Ieee Access"},{"unstructured":"Levy, A., Konidaris, G.D., Jr., R.P., Saenko, K.: Learning multi-level hierarchies with hindsight. In: 7th International Conference on Learning Representations, ICLR (2019)","key":"12_CR16"},{"unstructured":"Li, A.C., Florensa, C., Clavera, I., Abbeel, P.: Sub-policy adaptation for hierarchical reinforcement learning. In: 8th International Conference on Learning Representations, ICLR (2020)","key":"12_CR17"},{"unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., Wierstra, D.: Continuous control with deep reinforcement learning (2015). arXiv:1509.02971","key":"12_CR18"},{"unstructured":"Ma, X., Karkus, P., Hsu, D., Lee, W.S., Ye, N.: Discriminative particle filter reinforcement learning for complex partial observations. In: 8th International Conference on Learning Representations, ICLR (2020)","key":"12_CR19"},{"unstructured":"Nachum, O., Gu, S.S., Lee, H., Levine, S.: Data-efficient hierarchical reinforcement learning. In: Advances in Neural Information Processing Systems, vol.\u00a031 (2018)","key":"12_CR20"},{"doi-asserted-by":"crossref","unstructured":"Nikolaidis, S., Zhu, Y.X., Hsu, D., Srinivasa, S.: Human-robot mutual adaptation in shared autonomy. In: 2017 12th ACM\/IEEE International Conference on Human-Robot Interaction (HRI), pp. 294\u2013302. IEEE (2017)","key":"12_CR21","DOI":"10.1145\/2909824.3020252"},{"doi-asserted-by":"crossref","unstructured":"Ong, S.C., Png, S.W., Hsu, D., Lee, W.S.: Pomdps for robotic tasks with mixed observability. In: Robotics: Science and Systems, vol.\u00a05, p.\u00a04 (2009)","key":"12_CR22","DOI":"10.7551\/mitpress\/8727.003.0027"},{"unstructured":"Precup, D., Sutton, R.S.: Multi-time models for temporally abstract planning. Advances in Neural Information Processing Systems, 10 (1997)","key":"12_CR23"},{"doi-asserted-by":"crossref","unstructured":"Schwarm, E., Gravesmill, K.M., Whitney, J.P.: A floating-piston hydrostatic linear actuator and remote-direct-drive 2-dof gripper. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 7562\u20137568. IEEE (2019)","key":"12_CR24","DOI":"10.1109\/ICRA.2019.8794378"},{"doi-asserted-by":"crossref","unstructured":"Steckelmacher, D., Roijers, D.M., Harutyunyan, A., Vrancx, P., Plisnier, H., Now\u00e9, A.: Reinforcement learning in pomdps with memoryless options and option-observation initiation sets. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","key":"12_CR25","DOI":"10.1609\/aaai.v32i1.11606"},{"doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","key":"12_CR26","DOI":"10.1109\/IROS.2012.6386109"},{"unstructured":"Wang, N., Pynadath, D.V., Hill, S.G.: The impact of pomdp-generated explanations on trust and performance in human-robot teams. In: AAMAS, pp. 997\u20131005 (2016)","key":"12_CR27"},{"issue":"2","key":"12_CR28","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1177\/105971239700600202","volume":"6","author":"M Wiering","year":"1997","unstructured":"Wiering, M., Schmidhuber, J.: Hq-learning. Adapt. Behav. 6(2), 219\u2013246 (1997)","journal-title":"Adapt. Behav."},{"doi-asserted-by":"crossref","unstructured":"Xiao, Y., Katt, S., ten Pas, A., Chen, S., Amato, C.: Online planning for target object search in clutter under partial observability. In: 2019 International Conference on Robotics and Automation (ICRA). pp. 8241\u20138247. IEEE (2019)","key":"12_CR29","DOI":"10.1109\/ICRA.2019.8793494"},{"unstructured":"Yang, Z., Nguyen, H.: Recurrent off-policy baselines for memory-based continuous control. Deep RL Workshop NeurIPS (2021)","key":"12_CR30"}],"container-title":["Springer Proceedings in Advanced Robotics","Algorithmic Foundations of Robotics XV"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-21090-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,2]],"date-time":"2023-12-02T23:36:39Z","timestamp":1701560199000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-21090-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,15]]},"ISBN":["9783031210891","9783031210907"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-21090-7_12","relation":{},"ISSN":["2511-1256","2511-1264"],"issn-type":[{"type":"print","value":"2511-1256"},{"type":"electronic","value":"2511-1264"}],"subject":[],"published":{"date-parts":[[2022,12,15]]},"assertion":[{"value":"15 December 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WAFR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on the Algorithmic Foundations of Robotics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":", MD","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 June 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 June 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"wafr2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/wafr2022.github.io","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}