{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:10:56Z","timestamp":1755839456663},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9981607","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T14:38:15Z","timestamp":1672065495000},"page":"2483-2489","source":"Crossref","is-referenced-by-count":4,"title":["Learning Skills to Navigate without a Master: A Sequential Multi-Policy Reinforcement Learning Algorithm"],"prefix":"10.1109","volume":"550","author":[{"given":"Ambedkar","family":"Dukkipati","sequence":"first","affiliation":[{"name":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"}]},{"given":"Rajarshi","family":"Banerjee","sequence":"additional","affiliation":[{"name":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"}]},{"given":"Ranga Shaarad","family":"Ayyagari","sequence":"additional","affiliation":[{"name":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"}]},{"given":"Dhaval Parmar","family":"Udaybhai","sequence":"additional","affiliation":[{"name":"Indian Institute of Science,Department of Computer Science and Automation,Bangalore,India,560012"}]}],"member":"263","reference":[{"key":"ref13","first-page":"271","article-title":"Feudal reinforcement learning","author":"dayan","year":"1993","journal-title":"Advances in neural information processing systems"},{"key":"ref12","article-title":"Soft options critic","author":"lobo","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1613\/jair.639"},{"key":"ref14","first-page":"1043","article-title":"Reinforcement learning with hierarchies of machines","author":"parr","year":"1998","journal-title":"Advances in neural information processing systems"},{"key":"ref31","first-page":"6008","article-title":"Gener-alization through simulation: Integrating simulated and real data into deep reinforcement learning for vision-based autonomous flight","author":"kang","year":"0","journal-title":"2019 International Conference on Robotics and Automation (ICRA)"},{"key":"ref30","article-title":"Safe, multiagent, reinforcement learning for autonomous driving","author":"shalev-shwartz","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref11","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2004.180"},{"journal-title":"Duckietown environments for openai gym","year":"2018","author":"chevalier-boisvert","key":"ref10"},{"key":"ref2","article-title":"Starcraft ii: A new challenge for reinforcement learning","author":"vinyals","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref1","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"Neural Information Processing Systems"},{"key":"ref17","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","author":"vezhnevets","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70 JMLR org"},{"key":"ref16","first-page":"11570","article-title":"Variational temporal abstraction","author":"kim","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref19","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref18","article-title":"Soac: The soft option actor-critic architecture","author":"li","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref24","article-title":"Iterative hierarchi-cal optimization for misspecified problems (ihomp)","author":"mankowitz","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref23","article-title":"Meta learning shared hierarchies","author":"frans","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref26","article-title":"Progressive neural networks","author":"rusu","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref25","article-title":"Option discovery using deep skill chaining","author":"bagaria","year":"0","journal-title":"NeurIPS 2019 Deep Reinforcement Learning Workshop"},{"key":"ref20","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014691"},{"key":"ref21","article-title":"Learning multilevel hierarchies with hindsight","author":"levy","year":"0","journal-title":"Proceedings of International Conference on Learning Representations"},{"key":"ref28","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref27","article-title":"Modeling purposeful adaptive behavior with the principle of maximum causal entropy","author":"ziebart","year":"2010","journal-title":"aAI3438449"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460655"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref7","first-page":"1349","article-title":"In-vestigating human priors for playing video games","author":"dubey","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/0010-0277(93)90058-4"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref5","article-title":"The promise of hierarchical reinforcement learning","author":"flet-berliac","year":"2019","journal-title":"GradientSHAP"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2022,10,23]]},"location":"Kyoto, Japan","end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09981607.pdf?arnumber=9981607","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T19:10:14Z","timestamp":1701371414000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9981607\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9981607","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}