{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T19:09:47Z","timestamp":1729624187276,"version":"3.28.0"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,10,24]],"date-time":"2020-10-24T00:00:00Z","timestamp":1603497600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,10,24]]},"DOI":"10.1109\/iros45743.2020.9340891","type":"proceedings-article","created":{"date-parts":[[2021,3,15]],"date-time":"2021-03-15T14:49:56Z","timestamp":1615819796000},"page":"5572-5579","source":"Crossref","is-referenced-by-count":1,"title":["Hypothesis-Driven Skill Discovery for Hierarchical Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Caleb","family":"Chuck","sequence":"first","affiliation":[{"name":"The University of Texas at Austin Personal Robotics and Automation Lab."}]},{"given":"Supawit","family":"Chockchowwat","sequence":"additional","affiliation":[{"name":"The University of Texas at Austin Personal Robotics and Automation Lab."}]},{"given":"Scott","family":"Niekum","sequence":"additional","affiliation":[{"name":"The University of Texas at Austin Personal Robotics and Automation Lab."}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref38","first-page":"5690","article-title":"Imagination-augmented agents for deep reinforcement learning","author":"racani\u00e8re","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref33","first-page":"2746","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","author":"watter","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref32","first-page":"3191","article-title":"The predictron: End-to-end learning and planning","volume":"70","author":"silver","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"article-title":"Model-based reinforcement learning for atari","year":"2019","author":"kaiser","key":"ref31"},{"key":"ref30","first-page":"4539","article-title":"Visual interaction networks: Learning a physics simulator from video","author":"watters","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"article-title":"Plan online, learn offline: Efficient learning and exploration via modelbased control","year":"2018","author":"lowrey","key":"ref36"},{"article-title":"Learning real-world robot policies by dreaming","year":"2018","author":"piergiovanni","key":"ref35"},{"article-title":"On learning to think: Algorithmic information theory for novel combinations of reinforcement learning controllers and recurrent neural world 
models","year":"2015","author":"schmidhuber","key":"ref34"},{"article-title":"Relational deep reinforcement learning","year":"2018","author":"zambaldi","key":"ref28"},{"key":"ref27","first-page":"1809","article-title":"Schema networks: Zero-shot transfer with a generative causal model of intuitive physics","volume":"70","author":"kansky","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref29","first-page":"1026","article-title":"Object-oriented curriculum generation for reinforcement learning","author":"silva","year":"2018","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"article-title":"A review of robot learning for manipulation: Challenges, representations, and algorithms","year":"2019","author":"kroemer","key":"ref2"},{"article-title":"A review of robot learning for manipulation: Challenges, representations, and algorithms","year":"2019","author":"kroemer","key":"ref1"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803161"},{"article-title":"Evolution strategies as a scalable alternative to reinforcement learning","year":"2017","author":"salimans","key":"ref21"},{"article-title":"Woulda, coulda, shoulda: Counterfactually-guided policy search","year":"2018","author":"buesing","key":"ref24"},{"key":"ref23","first-page":"3195","article-title":"Learning causal graphs with small interventions","author":"shanmugam","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref26","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.12089","article-title":"Action schema networks: Generalised policies with deep learning","author":"toyer","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"article-title":"Relational inductive biases, deep learning, and graph networks","year":"2018","author":"battaglia","key":"ref25"},{"article-title":"Large-scale study of curiosity-driven learning","year":"2018","author":"burda","key":"ref50"},{"key":"ref51","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"article-title":"Contingency-aware exploration in reinforcement learning","year":"2018","author":"choi","key":"ref53"},{"key":"ref52","article-title":"Investigating contingency awareness using atari 2600 games","author":"bellemare","year":"2012","journal-title":"Twenty-Sixth AAAI Conference on Artificial Intelligence"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2010.5578845"},{"key":"ref40","first-page":"3675","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"kulkarni","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref12","first-page":"1015","article-title":"Skill discovery in continuous reinforcement learning domains using skill chaining","author":"konidaris","year":"2009","journal-title":"Advances in neural information processing systems"},{"article-title":"Reverse curriculum generation for reinforcement 
learning","year":"2017","author":"florensa","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139383"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.2307\/2291223"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref16"},{"key":"ref17","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICEC.1996.542381"},{"key":"ref19","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11796","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2018","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v31i1.10916","article-title":"The option-critic architecture","author":"bacon","year":"2017","journal-title":"Thirty-First AAAI Conference on Artificial Intelligence"},{"key":"ref3","first-page":"1497","article-title":"Skill characterization based on betweenness","author":"?im?ek","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref6","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","author":"vezhnevets","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref5","first-page":"1648","article-title":"Constructing abstraction hierarchies using a skill-symbol loop","volume":"2016","author":"konidaris","year":"2016","journal-title":"IJCAI proceedings of the conference"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref7","first-page":"26","article-title":"Dynamicsaware unsupervised skill discovery","author":"sharma","year":"2020","journal-title":"International Conference on Learning Representations (ICLR)"},{"article-title":"Goexplore: a new approach for hard-exploration problems","year":"2019","author":"ecoffet","key":"ref49"},{"key":"ref9","first-page":"2721","article-title":"Count-based exploration with neural density models","volume":"70","author":"ostrovski","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref46","first-page":"2753","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref45","first-page":"2721","article-title":"Count-based exploration with neural density models","volume":"70","author":"ostrovski","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"article-title":"Episodic curiosity through reachability","year":"2018","author":"savinov","key":"ref48"},{"article-title":"Exploration by random network distillation","year":"2018","author":"burda","key":"ref47"},{"key":"ref42","first-page":"4055","article-title":"Successor features for transfer in reinforcement learning","author":"barreto","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref41","first-page":"273","article-title":"Hierarchical relative entropy policy search","author":"daniel","year":"2012","journal-title":"Artificial Intelligence and Statistics"},{"article-title":"Classifying options for deep reinforcement 
learning","year":"2016","author":"arulkumaran","key":"ref44"},{"key":"ref43","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume":"70","author":"vezhnevets","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"}],"event":{"name":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2020,10,24]]},"location":"Las Vegas, NV, USA","end":{"date-parts":[[2021,1,24]]}},"container-title":["2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9340668\/9340635\/09340891.pdf?arnumber=9340891","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,21]],"date-time":"2022-12-21T14:51:16Z","timestamp":1671634276000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9340891\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,24]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/iros45743.2020.9340891","relation":{},"subject":[],"published":{"date-parts":[[2020,10,24]]}}}