{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:42:46Z","timestamp":1774021366813,"version":"3.50.1"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/iros40897.2019.8968149","type":"proceedings-article","created":{"date-parts":[[2020,1,30]],"date-time":"2020-01-30T23:53:51Z","timestamp":1580428431000},"page":"1818-1825","source":"Crossref","is-referenced-by-count":8,"title":["Hierarchical Reinforcement Learning for Concurrent Discovery of Compound and Composable Policies"],"prefix":"10.1109","author":[{"given":"Domingo","family":"Esteban","sequence":"first","affiliation":[{"name":"Istituto Italiano di Tecnologia,Department of Advanced Robotics,Genova,Italy,16163"}]},{"given":"Leonel","family":"Rozo","sequence":"additional","affiliation":[{"name":"Bosch Center for Artificial Intelligence,Renningen,Germany,71272"}]},{"given":"Darwin G.","family":"Caldwell","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia,Department of Advanced Robotics,Genova,Italy,16163"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref11","first-page":"338","article-title":"Learning elementary movements jointly with a higher level task","author":"kober","year":"2011","journal-title":"IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2014.2358639"},{"key":"ref13","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v33i01.33014975","article-title":"Composable modular reinforcement learning","author":"simpkins","year":"2019","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1162\/089976602753712972"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907631"},{"key":"ref16","first-page":"761","article-title":"Horde: A scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction","author":"sutton","year":"2011","journal-title":"International Conference on Autonomous Agents and Multiagent Systems (AAMAS)"},{"key":"ref17","first-page":"207","article-title":"The intentional unintentional agent: Learning to solve many continuous control tasks simultaneously","author":"cabi","year":"2017","journal-title":"Conference on Robot Learning (CoRL)"},{"key":"ref18","first-page":"1312","article-title":"Universal value function approximators","author":"schaul","year":"2015","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref19","article-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"jaderberg","year":"2016","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref28","first-page":"2016","author":"coumans","year":"0","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning"},{"key":"ref4","article-title":"Policy search in continuous action domains: an overview","author":"sigaud","year":"2018","journal-title":"arXiv preprint arXiv 1803 04706"},{"key":"ref27","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"arXiv preprint arXiv 1812 02588"},{"key":"ref3","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref6","first-page":"411","article-title":"Learning table tennis with a mixture of motor primitives","author":"m\u00fclling","year":"2010","journal-title":"IEEE-RAS International Conference on Humanoid Robots (Humanoids)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206447"},{"key":"ref5","first-page":"1","article-title":"Hierarchical relative entropy policy search","volume":"17","author":"daniel","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"6244","DOI":"10.1109\/ICRA.2018.8460756","article-title":"Composable deep reinforcement learning for robotic manipulation","author":"haarnoja","year":"2018","journal-title":"IEEE International Conference on Robotics and Automation (ICRA)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2018.1509018"},{"key":"ref2","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref9","first-page":"1445","article-title":"Multiple-goal reinforcement learning with modular sarsa(0)","author":"sprague","year":"2003","journal-title":"International Joint Conference on Artificial Intelligence (IJCAI)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-32552-1_15"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2805379"},{"key":"ref22","author":"ziebart","year":"2010","journal-title":"Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy"},{"key":"ref21","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","volume":"80","author":"riedmiller","year":"2018","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref24","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref23","article-title":"A unified view of entropy-regularized markov decision processes","author":"neu","year":"2017","journal-title":"arXiv preprint arXiv 1705 07798"},{"key":"ref26","author":"sutton","year":"2018","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref25","article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","author":"levine","year":"2018","journal-title":"arXiv preprint arXiv 1805 00909"}],"event":{"name":"2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Macau, China","start":{"date-parts":[[2019,11,3]]},"end":{"date-parts":[[2019,11,8]]}},"container-title":["2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8957008\/8967518\/08968149.pdf?arnumber=8968149","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T18:24:26Z","timestamp":1753813466000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8968149\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/iros40897.2019.8968149","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}