{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T15:14:29Z","timestamp":1769181269415,"version":"3.49.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icra40945.2020.9196642","type":"proceedings-article","created":{"date-parts":[[2020,9,15]],"date-time":"2020-09-15T21:25:46Z","timestamp":1600205146000},"page":"413-419","source":"Crossref","is-referenced-by-count":32,"title":["Learning Generalizable Locomotion Skills with Hierarchical Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Tianyu","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathan","family":"Lambert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roberto","family":"Calandra","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Franziska","family":"Meier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Akshara","family":"Rai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Improving sample efficiency in model-free reinforcement learning from images","author":"yarats","year":"2019","journal-title":"arXiv preprint arXiv 1910 01741"},{"key":"ref38","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-015-0196-0"},{"key":"ref32","article-title":"Bayesian optimization in variational latent spaces with dynamic compression","author":"antonova","year":"2019","journal-title":"arXiv preprint arXiv 1907 09977"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"81:1","DOI":"10.1145\/2897824.2925881","article-title":"Terrain-adaptive locomotion skills using deep reinforcement learning","volume":"35","author":"peng","year":"2016","journal-title":"ACM Trans Graph"},{"key":"ref30","article-title":"Mcp: Learning composable hierarchical control with multiplicative compositional policies","volume":"abs 1905 9808","author":"peng","year":"2019","journal-title":"CoRR"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1098\/rsta.2006.1919"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s00422-013-0572-4"},{"key":"ref35","article-title":"Data efficient reinforcement learning for legged robots","author":"yang","year":"2019","journal-title":"arXiv preprint arXiv 1907 09977"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1038\/nature14422"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461236"},{"key":"ref40","article-title":"Pybullet simulator","year":"0"},{"key":"ref11","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2806083"},{"key":"ref13","article-title":"Curious ilqr: Resolving uncertainty in model-based rl","author":"bechtle","year":"2019","journal-title":"arXiv preprint arXiv 1904 01870"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-007-9071-6"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-017-00348-9"},{"key":"ref16","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"arXiv preprint arXiv 1812 02588"},{"key":"ref17","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00214-9"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594448"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.1145\/3072959.3073602","article-title":"Deeploco: Dynamic locomotion skills using hierarchical deep reinforcement learning","volume":"36","author":"peng","year":"2017","journal-title":"ACM Trans on Graphics (Proc of SIGGRAPH)"},{"key":"ref4","article-title":"Emergence of locomotion behaviours in rich environments","author":"heess","year":"2017","journal-title":"arXiv preprint arXiv 1707 07328"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630937"},{"key":"ref3","first-page":"4754","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793864"},{"key":"ref29","article-title":"Hierarchical visuomotor control of humanoids","author":"merel","year":"2018","journal-title":"arXiv preprint arXiv 1811 09656"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref8","article-title":"Learning to walk via deep reinforcement learning","author":"haarnoja","year":"2018","journal-title":"arXiv preprint arXiv 1812 11467"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"eaau5872","DOI":"10.1126\/scirobotics.aau5872","article-title":"Learning agile and dynamic motor skills for legged robots","volume":"4","author":"hwangbo","year":"2019","journal-title":"Robotics Science"},{"key":"ref2","article-title":"Meta learning shared hierarchies","author":"frans","year":"2017","journal-title":"arXiv preprint arXiv 1710 09767"},{"key":"ref9","article-title":"Online hierarchical optimization for humanoid control","author":"feng","year":"2016"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073602"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.047"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353843"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1163\/016918610X493552"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-016-5580-x"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/Humanoids.2011.6100841"},{"key":"ref25","article-title":"The option-critic architecture","author":"bacon","year":"2016"}],"event":{"name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","location":"Paris, France","start":{"date-parts":[[2020,5,31]]},"end":{"date-parts":[[2020,8,31]]}},"container-title":["2020 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9187508\/9196508\/09196642.pdf?arnumber=9196642","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T00:24:55Z","timestamp":1656375895000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9196642\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/icra40945.2020.9196642","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}