{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T14:55:37Z","timestamp":1781621737877,"version":"3.54.5"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","award":["201908440400"],"award-info":[{"award-number":["201908440400"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1109\/tnnls.2021.3059912","type":"journal-article","created":{"date-parts":[[2021,3,1]],"date-time":"2021-03-01T21:34:38Z","timestamp":1614634478000},"page":"4727-4741","source":"Crossref","is-referenced-by-count":86,"title":["Hierarchical Reinforcement Learning With Universal Policies for Multistep Robotic Manipulation"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7612-614X","authenticated-orcid":false,"given":"Xintong","family":"Yang","sequence":"first","affiliation":[{"name":"Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8968-9902","authenticated-orcid":false,"given":"Ze","family":"Ji","sequence":"additional","affiliation":[{"name":"Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5123-9861","authenticated-orcid":false,"given":"Jing","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Informatics, Cardiff University, Cardiff, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2094-5680","authenticated-orcid":false,"given":"Yu-Kun","family":"Lai","sequence":"additional","affiliation":[{"name":"School of Computer Science and Informatics, Cardiff University, Cardiff, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5788-6573","authenticated-orcid":false,"given":"Changyun","family":"Wei","sequence":"additional","affiliation":[{"name":"Department of Robotics Engineering, Hohai University, Changzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6614-6932","authenticated-orcid":false,"given":"Guoliang","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Control Science and Engineering, Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7207-6544","authenticated-orcid":false,"given":"Rossitza","family":"Setchi","sequence":"additional","affiliation":[{"name":"Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1087","article-title":"One-shot imitation learning","volume-title":"Proc. NIPS","author":"Duan"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref3","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. ICML","author":"Schaul"},{"key":"ref4","first-page":"5048","article-title":"Hindsight experience replay","volume-title":"Proc. NIPS","author":"Andrychowicz"},{"key":"ref5","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"Popov","year":"2017","journal-title":"arXiv:1704.03073"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2543000"},{"key":"ref7","article-title":"Learning multi-level hierarchies with hindsight","volume-title":"Proc. ICLR","author":"Levy"},{"key":"ref8","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. NIPS","author":"Nachum"},{"key":"ref9","first-page":"9414","article-title":"Language as an abstraction for hierarchical deep reinforcement learning","volume-title":"Proc. NIPS","author":"Jiang"},{"key":"ref10","first-page":"3681","article-title":"Mcp: Learning composable hierarchical control with multiplicative compositional policies","volume-title":"Proc. NIPS","author":"Peng"},{"key":"ref11","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. ICLR","author":"Lillicrap"},{"key":"ref12","first-page":"556","article-title":"Intra-option learning about temporally abstract actions","volume-title":"Proc. ICML","volume":"98","author":"Sutton"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref14","article-title":"Meta learning shared hierarchies","volume-title":"Proc. ICLR","author":"Frans"},{"key":"ref15","article-title":"Sub-policy adaptation for hierarchical reinforcement learning","volume-title":"Proc. ICML","author":"Li"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref17","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"Proc. ICML","author":"Vezhnevets"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2812709"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2805379"},{"issue":"2","key":"ref20","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1023\/A:1017944732463","article-title":"Structure in the space of value functions","volume":"49","author":"Foster","year":"2002","journal-title":"Mach. Learn."},{"key":"ref21","first-page":"761","article-title":"Horde: A scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction","volume-title":"Proc. AAMAS","author":"Sutton"},{"key":"ref22","first-page":"4344","article-title":"Learning by playing-solving sparse reward tasks from scratch","volume-title":"Proc. ICML","author":"Riedmiller"},{"key":"ref23","first-page":"530","article-title":"Contextual imagined goals for self-supervised robotic learning","volume-title":"Proc. CoRL","author":"Nair"},{"key":"ref24","volume-title":"Principles of Robot Motion: Theory, Algorithms, and Implementation","author":"Choset","year":"2005"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2016.XII.002"},{"key":"ref26","article-title":"Monitoring the execution of temporal plans for robotic systems","author":"Levine","year":"2012"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1129"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2011.VII.008"},{"key":"ref29","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref30","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. ICML","author":"Fujimoto"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref32","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"Plappert","year":"2018","journal-title":"arXiv:1802.09464"},{"key":"ref33","first-page":"8024","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume-title":"Proc. NIPS","author":"Paszke"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9872163\/09366328.pdf?arnumber=9366328","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:23:55Z","timestamp":1704842635000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9366328\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9]]},"references-count":33,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2021.3059912","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9]]}}}