{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T18:15:09Z","timestamp":1772302509794,"version":"3.50.1"},"reference-count":30,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:00:00Z","timestamp":1585699200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"EPSRC CDT in Robotics and Autonomous Systems","award":["EP\/L016834\/1"],"award-info":[{"award-number":["EP\/L016834\/1"]}]},{"name":"Future AI and Robotics for Space","award":["EP\/R026092\/1"],"award-info":[{"award-number":["EP\/R026092\/1"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2020,4]]},"DOI":"10.1109\/lra.2020.2972879","type":"journal-article","created":{"date-parts":[[2020,2,10]],"date-time":"2020-02-10T20:21:32Z","timestamp":1581366092000},"page":"2610-2617","source":"Crossref","is-referenced-by-count":52,"title":["Learning Natural Locomotion Behaviors for Humanoid Robots Using Human Bias"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9082-5193","authenticated-orcid":false,"given":"Chuanyu","family":"Yang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2349-4192","authenticated-orcid":false,"given":"Kai","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Shuai","family":"Heng","sequence":"additional","affiliation":[]},{"given":"Taku","family":"Komura","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6357-7419","authenticated-orcid":false,"given":"Zhibin","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref30","first-page":"623","article-title":"Reinforcement learning for CPG-driven biped robot","author":"mori","year":"0","journal-title":"Proc Amer Assoc Artif Intell"},{"key":"ref10","first-page":"1","article-title":"Deepmimic: Example-guided deep reinforcement learning of physics-based character skills","author":"peng","year":"2018","journal-title":"ACM Trans Graph"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"ref12","first-page":"916","article-title":"Policies modulating trajectory generators","author":"iscen","year":"0","journal-title":"Proc 2nd Conf Robot Lear"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593995"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3072959.3073663","article-title":"Phase-functioned neural networks for character control","author":"holden","year":"2017","journal-title":"ACM Trans Graph"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3197517.3201366","article-title":"Mode-adaptive neural networks for quadruped motion control","author":"zhang","year":"2018","journal-title":"ACM Trans Graph"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2006.1641933"},{"key":"ref17","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"abadi","year":"2016","journal-title":"arXiv 1603 04467"},{"key":"ref18","article-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","author":"coumans","year":"2016"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3099564.3099567"},{"key":"ref28","first-page":"4045","article-title":"Time limits in reinforcement learning","author":"pardo","year":"0","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref4","article-title":"Concrete problems in AI safety","author":"amodei","year":"2016"},{"key":"ref27","article-title":"Leave no trace: Learning to reset for safe and autonomous reinforcement learning","author":"eysenbach","year":"0","journal-title":"Proc Intl Conf on Learning Representations"},{"key":"ref3","first-page":"8538","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","author":"fu","year":"0","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref6","article-title":"A survey on policy search algorithms for learning robot controllers in a handful of trials","author":"chatzilygeroudis","year":"2019","journal-title":"IEEE Trans Robot"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/PL00007977"},{"key":"ref5","first-page":"4742","article-title":"Structured control nets for deep reinforcement learning","author":"srouji","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref8","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"0","journal-title":"Proc 17th Int Conf Mach Learn"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6225018"},{"key":"ref9","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"0","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2016.2629489"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2018.8625045"},{"key":"ref22","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.1126\/scirobotics.aau5872","article-title":"Learning agile and dynamic motor skills for legged robots","volume":"4","author":"hwangbo","year":"2019","journal-title":"Robotics Science"},{"key":"ref24","article-title":"Deep reinforcement learning in parameterized action space","author":"hausknecht","year":"2015"},{"key":"ref23","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015"},{"key":"ref26","article-title":"Prioritizing starting states for reinforcement learning","author":"tavakoli","year":"2018"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2017.8246900"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/8932682\/08990011.pdf?arnumber=8990011","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:32:54Z","timestamp":1651080774000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8990011\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4]]},"references-count":30,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/lra.2020.2972879","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4]]}}}