{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T17:29:34Z","timestamp":1778347774074,"version":"3.51.4"},"reference-count":21,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1109\/iros.2018.8593722","type":"proceedings-article","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T02:33:30Z","timestamp":1548297210000},"page":"1241-1246","source":"Crossref","is-referenced-by-count":148,"title":["Feedback Control For Cassie With Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Zhaoming","family":"Xie","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Glen","family":"Berseth","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Patrick","family":"Clary","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonathan","family":"Hurst","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michiel","family":"van de Panne","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","volume":"abs 1707 2201","author":"merel","year":"2017","journal-title":"Learning human behaviors from motion capture by adversarial imitation"},{"key":"ref11","volume":"abs 1707 2747","author":"wang","year":"2017","journal-title":"Robust imitation of diverse behaviors"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487279"},{"key":"ref13","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"1999","journal-title":"Proceedings of the 12th International Conference on Neural Information Processing Systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2015.7172044"},{"key":"ref15","author":"tedrake","year":"2018","journal-title":"Underactuated Robotics Algorithms for Walking Running Swimming Flying and Manipulation (Course Notes for MIT 6 832)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3099564.3099567"},{"key":"ref17","volume":"abs 1707 6347","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref18","volume":"abs 1709 6560","author":"henderson","year":"2017","journal-title":"Deep reinforcement learning that matters"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref4","author":"yu","year":"2018","journal-title":"Learning Symmetry and Low-energy Locomotion"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"41:1","DOI":"10.1145\/3072959.3073602","article-title":"Deeploco: Dynamic locomotion skills using hierarchical deep reinforcement learning","volume":"36","author":"peng","year":"2017","journal-title":"ACM Trans Graph"},{"key":"ref6","volume":"abs 1710 6537","author":"peng","year":"2017","journal-title":"Sim-to-real transfer of robotic control with dynamics randomization"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5650765"},{"key":"ref7","first-page":"1939","article-title":"Learning to walk in 20 minutes","volume":"95585","author":"tedrake","year":"2005","journal-title":"Proceedings of the Fourteenth Yale Workshop on Adaptive and Learning Systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2016.2582731"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487270"},{"key":"ref9","volume":"abs 1707 2286","author":"heess","year":"2017","journal-title":"Emergence of locomotion behaviours in rich environments"},{"key":"ref20","author":"paszke","year":"2017","journal-title":"On Automatic Differentiation"},{"key":"ref21","volume":"abs 1412 6980","author":"kingma","year":"2014","journal-title":"Adam A method for stochastic optimization"}],"event":{"name":"2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Madrid","start":{"date-parts":[[2018,10,1]]},"end":{"date-parts":[[2018,10,5]]}},"container-title":["2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8574473\/8593358\/08593722.pdf?arnumber=8593722","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T05:51:27Z","timestamp":1598248287000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8593722\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/iros.2018.8593722","relation":{},"subject":[],"published":{"date-parts":[[2018,10]]}}}