{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T22:12:24Z","timestamp":1780351944170,"version":"3.54.1"},"reference-count":43,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,5]]},"DOI":"10.1109\/simpar.2018.8376268","type":"proceedings-article","created":{"date-parts":[[2018,6,11]],"date-time":"2018-06-11T19:11:56Z","timestamp":1528744316000},"page":"35-42","source":"Crossref","is-referenced-by-count":39,"title":["Reinforcement learning for non-prehensile manipulation: Transfer from simulation to physical system"],"prefix":"10.1109","author":[{"given":"Kendall","family":"Lowrey","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Svetoslav","family":"Kolev","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jeremy","family":"Dao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aravind","family":"Rajeswaran","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Emanuel","family":"Todorov","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"key":"ref38","author":"peters","year":"2007","journal-title":"Machine learning of motor skills for robotics"},{"key":"ref33","article-title":"Transfer learning for reinforcement learning on a physical robot","author":"barrett","year":"2010","journal-title":"Ninth International Conference on Autonomous Agents and Multiagent Systems-Adaptive Learning Agents Workshop (AAMAS-ALA)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5732-z"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989383"},{"key":"ref30","article-title":"Learning complex dexterous manipulation with deep reinforcement learning and demonstrations","volume":"abs11709 10087","author":"rajeswaran","year":"2017","journal-title":"CoRR"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1162\/089976698300017746"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143845"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907423"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1145\/2185520.2185539","article-title":"Discovery of complex behaviors through contact-invariant optimization","volume":"31","author":"mordatch","year":"2012","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"ref40","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907001"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21559"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353843"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3182\/20080706-5-KR-1001.01833"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10339-011-0404-1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487140"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2015.7172032"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1778765.1778810"},{"key":"ref19","author":"lee","year":"2017","journal-title":"Gp-ilqg Data-driven robust optimal control for uncertain nonlinear dynamical systems"},{"key":"ref28","author":"yahya","year":"2016","journal-title":"Collective robot reinforcement learning with distributed asynchronous guided policy search"},{"key":"ref4","article-title":"Towards Generalization and Simplicity in Continuous Control","author":"rajeswaran","year":"2017","journal-title":"NIPS"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989384"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7354126"},{"key":"ref29","first-page":"849","article-title":"Policy search for motor primitives in robotics","author":"kober","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2015.7363481"},{"key":"ref8","article-title":"Sim-to-real transfer of robotic control with dynamics randomization","volume":"abs 1710 6537","author":"peng","year":"2017","journal-title":"CoRR"},{"key":"ref7","author":"rajeswaran","year":"2016","journal-title":"Epopt Learning robust neural network policies using model ensembles"},{"key":"ref2","article-title":"A natural policy gradient","author":"kakade","year":"2001","journal-title":"NIPS"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref1","first-page":"5026","author":"todorov","year":"2012","journal-title":"Mujoco A physics engine for model-based control"},{"key":"ref20","author":"ross","year":"2012","journal-title":"Agnostic system identification for model-based reinforcement learning"},{"key":"ref22","first-page":"1","article-title":"Reinforcement learning for humanoid robotics","author":"peters","year":"2003","journal-title":"Proceedings of the Third IEEE-RAS International Conference on Humanoid Robots"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21571"},{"key":"ref42","author":"bezanson","year":"2012","journal-title":"Julia A Fast Dynamic Language for Technical Computing"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref41","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2016","journal-title":"ICLRE"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: A survey","volume":"4","author":"kaelbling","year":"1996","journal-title":"Journal of Artificial Intelligence Research"},{"key":"ref26","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref43","first-page":"3132","article-title":"Interactive control of diverse complex characters with neural networks","author":"mordatch","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"}],"event":{"name":"2018 IEEE International Conference on Simulation, Modeling, and Programming for Autonomous Robots (SIMPAR)","location":"Brisbane, QLD","start":{"date-parts":[[2018,5,16]]},"end":{"date-parts":[[2018,5,19]]}},"container-title":["2018 IEEE International Conference on Simulation, Modeling, and Programming for Autonomous Robots (SIMPAR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8370757\/8376259\/08376268.pdf?arnumber=8376268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,19]],"date-time":"2019-10-19T05:35:17Z","timestamp":1571463317000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8376268\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/simpar.2018.8376268","relation":{},"subject":[],"published":{"date-parts":[[2018,5]]}}}