{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T15:42:40Z","timestamp":1767109360738,"version":"3.28.0"},"reference-count":45,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1109\/iros.2017.8205959","type":"proceedings-article","created":{"date-parts":[[2017,12,14]],"date-time":"2017-12-14T17:12:59Z","timestamp":1513271579000},"page":"1537-1544","source":"Crossref","is-referenced-by-count":28,"title":["Policy transfer via modularity and reward guiding"],"prefix":"10.1109","author":[{"given":"Ignasi","family":"Clavera","sequence":"first","affiliation":[]},{"given":"David","family":"Held","sequence":"additional","affiliation":[]},{"given":"Pieter","family":"Abbeel","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.366"},{"key":"ref38","first-page":"536","author":"brachmann","year":"2014","journal-title":"Learning 6D Object Pose Estimation Using 3D Object Coordinates"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487140"},{"journal-title":"Transfer from simulation to real world through learning deep inverse dynamics model","year":"2016","author":"christiano","key":"ref32"},{"journal-title":"Sim-to-real robot learning from pixels with progressive nets","year":"2016","author":"rusu","key":"ref31"},{"journal-title":"Path integral guided policy search","year":"2017","author":"chebotar","key":"ref30"},{"key":"ref37","first-page":"384","author":"krull","year":"2015","journal-title":"6-DOF Model Based Tracking via Object Coordinate Regression[J]"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299105"},{"key":"ref35","article-title":"Transfer learning for reinforcement learning on a physical robot","author":"barrett","year":"2010","journal-title":"Ninth International Conference on Autonomous Agents and Multiagent Systems-Adaptive Learning Agents Workshop (AAMAS-ALA)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487756"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7758091"},{"journal-title":"(CAD)2RL Real single-image flight without a single real image","year":"2016","author":"sadeghi","key":"ref40"},{"key":"ref11","first-page":"10","article-title":"Learning to walk in 20 min","author":"tedrake","year":"2005","journal-title":"Proceedings of Seventh Yale Workshop on Adaptive and Learning Systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref13","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"journal-title":"High-dimensional continuous control using generalized advantage estimation","year":"2015","author":"schulman","key":"ref14"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref15"},{"key":"ref16","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139990"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2014.6942742"},{"key":"ref19","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487155"},{"key":"ref4","first-page":"264","article-title":"Hierarchical planning for multi-contact non-prehensile manipulation","author":"lee","year":"2015","journal-title":"Intelligent Robots and Systems (IROS) 2015 IEEE\/RSJ International Conference on"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2008.4543795"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/027836498600500303"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/0043-1648(91)90104-3"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-016-9571-3"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5652970"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1115\/1.2899703"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1177\/027836499601500603"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6094737"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.1992.587370"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2011.VII.009"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.2174\/1573399812666160613113556"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-015-0232-0"},{"key":"ref21","article-title":"Two-level rrt planner for robotic push manipulation","author":"zito","year":"2012","journal-title":"Proc IEEE Intelligent Robots and Systems (IROS)"},{"key":"ref42","first-page":"604","article-title":"Potential-based shaping in model-based reinforcement learning","author":"asmuth","year":"2008","journal-title":"AAAI"},{"journal-title":"A vision-based learning method for pushing manipulation","year":"1993","author":"salganicoff","key":"ref24"},{"key":"ref41","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759653"},{"volume":"abs 1605 2688","journal-title":"Theano A Python framework for fast computation of mathematical expressions","year":"2016","key":"ref44"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5979740"},{"key":"ref43","first-page":"1471","article-title":"Variance reduction techniques for gradient estimates in reinforcement learning","volume":"5","author":"greensmith","year":"2004","journal-title":"Journal of Machine Learning Research"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-014-9414-z"}],"event":{"name":"2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2017,9,24]]},"location":"Vancouver, BC","end":{"date-parts":[[2017,9,28]]}},"container-title":["2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8119304\/8202121\/08205959.pdf?arnumber=8205959","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,2,28]],"date-time":"2018-02-28T16:25:32Z","timestamp":1519835132000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8205959\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/iros.2017.8205959","relation":{},"subject":[],"published":{"date-parts":[[2017,9]]}}}