{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T00:31:03Z","timestamp":1729643463715,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009,3]]},"DOI":"10.1109\/adprl.2009.4927549","type":"proceedings-article","created":{"date-parts":[[2009,5,19]],"date-time":"2009-05-19T19:50:44Z","timestamp":1242762644000},"page":"226-232","source":"Crossref","is-referenced-by-count":7,"title":["Using reward-weighted imitation for robot Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Jan","family":"Peters","sequence":"first","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Jens","family":"Kober","sequence":"additional","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"crossref","first-page":"425","DOI":"10.1016\/S0079-6123(06)65027-9","article-title":"dynamics systems vs. optimal control - a unifying view","volume":"165","author":"schaal","year":"2007","journal-title":"Progress in Brain Research"},{"key":"17","article-title":"policy search of motor primitives for robotics","author":"kober","year":"2008","journal-title":"Advanccs in Neural Information proccssing Systcms (NIPS)"},{"key":"18","article-title":"learning attractor landscapes for learning motor primitives","author":"ijspeert","year":"2003","journal-title":"Advanccs in Ncural Inlormation Proccssing Systcms (NIPS)"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-2097-1_141"},{"key":"16","article-title":"using local trajectory optimizer.i' to speed up global optimization in dynamic programming","author":"atkeson","year":"1994","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"13","article-title":"state-dependent exploration for policy gradient methods","author":"ruckstieb","year":"2008","journal-title":"Proceedings of the European Conference on Machine Learning (ECML)"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1177\/0278364907087548"},{"journal-title":"The EM Algorithm and Extensions Wiley Series in Prohahility and Statistics","year":"1997","author":"mclachan","key":"11"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.2.271"},{"journal-title":"Modcling and Control of Rohot Manipulators","year":"2007","author":"sciavicco","key":"21"},{"year":"2008","key":"20"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2001.973374"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/72.478391"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1177\/0278364908091463"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-007-9051-x"},{"key":"3","article-title":"pegasus: a policy sem-ch method for largc mdps and pomdps","author":"ng","year":"2000","journal-title":"Int Conf Uncertainty in Artificial Intelligence"},{"key":"2","article-title":"policy sem-ch hy dynamic programming","author":"bagnell","year":"2003","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"10","article-title":"attention and motor skill learning","author":"wulf","year":"2007","journal-title":"Human Kinetics"},{"journal-title":"Reinforcement Learning","year":"1998","author":"sutton","key":"1"},{"key":"7","article-title":"policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in Neural Information I'rocessing Systems (NIl'S)"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"5","article-title":"bayesian policy leal11ing with trans-dimensional mcmc","author":"hollman","year":"2007","journal-title":"Advances in Neural Information Processing Systems (Nfl'S)"},{"key":"4","doi-asserted-by":"crossref","first-page":"1521","DOI":"10.1163\/156855307782148550","article-title":"reinforcement learning for imitating constrained reaching movements","volume":"21","author":"guenter","year":"2007","journal-title":"Advanced Robotics"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2006.282564"},{"key":"8","article-title":"covariant policy search. in international","author":"bagnell","year":"2003","journal-title":"Joint Conference on Artificial Intelligence (J JCAI)"}],"event":{"name":"2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2009,3,30]]},"location":"Nashville, TN, USA","end":{"date-parts":[[2009,4,2]]}},"container-title":["2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4910084\/4927513\/04927549.pdf?arnumber=4927549","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,18]],"date-time":"2020-05-18T11:28:33Z","timestamp":1589801313000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4927549\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,3]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/adprl.2009.4927549","relation":{},"subject":[],"published":{"date-parts":[[2009,3]]}}}