{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T10:09:51Z","timestamp":1729678191804,"version":"3.28.0"},"reference-count":21,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,5]]},"DOI":"10.1109\/icra.2014.6907337","type":"proceedings-article","created":{"date-parts":[[2014,9,30]],"date-time":"2014-09-30T16:32:36Z","timestamp":1412094756000},"page":"3323-3330","source":"Crossref","is-referenced-by-count":3,"title":["Bayesian exploration and interactive demonstration in continuous state MAXQ-learning"],"prefix":"10.1109","author":[{"given":"Kathrin","family":"Grave","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sven","family":"Behnke","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1177\/0278364911428653"},{"key":"ref11","first-page":"73","article-title":"Bayesian Hierarchical Reinforcement Learning","author":"cao","year":"2012","journal-title":"Proc Neural Information Processing Systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04180-8_59"},{"key":"ref13","first-page":"213","article-title":"R-MAX - A General Polynomial Time Algorithm for Near-Optimal Reinforcement Learning","volume":"3","author":"brafman","year":"2003","journal-title":"The Journal of Machine Learning Research"},{"journal-title":"Tech Rep","year":"1995","author":"gordon","key":"ref14"},{"key":"ref15","first-page":"1215","article-title":"Online Planning for Large MDPs with MAXQ Decomposition","volume":"3","author":"bai","year":"2012","journal-title":"Proc 1st Int Conf Autonomous Agents Multiagent Syst"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_11"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1023\/A:1012771025575"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICHR.2010.5686846"},{"journal-title":"Gaussian Processes for Machine Learning","year":"2006","author":"rasmussen","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630937"},{"key":"ref5","article-title":"Reinforcement Learning in Robotics: A Survey","author":"kober","year":"0","journal-title":"Int Journal of Robotics Research 2013"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/Humanoids.2011.6100841"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v24i1.7727","article-title":"Relative Entropy Policy Search","author":"peters","year":"2010","journal-title":"Proc Nat Conf on Artificial Intelligence"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","article-title":"Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition","volume":"13","author":"dietterich","year":"2000","journal-title":"Journal of Artificial Intelligence Research"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_9"},{"key":"ref9","first-page":"3137","article-title":"A Generalized Path Integral Control Approach to Reinforcement Learning","volume":"11","author":"theodorou","year":"2010","journal-title":"Journal of Machine Learning Research"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389727"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386116"}],"event":{"name":"2014 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2014,5,31]]},"location":"Hong Kong, China","end":{"date-parts":[[2014,6,7]]}},"container-title":["2014 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6895053\/6906581\/06907337.pdf?arnumber=6907337","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,16]],"date-time":"2023-07-16T19:03:13Z","timestamp":1689534193000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6907337\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,5]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/icra.2014.6907337","relation":{},"subject":[],"published":{"date-parts":[[2014,5]]}}}