{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T08:46:42Z","timestamp":1729673202743,"version":"3.28.0"},"reference-count":40,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,4]]},"DOI":"10.1109\/adprl.2013.6614992","type":"proceedings-article","created":{"date-parts":[[2014,9,10]],"date-time":"2014-09-10T19:29:28Z","timestamp":1410377368000},"page":"77-84","source":"Crossref","is-referenced-by-count":3,"title":["Optimistic planning for belief-augmented Markov Decision Processes"],"prefix":"10.1109","author":[{"given":"Raphael","family":"Fonteneau","sequence":"first","affiliation":[]},{"given":"Lucian","family":"Busoniu","sequence":"additional","affiliation":[]},{"given":"Remi","family":"Munos","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Modification of UCT with patterns in Monte-Carlo Go","year":"2006","author":"gelly","key":"19"},{"key":"35","article-title":"Variance-based rewards for approximate bayesian reinforcement learning","author":"sorg","year":"2010","journal-title":"Uncertainty in Artificial Intelligence"},{"journal-title":"Optimal Learning Computational Procedures for Bayesadaptive Markov Decision Processes","year":"2002","author":"duff","key":"17"},{"key":"36","first-page":"943","article-title":"A Bayesian framework for reinforcement learning","author":"strens","year":"2000","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"18","first-page":"874","article-title":"Dual control theory","volume":"21","author":"feldbaum","year":"1960","journal-title":"Automation and Remote Control"},{"key":"33","first-page":"317","article-title":"Neural fitted Q iteration-first experiences with a data efficient neural reinforcement learning method","author":"riedmiller","year":"2005","journal-title":"European Conference on Machine Learning (ECML)"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1109\/CIMCA.2008.32"},{"key":"34","article-title":"Monte-carlo planning in large pomdps","volume":"46","author":"silver","year":"2010","journal-title":"Neural Information Processing Systems (NIPS)"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-008-5069-3"},{"key":"39","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v24i1.7689","article-title":"Integrating sample-based planning and model-based reinforcement learning","author":"walsh","year":"2010","journal-title":"AAAI Conference on Artificial Intelligence (AAAI)"},{"key":"13","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1007\/978-3-540-75538-8_7","article-title":"Efficient selectivity and backup operators in monte-carlo tree search","author":"coulom","year":"2007","journal-title":"Computers and Games"},{"key":"14","first-page":"761","article-title":"Bayesian Q-learning","author":"dearden","year":"1998","journal-title":"National Conference on Artificial Intelligence"},{"key":"37","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15880-3_19"},{"journal-title":"Reinforcement Learning","year":"1998","author":"sutton","key":"38"},{"key":"12","article-title":"Learning exploration\/ exploitation strategies for single trajectory reinforcement learning","author":"castronovo","year":"2012","journal-title":"Proc Eur Workshop Reinforcement Learning (EWRL)"},{"key":"21","article-title":"Efficient 
bayes-adaptive reinforcement learning using sample-based search","author":"guez","year":"2012","journal-title":"Neural Information Processing Systems (NIPS)"},{"journal-title":"Multiarmed Bandits Allocation Indices","year":"1989","author":"gittins","key":"20"},{"key":"40","doi-asserted-by":"crossref","DOI":"10.1609\/icaps.v22i1.13507","article-title":"Bandit-based planning and learning in continuous-action markov decision processes","author":"weinstein","year":"2012","journal-title":"International Conference on Automated Planning and Scheduling (ICAPS)"},{"key":"22","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1007\/978-3-540-89722-4_12","article-title":"Optimistic planning of deterministic systems","author":"hren","year":"2008","journal-title":"Recent Advances in Reinforcement Learning"},{"key":"23","article-title":"Theory of financial decision making","author":"ingersoll","year":"1987","journal-title":"Rowman and Littlefield Publishers Inc"},{"key":"24","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"jaksch","year":"2010","journal-title":"Journal of Machine Learning Research"},{"key":"25","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1007\/11871842_29","article-title":"Bandit based monte-carlo planning","author":"kocsis","year":"2006","journal-title":"Machine Learning ECML 2006"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553441"},{"key":"27","article-title":"Optimistic optimization of deterministic functions without the knowledge of its smoothness","author":"munos","year":"2011","journal-title":"Neural Information Processing Systems (NIPS)"},{"journal-title":"The Optimistic Principle Applied to Games Optimization and Planning Towards Foundations of Monte-Carlo Tree Search","year":"2012","author":"munos","key":"28"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1111\/1467-9868.00389"},{"key":"3","article-title":"Approaching bayes-optimalilty using monte-carlo tree search","author":"asmuth","year":"2011","journal-title":"International Conference on Automated Planning and Scheduling (ICAPS)"},{"key":"2","first-page":"19","article-title":"A bayesian sampling approach to exploration in reinforcement learning","author":"asmuth","year":"2009","journal-title":"Uncertainty in Artificial Intelligence"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2011.5967375"},{"key":"1","article-title":"Near-optimal brl using optimistic local transitions","author":"araya","year":"2012","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"30","article-title":"Logarithmic online regret bounds for undiscounted reinforcement learning","author":"ortner","year":"2007","journal-title":"Neural Information Processing Systems (NIPS)"},{"key":"7","first-page":"477","article-title":"Open loop optimistic planning","author":"bubeck","year":"2010","journal-title":"Conference on Learning Theory (COLT)"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1162\/153244303765208377"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143932"},{"journal-title":"Dynamic Programming","year":"1957","author":"bellman","key":"5"},{"key":"31","first-page":"1","article-title":"Reinforcement learning for humanoid robotics","author":"peters","year":"2003","journal-title":"IEEE-RAS International Conference on Humanoid Robots"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"9","first-page":"182","article-title":"Optimistic 
planning for markov decision processes","volume":"22","author":"busoniu","year":"2012","journal-title":"International Conference on Artificial Intelligence and Satistics (AISTATS)"},{"key":"8","first-page":"201","article-title":"Online optimization in x-armed bandits","author":"bubeck","year":"2009","journal-title":"Neural Information Processing Systems (NIPS)"}],"event":{"name":"2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2013,4,16]]},"location":"Singapore, Singapore","end":{"date-parts":[[2013,4,19]]}},"container-title":["2013 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6596003\/6614979\/06614992.pdf?arnumber=6614992","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,16]],"date-time":"2023-07-16T13:50:12Z","timestamp":1689515412000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6614992\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,4]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/adprl.2013.6614992","relation":{},"subject":[],"published":{"date-parts":[[2013,4]]}}}
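The record above follows the Crossref work-record envelope ({"status": ..., "message": {...}}). A minimal sketch of retrieving it and extracting the main bibliographic fields is given below; it assumes the public Crossref REST API endpoint https://api.crossref.org/works/{DOI} and the third-party Python `requests` library, neither of which is part of the record itself.

# Minimal sketch (assumed endpoint and library, not part of the record):
# fetch the same Crossref work record and format a citation line from it.
import requests

DOI = "10.1109/adprl.2013.6614992"

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=10)
resp.raise_for_status()
work = resp.json()["message"]  # same structure as the "message" object above

title = work["title"][0]
authors = ", ".join(f"{a['given']} {a['family']}" for a in work["author"])
venue = work["container-title"][0]
year = work["published"]["date-parts"][0][0]
pages = work.get("page", "n/a")

print(f'{authors}. "{title}". {venue}, {year}, pp. {pages}.')
# Expected, given the record above:
# Raphael Fonteneau, Lucian Busoniu, Remi Munos. "Optimistic planning for
# belief-augmented Markov Decision Processes". 2013 IEEE Symposium on Adaptive
# Dynamic Programming and Reinforcement Learning (ADPRL), 2013, pp. 77-84.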