{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:36:15Z","timestamp":1774946175821,"version":"3.50.1"},"reference-count":24,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,4]]},"DOI":"10.1109\/adprl.2011.5967375","type":"proceedings-article","created":{"date-parts":[[2011,8,4]],"date-time":"2011-08-04T01:40:00Z","timestamp":1312422000000},"page":"48-55","source":"Crossref","is-referenced-by-count":7,"title":["Optimistic planning for sparsely stochastic systems"],"prefix":"10.1109","author":[{"given":"Lucian","family":"Busoniu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Remi","family":"Munos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bart","family":"De Schutter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Babuska","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Planning for Markov decision processes with sparse stochasticity","volume":"17","author":"likhachev","year":"2004","journal-title":"Advances in neural information processing systems"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","author":"chang","year":"2007","journal-title":"Simulation-Based Algorithms for Markov Decision Processes"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89722-4_12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927538"},{"key":"ref14","first-page":"477","article-title":"Open loop optimistic planning","author":"bubeck","year":"2010","journal-title":"Proceedings 23rd Annual Conference on Learning Theory (COLT-10)"},{"key":"ref15","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref16","volume":"2","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesv\u00e1ri","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref18","author":"sigaud","year":"2010","journal-title":"Markov Decision Processes in Artificial Intelligence"},{"key":"ref19","article-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators","author":"bu\u00e7oniu","year":"2010","journal-title":"Automation and Control Engineering Series"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89722-4_1"},{"key":"ref3","first-page":"153","article-title":"Online resolution techniques","author":"p\u00e9ret","year":"2010","journal-title":"Markov Decision Processes in Artificial Intelligence"},{"key":"ref6","author":"camacho","year":"2004","journal-title":"Model Predictive Control"},{"key":"ref5","author":"maciejowski","year":"2002","journal-title":"Predictive Control with Constraints"},{"key":"ref8","first-page":"67","article-title":"Bandit algorithms for tree search","author":"coquelin","year":"2007","journal-title":"Proc 23rd Conference on Uncertainty in Artificial Intelligence"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511546877"},{"key":"ref9","first-page":"201","article-title":"Online optimization in X-armed bandits","volume":"21","author":"bubeck","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2010.02.006"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1056\/NEJM199905273402114"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"223","DOI":"10.3934\/mbe.2004.1.223","article-title":"Dynamic multidrug therapies for HIV: Optimal and STI control approaches","volume":"1","author":"adams","year":"2004","journal-title":"Mathematical Biosciences and Engineering"},{"key":"ref24","article-title":"Cross-entropy optimization of control policies with adaptive basis functions","volume":"41","author":"bu\u00e7oniu","year":"2011","journal-title":"IEEE Transactions on Systems Man and Cybernetics-Part B Cybernetics"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2006.377527"}],"event":{"name":"2011 Ieee Symposium On Adaptive Dynamic Programming And Reinforcement Learning","location":"Paris, France","start":{"date-parts":[[2011,4,11]]},"end":{"date-parts":[[2011,4,15]]}},"container-title":["2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5958170\/5967347\/05967375.pdf?arnumber=5967375","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,9]],"date-time":"2024-04-09T14:54:35Z","timestamp":1712674475000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5967375\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,4]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/adprl.2011.5967375","relation":{},"subject":[],"published":{"date-parts":[[2011,4]]}}}