{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T21:28:51Z","timestamp":1768339731188,"version":"3.49.0"},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540897217","type":"print"},{"value":"9783540897224","type":"electronic"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-89722-4_12","type":"book-chapter","created":{"date-parts":[[2008,11,26]],"date-time":"2008-11-26T13:57:14Z","timestamp":1227707834000},"page":"151-164","source":"Crossref","is-referenced-by-count":38,"title":["Optimistic Planning of Deterministic Systems"],"prefix":"10.1007","author":[{"given":"Jean-Fran\u00e7ois","family":"Hren","sequence":"first","affiliation":[]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"2-3","key":"12_CR1","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning Journal\u00a047(2-3), 235\u2013256 (2002)","journal-title":"Machine Learning Journal"},{"key":"12_CR2","unstructured":"Bertsekas, D.P., Tsitsiklis, J.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"12_CR3","unstructured":"Coquelin, P.-A., Munos, R.: Bandit algorithms for tree search. In: Uncertainty in Artificial Intelligence (2007)"},{"key":"12_CR4","unstructured":"Gelly, S., Wang, Y., Munos, R., Teytaud, O.: Modification of UCT with patterns in Monte-Carlo go. Technical Report INRIA RR-6062 (2006)"},{"key":"12_CR5","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1023\/A:1017932429737","volume":"49","author":"M. Kearns","year":"2002","unstructured":"Kearns, M., Mansour, Y., Ng, A.Y.: A sparse sampling algorithm for near-optimal planning in large Markovian decision processes. Machine Learning\u00a049, 193\u2013208 (2002)","journal-title":"Machine Learning"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Kocsis, L., Szepesvari, C.: Bandit based monte-carlo planning. In: European Conference on Machine Learning, pp. 282\u2013293 (2006)","DOI":"10.1007\/11871842_29"},{"key":"12_CR7","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/0196-8858(85)90002-8","volume":"6","author":"T.L. Lai","year":"1985","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Advances in Applied Mathematics\u00a06, 4\u201322 (1985)","journal-title":"Advances in Applied Mathematics"},{"key":"12_CR8","unstructured":"P\u00e9ret, L., Garcia, F.: On-line search for solving large Markov decision processes. In: Proceedings of the 16th European Conference on Artificial Intelligence (2004)"},{"key":"12_CR9","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley and Sons, Chichester (1994)"},{"key":"12_CR10","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1090\/S0002-9904-1952-09620-8","volume":"58","author":"H. Robbins","year":"1952","unstructured":"Robbins, H.: Some aspects of the sequential design of experiments. Bulletin of the American Mathematics Society\u00a058, 527\u2013535 (1952)","journal-title":"Bulletin of the American Mathematics Society"},{"key":"12_CR11","volume-title":"Reinforcement Learning","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. MIT Press, Cambridge (1998)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-89722-4_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,4]],"date-time":"2019-03-04T05:19:13Z","timestamp":1551676753000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-89722-4_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540897217","9783540897224"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-89722-4_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008]]}}}