{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:35:45Z","timestamp":1774946145862,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":19,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642158797","type":"print"},{"value":"9783642158803","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-15880-3_44","type":"book-chapter","created":{"date-parts":[[2010,8,17]],"date-time":"2010-08-17T14:08:53Z","timestamp":1282054133000},"page":"601-616","source":"Crossref","is-referenced-by-count":11,"title":["Gaussian Processes for Sample Efficient Reinforcement Learning with RMAX-Like Exploration"],"prefix":"10.1007","author":[{"given":"Tobias","family":"Jung","sequence":"first","affiliation":[]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"44_CR1","doi-asserted-by":"crossref","unstructured":"Bernstein, A., Shimkin, N.: Adaptive-resolution reinforcement learning with efficient exploration. Machine Learning (2010), doi:10.1007\/s10994-010-5186-7 (published online: May 5, 2010)","DOI":"10.1007\/s10994-010-5186-7"},{"key":"44_CR2","doi-asserted-by":"crossref","unstructured":"Boone, G.: Minimum-time control of the acrobot. In: Proc. of IEEE International Conference on Robotics and Automation, vol.\u00a04, pp. 3281\u20133287 (1997)","DOI":"10.1109\/ROBOT.1997.606789"},{"key":"44_CR3","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1162\/153244303765208377","volume":"3","author":"R. Brafman","year":"2002","unstructured":"Brafman, R., Tennenholtz, M.: R-MAX, a general polynomial time algorithm for near-optimal reinforcement learning. JMLR\u00a03, 213\u2013231 (2002)","journal-title":"JMLR"},{"key":"44_CR4","doi-asserted-by":"crossref","unstructured":"Busoniu, L., Ernst, D., De Schutter, B., Babuska, R.: Online least-squares policy iteration for reinforcement learning control. In: American Control Conference, ACC 2010 (2010)","DOI":"10.1109\/ACC.2010.5530856"},{"key":"44_CR5","volume-title":"NIPS 9","author":"S. Davies","year":"1996","unstructured":"Davies, S.: Multidimensional triangulation and interpolation for reinforcement learning. In: NIPS 9. Morgan, San Francisco (1996)"},{"issue":"7-9","key":"44_CR6","doi-asserted-by":"publisher","first-page":"1508","DOI":"10.1016\/j.neucom.2008.12.019","volume":"72","author":"M.P. Deisenroth","year":"2009","unstructured":"Deisenroth, M.P., Rasmussen, C.E., Peters, J.: Gaussian process dynamic programming. Neurocomputing\u00a072(7-9), 1508\u20131524 (2009)","journal-title":"Neurocomputing"},{"key":"44_CR7","unstructured":"Engel, Y., Mannor, S., Meir, R.: Bayes meets Bellman: The Gaussian process approach to temporal difference learning. In: Proc. of ICML 20, pp. 154\u2013161 (2003)"},{"key":"44_CR8","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. JMLR\u00a06, 503\u2013556 (2005)","journal-title":"JMLR"},{"key":"44_CR9","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1007\/s002110050241","volume":"75","author":"L. Gr\u00fcne","year":"1997","unstructured":"Gr\u00fcne, L.: An adaptive grid scheme for the discrete Hamilton-Jacobi-Bellman equation. Numerische Mathematik\u00a075, 319\u2013337 (1997)","journal-title":"Numerische Mathematik"},{"key":"44_CR10","doi-asserted-by":"crossref","unstructured":"Jong, N.K., Stone, P.: Model-based exploration in continuous state spaces. In: The 7th Symposium on Abstraction, Reformulation and Approximation (2007)","DOI":"10.1007\/978-3-540-73580-9_21"},{"key":"44_CR11","first-page":"33","volume":"1","author":"T. Jung","year":"2007","unstructured":"Jung, T., Polani, D.: Learning robocup-keepaway with kernels. JMLR: Workshop and Conference Proceedings (Gaussian Processes in Practice)\u00a01, 33\u201357 (2007)","journal-title":"JMLR: Workshop and Conference Proceedings (Gaussian Processes in Practice)"},{"key":"44_CR12","doi-asserted-by":"publisher","first-page":"1107","DOI":"10.1162\/jmlr.2003.4.6.1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. JMLR\u00a04, 1107\u20131149 (2003)","journal-title":"JMLR"},{"key":"44_CR13","unstructured":"Li, L., Littman, M.L., Mansley, C.R.: Online exploration in least-squares policy iteration. In: Proc. of 8th AAMAS (2009)"},{"key":"44_CR14","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1023\/A:1017992615625","volume":"49","author":"R. Munos","year":"2002","unstructured":"Munos, R., Moore, A.: Variable resolution discretization in optimal control. Machine Learning\u00a049, 291\u2013323 (2002)","journal-title":"Machine Learning"},{"key":"44_CR15","unstructured":"Nouri, A., Littman, M.L.: Multi-resolution exploration in continuous spaces. In: NIPS 21 (2008)"},{"key":"44_CR16","doi-asserted-by":"crossref","first-page":"203","DOI":"10.7551\/mitpress\/7496.003.0011","volume-title":"Large Scale Learning Machines","author":"J. Qui\u00f1onero-Candela","year":"2007","unstructured":"Qui\u00f1onero-Candela, J., Rasmussen, C.E., Williams, C.K.I.: Approximation methods for gaussian process regression. In: Bottou, L., Chapelle, O., DeCoste, D., Weston, J. (eds.) Large Scale Learning Machines, pp. 203\u2013223. MIT Press, Cambridge (2007)"},{"key":"44_CR17","volume-title":"Gaussian Processes for Machine Learning","author":"C.E. Rasmussen","year":"2006","unstructured":"Rasmussen, C.E., Williams, C.K.I.: Gaussian Processes for Machine Learning. MIT Press, Cambridge (2006)"},{"key":"44_CR18","unstructured":"Riedmiller, M.: Neural fitted q-iteration. In: Proc. of 16th ECML (2005)"},{"key":"44_CR19","volume-title":"Reinforcement Learning: An Introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-15880-3_44.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T17:28:25Z","timestamp":1711733305000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-15880-3_44"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642158797","9783642158803"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-15880-3_44","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010]]}}}