{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T12:14:25Z","timestamp":1725538465682},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642049200"},{"type":"electronic","value":"9783642049217"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-04921-7_37","type":"book-chapter","created":{"date-parts":[[2009,9,22]],"date-time":"2009-09-22T07:46:25Z","timestamp":1253605585000},"page":"360-369","source":"Crossref","is-referenced-by-count":5,"title":["Improving Optimistic Exploration in Model-Free Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Marek","family":"Grze\u015b","sequence":"first","affiliation":[]},{"given":"Daniel","family":"Kudenko","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"37_CR1","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"37_CR2","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"37_CR3","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.P.: Reinforcement learning: A survey. Journal of Artificial Intelligence Research\u00a04, 237\u2013285 (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"37_CR4","unstructured":"Thrun, S.: Efficient exploration in reinforcement learning. Technical Report CMU-CS-92-102, Carnegie Mellon University, Computer Science Department (1992)"},{"issue":"2","key":"37_CR5","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1023\/A:1007541107674","volume":"35","author":"N. Meuleau","year":"1999","unstructured":"Meuleau, N., Bourgine, P.: Exploration of multi-state environments: Local measures and back-propagation of uncertainty. Machine Learning\u00a035(2), 117\u2013154 (1999)","journal-title":"Machine Learning"},{"key":"37_CR6","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max - a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research (2002)"},{"key":"37_CR7","doi-asserted-by":"crossref","unstructured":"Szita, I., L\u0151rincz, A.: The many faces of optimism: a unifying approach. In: Proceedings of the International Conference on Machine Learning (2008)","DOI":"10.1145\/1390156.1390288"},{"key":"37_CR8","first-page":"12","volume":"28","author":"M.L. Anderson","year":"2007","unstructured":"Anderson, M.L., Oates, T.: A review of recent research on metareasoning and metalearning. AI Magazine\u00a028, 12\u201316 (2007)","journal-title":"AI Magazine"},{"key":"37_CR9","volume-title":"Empirical methods for artificial intelligence","author":"P.R. Cohen","year":"1995","unstructured":"Cohen, P.R.: Empirical methods for artificial intelligence. MIT Press, Cambridge (1995)"},{"key":"37_CR10","first-page":"9","volume":"3","author":"R. Sutton","year":"1988","unstructured":"Sutton, R.: Learning to predict by the methods of temporal differences. Machine Learning\u00a03, 9\u201344 (1988)","journal-title":"Machine Learning"},{"key":"37_CR11","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1007495401240","volume":"32","author":"S. Singh","year":"1998","unstructured":"Singh, S., Dayan, P.: Analytical mean squared error curves for temporal difference learning. Machine Learning\u00a032, 5\u201340 (1998)","journal-title":"Machine Learning"},{"key":"37_CR12","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the 7th International Conference on Machine Learning, pp. 216\u2013224 (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"37_CR13","doi-asserted-by":"crossref","unstructured":"Epshteyn, A., DeJong, G.: Qualitative reinforcement learning. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 305\u2013312 (2006)","DOI":"10.1145\/1143844.1143883"},{"key":"37_CR14","doi-asserted-by":"crossref","unstructured":"Wiering, M., Schmidhuber, J.: Efficient model-based exploration. In: Proceedings of the 5th international conference on simulation of adaptive behavior: From animals to animats, pp. 223\u2013228 (1998)","DOI":"10.7551\/mitpress\/3119.003.0034"},{"key":"37_CR15","volume-title":"Artificial Intelligence: A Modern Approach","author":"S.J. Russell","year":"2002","unstructured":"Russell, S.J., Norvig, P.: Artificial Intelligence: A Modern Approach, 2nd edn. Prentice Hall, Englewood Cliffs (2002)","edition":"2"},{"key":"37_CR16","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1109\/72.134290","volume":"2","author":"C.S. Lin","year":"1991","unstructured":"Lin, C.S., Kim, H.: Cmac-based adaptive critic self-learning control. IEEE Transactions on Neural Networks\u00a02, 530\u2013533 (1991)","journal-title":"IEEE Transactions on Neural Networks"}],"container-title":["Lecture Notes in Computer Science","Adaptive and Natural Computing Algorithms"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-04921-7_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T13:57:45Z","timestamp":1710597465000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-04921-7_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642049200","9783642049217"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-04921-7_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}