{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T17:48:14Z","timestamp":1725558494098},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540201212"},{"type":"electronic","value":"9783540398578"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2003]]},"DOI":"10.1007\/978-3-540-39857-8_29","type":"book-chapter","created":{"date-parts":[[2010,6,28]],"date-time":"2010-06-28T04:07:15Z","timestamp":1277698035000},"page":"313-324","source":"Crossref","is-referenced-by-count":6,"title":["Using MDP Characteristics to Guide Exploration in Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Bohdana","family":"Ratitch","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Doina","family":"Precup","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"29_CR1","unstructured":"Dearden, R., Friedman, N., Russell, S.: Bayesian Q-learning. In: Proc. AAAI, pp. 761\u2013768 (1998)"},{"key":"29_CR2","unstructured":"Dearden, R., Friedman, N., Andre, D.: Model-Based Bayesian Exploration. In: Proc. of the 15th UAI Conference, pp. 150\u2013159 (1999)"},{"key":"29_CR3","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4168.001.0001","volume-title":"Learning in embedded systems","author":"L.P. Kaelbling","year":"1993","unstructured":"Kaelbling, L.P.: Learning in embedded systems. MIT Press, Cambridge (1993)"},{"key":"29_CR4","unstructured":"Kearns, M., Singh, S.: Near-Optimal Reinforcement Learning in Polynomial Time. In: Proc. of the 15th ICML, pp. 260\u2013268 (1998)"},{"key":"29_CR5","unstructured":"Kirman, J.: Predicting Real-Time Planner Performance by Domain Characterization. Ph.D. Thesis, Brown University (1995)"},{"key":"29_CR6","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1137\/0323023","volume":"23","author":"P.R. Kumar","year":"1985","unstructured":"Kumar, P.R.: A survey of some results in stochastic adaptive control. SIAM Journal of Control and Optimization\u00a023, 329\u2013338 (1985)","journal-title":"SIAM Journal of Control and Optimization"},{"issue":"2","key":"29_CR7","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1023\/A:1007541107674","volume":"35","author":"N. Meuleau","year":"1999","unstructured":"Meuleau, N., Bourgine, P.: Exploration of Multi-State Environments: Local Measures and Back-Propagation of Uncertainty. Machine Learning\u00a035(2), 117\u2013154 (1999)","journal-title":"Machine Learning"},{"key":"29_CR8","unstructured":"Piater, J.H., Cohen, P.R., Zhang, X., Atighetchi, M.: A Randomized ANOVA Procedure for Comparing Performance Curves. In: Proc. of the 15th ICML, pp. 430-438 (1998)"},{"key":"29_CR9","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/3-540-36755-1_33","volume-title":"Machine Learning: ECML 2002","author":"B. Ratitch","year":"2002","unstructured":"Ratitch, B., Precup, D.: Characterizing Markov Decision Processes. In: Elomaa, T., Mannila, H., Toivonen, H. (eds.) ECML 2002. LNCS (LNAI), vol.\u00a02430, pp. 391\u2013404. Springer, Heidelberg (2002)"},{"key":"29_CR10","unstructured":"Schmidhuber, J.H.: Adaptive Confidence and Adaptive Curiosity. Technical Report FKI-149-91, Technische Universitat Munchen (1991)"},{"key":"29_CR11","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"39","author":"S. Singh","year":"2000","unstructured":"Singh, S., Jaakkola, T., Littman, M.L., Szepesvari, C.: Convergence Results for Single-Step On-Policy Reinforcement Learning Algorithms. Machine Learning\u00a039, 287\u2013308 (2000)","journal-title":"Machine Learning"},{"key":"29_CR12","doi-asserted-by":"crossref","unstructured":"Sutton, R.: Integrated architecture for learning, planning and reacting based on approximating dynamic programming. In: Proc. of the 7th ICML, pp. 216\u2013224 (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"29_CR13","volume-title":"Reinforcement Learning. An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning. An Introduction. The MIT Press, Cambridge (1998)"},{"key":"29_CR14","unstructured":"Thrun, S.B.: Efficient Exploration in Reinforcement Learning. Technical Report CMU-CS- 92-102. School of Computer Science, Carnegie Mellon University (1992)"},{"key":"29_CR15","unstructured":"Vignat, C., Bercher, J.-F.: Un estimateur r\u00e9cursif de l\u2019entropie. 17\u00e8me Colloque GRETSI, Vannes, 701\u2013704 (1999)"},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"Wiering, M.A., Schmidhuber, J.: Efficient Model-Based Exploration. In: Proc. of the 5th International Conference on Simulation of Adaptive Behavior, pp. 223\u2013228 (1998)","DOI":"10.7551\/mitpress\/3119.003.0034"},{"key":"29_CR17","unstructured":"Wiatt, J.: Exploration and Inference in Learning from Reinforcement. Ph.D. Thesis. University of Edingburg (1997)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2003"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-39857-8_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,27]],"date-time":"2024-03-27T21:02:28Z","timestamp":1711573348000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-39857-8_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003]]},"ISBN":["9783540201212","9783540398578"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-39857-8_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2003]]}}}