{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T09:42:34Z","timestamp":1746006154151},"publisher-location":"Berlin, Heidelberg","reference-count":19,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_8","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"42-53","source":"Crossref","is-referenced-by-count":4,"title":["Active Learning of MDP Models"],"prefix":"10.1007","author":[{"given":"Mauricio","family":"Araya-L\u00f3pez","sequence":"first","affiliation":[]},{"given":"Olivier","family":"Buffet","sequence":"additional","affiliation":[]},{"given":"Vincent","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"Fran\u00e7ois","family":"Charpillet","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"8_CR1","unstructured":"Araya-L\u00f3pez, M., Buffet, O., Thomas, V., Charpillet, F.: A POMDP extension with belief-dependent rewards. In: Advances in Neural Information Processing Systems 23 (NIPS 2010) (2010)"},{"key":"8_CR2","unstructured":"Asmuth, J., Li, L., Littman, M., Nouri, A., Wingate, D.: A Bayesian sampling approach to exploration in reinforcement learning. In: Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI 2009) (2009)"},{"key":"8_CR3","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1090\/S0002-9904-1954-09848-8","volume":"60","author":"R. Bellman","year":"1954","unstructured":"Bellman, R.: The theory of dynamic programming. Bull. Amer. Math. Soc.\u00a060, 503\u2013516 (1954)","journal-title":"Bull. Amer. Math. Soc."},{"key":"8_CR4","first-page":"213","volume":"3","author":"R. Brafman","year":"2003","unstructured":"Brafman, R., Tennenholtz, M.: R-max - a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research\u00a03, 213\u2013231 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"\u015eim\u015fek, O., Barto, A.G.: An intrinsic reward mechanism for efficient exploration. In: Proceedings of the 23rd International Conference on Machine Learning, ICML 2006, pp. 833\u2013840. ACM, New York (2006)","DOI":"10.1145\/1143844.1143949"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Dimitrakakis, C.: Tree exploration for Bayesian RL exploration. In: CIMCA\/IAWTIC\/ISE, pp. 1029\u20131034 (2008)","DOI":"10.1109\/CIMCA.2008.32"},{"key":"8_CR7","unstructured":"Duff, M.: Optimal learning: Computational procedures for Bayes-adaptive Markov decision processes. Ph.D. thesis, University of Massachusetts Amherst (2002)"},{"issue":"2","key":"8_CR8","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1111\/j.2517-6161.1979.tb01068.x","volume":"41","author":"J.C. Gittins","year":"1979","unstructured":"Gittins, J.C.: Bandit processes and dynamic allocation indices. Journal of the Royal Statistical Society\u00a041(2), 148\u2013177 (1979)","journal-title":"Journal of the Royal Statistical Society"},{"key":"8_CR9","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/978-3-540-73580-9_22","volume-title":"Abstraction, Reformulation, and Approximation","author":"A. Jonsson","year":"2007","unstructured":"Jonsson, A., Barto, A.G.: Active Learning of Dynamic Bayesian Networks in Markov Decision Processes. In: Miguel, I., Ruml, W. (eds.) SARA 2007. LNCS (LNAI), vol.\u00a04612, pp. 273\u2013284. Springer, Heidelberg (2007)"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Kolter, J., Ng, A.: Near-Bayesian exploration in polynomial time. In: Proceedings of the Twenty-Sixth International Conference on Machine Learning, ICML 2009 (2009)","DOI":"10.1145\/1553374.1553441"},{"key":"8_CR11","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning, pp. 278\u2013287. Morgan Kaufmann (1999)"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Poupart, P., Vlassis, N., Hoey, J., Regan, K.: An analytic solution to discrete Bayesian reinforcement learning. In: Proceedings of the Twenty-Third International Conference on Machine Learning (ICML 2006) (2006)","DOI":"10.1145\/1143844.1143932"},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"Puterman, M.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley-Interscience (April 1994)","DOI":"10.1002\/9780470316887"},{"issue":"2","key":"8_CR14","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1016\/j.patcog.2007.06.023","volume":"41","author":"T. Rauber","year":"2008","unstructured":"Rauber, T., Braun, T., Berns, K.: Probabilistic distance measures of the Dirichlet and Beta distributions. Pattern Recognition\u00a041(2), 637\u2013645 (2008)","journal-title":"Pattern Recognition"},{"key":"8_CR15","unstructured":"Roy, N., Thrun, S.: Coastal navigation with mobile robots. In: Advances in Neural Information Processing Systems 12, pp. 1043\u20131049 (1999)"},{"key":"8_CR16","unstructured":"Sorg, J., Singh, S., Lewis, R.: Variance-based rewards for approximate Bayesian reinforcement learning. In: Proceedings of the Twenty-Sixth Conference on Uncertainty in Artificial Intelligence (2010)"},{"key":"8_CR17","unstructured":"Strens, M.J.A.: A Bayesian framework for reinforcement learning. In: Proceedings of the International Conference on Machine Learning (ICML 2000), pp. 943\u2013950 (2000)"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"8_CR19","unstructured":"Szepesv\u00e1ri, C.: Reinforcement learning algorithms for MDPs \u2013 a survey. Tech. Rep. TR09-13, Department of Computing Science, University of Alberta (2009)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T17:29:37Z","timestamp":1713979777000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}