{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T20:55:22Z","timestamp":1764276922204,"version":"3.37.3"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2018,12,19]],"date-time":"2018-12-19T00:00:00Z","timestamp":1545177600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100005073","name":"Agency for Defense Development","doi-asserted-by":"publisher","award":["UD170018CD"],"award-info":[{"award-number":["UD170018CD"]}],"id":[{"id":"10.13039\/501100005073","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010418","name":"Institute for Information and communications Technology Promotion","doi-asserted-by":"publisher","award":["2017-0-01778"],"award-info":[{"award-number":["2017-0-01778"]}],"id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2019,5]]},"DOI":"10.1007\/s10994-018-5767-4","type":"journal-article","created":{"date-parts":[[2018,12,19]],"date-time":"2018-12-19T13:04:29Z","timestamp":1545224669000},"page":"765-783","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Bayesian optimistic Kullback\u2013Leibler exploration"],"prefix":"10.1007","volume":"108","author":[{"given":"Kanghoon","family":"Lee","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0114-9643","authenticated-orcid":false,"given":"Geon-Hyeong","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Pedro","family":"Ortega","sequence":"additional","affiliation":[]},{"given":"Daniel D.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Kee-Eung","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,12,19]]},"reference":[{"key":"5767_CR1","unstructured":"Araya-L\u00f3pez, M., Thomas, V., & Buffet, O. (2012). Near-optimal BRL using optimistic local transitions. In Proceedings of the 29th international conference on machine learning (pp. 97\u2013104)."},{"key":"5767_CR2","unstructured":"Asmuth, J., Li, L., Littman, M. L., Nouri, A., & Wingate, D. (2009). A Bayesian sampling approach to exploration in reinforcement learning. In Proceedings of the 25th conference on uncertainty in artificial intelligence (pp. 19\u201326)."},{"key":"5767_CR3","unstructured":"Asmuth, J. T. (2013). Model-based Bayesian reinforcement learning with generalized priors. Ph.D. thesis, Rutgers University-Graduate School-New Brunswick."},{"key":"5767_CR4","doi-asserted-by":"publisher","first-page":"1876","DOI":"10.1016\/j.tcs.2009.01.016","volume":"410","author":"JY Audibert","year":"2009","unstructured":"Audibert, J. Y., Munos, R., & Szepesv\u00e1ri, C. (2009). Exploration\u2013exploitation tradeoff using variance estimates in multi-armed bandits. Theoretical Computer Science, 410, 1876\u20131902.","journal-title":"Theoretical Computer Science"},{"key":"5767_CR5","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex optimization","author":"S Boyd","year":"2004","unstructured":"Boyd, S., & Vandenberghe, L. (2004). Convex optimization. Cambridge: Cambridge University Press."},{"key":"5767_CR6","first-page":"213","volume":"3","author":"RI Brafman","year":"2002","unstructured":"Brafman, R. I., & Tennenholtz, M. (2002). R-MAX\u2014A general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research, 3, 213\u2013231.","journal-title":"Journal of Machine Learning Research"},{"key":"5767_CR7","unstructured":"Dearden, R., Friedman, N., & Russell, S. (1998). Bayesian Q-learning. In Proceedings of the fifteenth national conference on artificial intelligence (pp. 761\u2013768)."},{"key":"5767_CR8","unstructured":"Duff, M. O. (2002). Optimal learning: Computational procedures for Bayes-adaptive Markov decision processes. Ph.D. thesis, University of Massachusetts Amherst."},{"key":"5767_CR9","doi-asserted-by":"crossref","unstructured":"Filippi, S., Capp\u00e9, O., & Garivier, A. (2010). Optimism in reinforcement learning and Kullback\u2013Leibler divergence. In 48th Annual Allerton conference on communication, control, and computing (Allerton) (pp. 115\u2013122).","DOI":"10.1109\/ALLERTON.2010.5706896"},{"key":"5767_CR10","unstructured":"Garivier, A., & Capp\u00e9, O. (2011) The KL-UCB algorithm for bounded stochastic bandits and beyond. In The 24th annual conference on learning theory (pp. 359\u2013376)."},{"key":"5767_CR11","first-page":"1563","volume":"11","author":"T Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., & Auer, P. (2010). Near-optimal regret bounds for reinforcement learning. Journal of Machine Learning Research, 11, 1563\u20131600.","journal-title":"Journal of Machine Learning Research"},{"key":"5767_CR12","unstructured":"Kaufmann, E., Capp\u00e9, O., & Garivier, A. (2012). On Bayesian upper confidence bounds for bandit problems. In Fifteenth international conference on artificial intelligence and statistics (pp. 592\u2013600)."},{"key":"5767_CR13","unstructured":"Kearns, M., & Singh, S. (1998) Near-optimal reinforcement learning in polynomial time. In Proceedings of the 15th international conference on machine learning (pp. 260\u2013268)."},{"key":"5767_CR14","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1023\/A:1017984413808","volume":"49","author":"M Kearns","year":"2002","unstructured":"Kearns, M., & Singh, S. (2002). Near-optimal reinforcement learning in polynomial time. Machine Learning, 49, 209\u2013232.","journal-title":"Machine Learning"},{"key":"5767_CR15","doi-asserted-by":"crossref","unstructured":"Kolter, J. Z., & Ng, A. Y. (2009). Near-Bayesian exploration in polynomial time. In Proceedings of the 26th international conference on machine learning (pp. 513\u2013520).","DOI":"10.1145\/1553374.1553441"},{"key":"5767_CR16","unstructured":"Ortner, R., & Ryabko, D. (2012). Online regret bounds for undiscounted continuous reinforcement learning. In Proceedings of the 25th international conference on neural information processing systems (pp. 1763\u20131771)."},{"key":"5767_CR17","unstructured":"Osband, I., Roy, B. V., & Russo, D. (2013). (More) efficient reinforcement learning via posterior sampling. In Proceedings of the 26th international conference on neural information processing systems (pp. 3003\u20133011)."},{"key":"5767_CR18","doi-asserted-by":"crossref","unstructured":"Poupart, P., Vlassis, N., Hoey, J., & Regan, K. (2006). An analytic solution to discrete Bayesian reinforcement learning. In Proceedings of the 23rd international conference on machine learning (pp. 697\u2013704).","DOI":"10.1145\/1143844.1143932"},{"key":"5767_CR19","volume-title":"Markov decision processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"2005","unstructured":"Puterman, M. L. (2005). Markov decision processes: Discrete Stochastic Dynamic Programming. New York: Wiley-Interscience."},{"key":"5767_CR20","unstructured":"Ross, S., Chaib-draa, B., & Pineau, J. (2007). Bayes-adaptive POMDPs. In Proceedings of the 20th international conference on neural information processing systems (pp. 1225\u20131232)."},{"key":"5767_CR21","unstructured":"Sorg, J., Singh, S., & Lewis, R. L. (2010). Variance-based rewards for approximate Bayesian reinforcement learning. In Proceedings of the 26th conference on uncertainty in artificial intelligence."},{"key":"5767_CR22","doi-asserted-by":"crossref","unstructured":"Strehl, A. L., & Littman, M. L. (2005) A theoretical analysis of model-based interval estimation. In Proceedings of the 22nd international conference on machine learning (pp. 856\u2013863).","DOI":"10.1145\/1102351.1102459"},{"key":"5767_CR23","doi-asserted-by":"publisher","first-page":"1309","DOI":"10.1016\/j.jcss.2007.08.009","volume":"74","author":"AL Strehl","year":"2008","unstructured":"Strehl, A. L., & Littman, M. L. (2008). An analysis of model-based interval estimation for Markov decision processes. Journal of Computer and System Sciences, 74, 1309\u20131331.","journal-title":"Journal of Computer and System Sciences"},{"key":"5767_CR24","unstructured":"Strens, M. (2000). A Bayesian framework for reinforcement learning. In Proceedings of the 17th international conference on machine learning (pp. 943\u2013950)."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-018-5767-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-018-5767-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-018-5767-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,18]],"date-time":"2019-12-18T20:03:11Z","timestamp":1576699391000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-018-5767-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12,19]]},"references-count":24,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2019,5]]}},"alternative-id":["5767"],"URL":"https:\/\/doi.org\/10.1007\/s10994-018-5767-4","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2018,12,19]]},"assertion":[{"value":"1 April 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 September 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 December 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}