{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:06:04Z","timestamp":1773252364563,"version":"3.50.1"},"reference-count":16,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,9]]},"DOI":"10.1109\/allerton.2010.5706896","type":"proceedings-article","created":{"date-parts":[[2011,2,3]],"date-time":"2011-02-03T21:50:52Z","timestamp":1296769852000},"page":"115-122","source":"Crossref","is-referenced-by-count":26,"title":["Optimism in reinforcement learning and Kullback-Leibler divergence"],"prefix":"10.1109","author":[{"given":"Sarah","family":"Filippi","sequence":"first","affiliation":[]},{"given":"Olivier","family":"Cappe","sequence":"additional","affiliation":[]},{"given":"Aurelien","family":"Garivier","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"1563","article-title":"Near-optimal Regret Bounds for Reinforcement Learning","volume":"11","author":"jaksch","year":"2010","journal-title":"Journal of Machine Learning Research"},{"key":"ref11","article-title":"REGAL: A Regularization based Algorithm for Reinforcement Learning in Weakly Communicating MDPs","author":"bartlett","year":"2009","journal-title":"Annual Conference on Uncertainty in Artificial Intelligence"},{"key":"ref12","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"21","author":"auer","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref13","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref14","article-title":"Context tree selection: A unifying view","author":"garivier","year":"2010"},{"key":"ref15","article-title":"On upper-confidence bound policies for non-stationary bandit problems","author":"garivier","year":"2008","journal-title":"Arxiv preprint arXiv 0805 3415"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1050.0216"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref6","first-page":"213","article-title":"R-max-a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"brafman","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref5","first-page":"1505","article-title":"Optimistic linear programming gives logarithmic regret for irreducible MDPs","volume":"20","author":"tewari","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1287\/moor.22.1.222"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"key":"ref2","first-page":"1079","article-title":"Action elimination and stopping conditions for the multi-armed bandit and reinforcement learning problems","volume":"7","author":"even-dar","year":"2006","journal-title":"Journal of Machine Learning Research"},{"key":"ref1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref9","first-page":"49","article-title":"Logarithmic online regret bounds for undiscounted reinforcement learning","author":"auer","year":"2007","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2010 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","location":"Monticello, IL, USA","start":{"date-parts":[[2010,9,29]]},"end":{"date-parts":[[2010,10,1]]}},"container-title":["2010 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5701578\/5706874\/05706896.pdf?arnumber=5706896","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,11,18]],"date-time":"2021-11-18T13:53:10Z","timestamp":1637243590000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5706896\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,9]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1109\/allerton.2010.5706896","relation":{},"subject":[],"published":{"date-parts":[[2010,9]]}}}