{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T03:00:43Z","timestamp":1770346843498,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2009,6,14]],"date-time":"2009-06-14T00:00:00Z","timestamp":1244937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["FA8650-05-C-7261"],"award-info":[{"award-number":["FA8650-05-C-7261"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2009,6,14]]},"DOI":"10.1145\/1553374.1553441","type":"proceedings-article","created":{"date-parts":[[2009,6,16]],"date-time":"2009-06-16T13:34:36Z","timestamp":1245159276000},"page":"513-520","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":69,"title":["Near-Bayesian exploration in polynomial time"],"prefix":"10.1145","author":[{"given":"J. Zico","family":"Kolter","sequence":"first","affiliation":[{"name":"Stanford University, CA"}]},{"given":"Andrew Y.","family":"Ng","sequence":"additional","affiliation":[{"name":"Stanford University, CA"}]}],"member":"320","published-online":{"date-parts":[[2009,6,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"A Bayesian sampling approach to exploration in reinforcement learning. (Preprint)","author":"Asmuth J.","year":"2009","unstructured":"Asmuth , J. , Li , L. , Littman , M. L. , Nouri , A. , &amp; Wingate , D. ( 2009 ). A Bayesian sampling approach to exploration in reinforcement learning. (Preprint) . Asmuth, J., Li, L., Littman, M. 
L., Nouri, A., &amp; Wingate, D. (2009). A Bayesian sampling approach to exploration in reinforcement learning. (Preprint)."},{"key":"e_1_3_2_1_2_1","volume-title":"Logarithmic online regret bounds for undiscounted reinforcement learning. Neural Information Processing Systems (pp. 49--56)","author":"Auer P.","year":"2007","unstructured":"Auer , P. , &amp; Ortner , R. ( 2007 ). Logarithmic online regret bounds for undiscounted reinforcement learning. Neural Information Processing Systems (pp. 49--56) . Auer, P., &amp; Ortner, R. (2007). Logarithmic online regret bounds for undiscounted reinforcement learning. Neural Information Processing Systems (pp. 49--56)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1162\/153244303765208377"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the International Conference on Uncertainty in Artificial Intelligence (pp. 53--61)","author":"Brunskill E.","year":"2008","unstructured":"Brunskill , E. , Leffler , B. R. , Li , L. , Littman , M. L. , &amp; Roy , N. ( 2008 ). CORL: A continuous-state offset-dynamics reinforcement learner . Proceedings of the International Conference on Uncertainty in Artificial Intelligence (pp. 53--61) . Brunskill, E., Leffler, B. R., Li, L., Littman, M. L., &amp; Roy, N. (2008). CORL: A continuous-state offset-dynamics reinforcement learner. Proceedings of the International Conference on Uncertainty in Artificial Intelligence (pp. 53--61)."},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the International Conference on Uncertainty in Artificial Intelligence (pp. 150--159)","author":"Dearden R.","year":"1999","unstructured":"Dearden , R. , Friedman , N. , &amp; Andre , D. ( 1999 ). Model based Bayesian exploration . Proceedings of the International Conference on Uncertainty in Artificial Intelligence (pp. 150--159) . Dearden, R., Friedman, N., &amp; Andre, D. (1999). Model based Bayesian exploration. 
Proceedings of the International Conference on Uncertainty in Artificial Intelligence (pp. 150--159)."},{"key":"e_1_3_2_1_6_1","first-page":"874","article-title":"Dual control theory, parts I-IV","volume":"21","author":"Fel'dbaum A. A.","year":"1961","unstructured":"Fel'dbaum , A. A. ( 1961 ). Dual control theory, parts I-IV . Automation and Remote Control , 21 874 -- 880 , 21 1033--1039, 22 1--12, 22 109--121. Fel'dbaum, A. A. (1961). Dual control theory, parts I-IV. Automation and Remote Control, 21 874--880, 21 1033--1039, 22 1--12, 22 109--121.","journal-title":"Automation and Remote Control"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","DOI":"10.1007\/b96083","volume-title":"Adaptive dual control: Theory and applications","author":"Filatov N.","year":"2004","unstructured":"Filatov , N. , &amp; Unbehauen , H. ( 2004 ). Adaptive dual control: Theory and applications . Springer . Filatov, N., &amp; Unbehauen, H. (2004). Adaptive dual control: Theory and applications. Springer."},{"key":"e_1_3_2_1_8_1","volume-title":"Multiarmed bandit allocation indices","author":"Gittins J. C.","year":"1989","unstructured":"Gittins , J. C. ( 1989 ). Multiarmed bandit allocation indices . Wiley . Gittins, J. C. (1989). Multiarmed bandit allocation indices. Wiley."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the International Conference on Machine Learning (pp. 306--312)","author":"Kakade S.","year":"2003","unstructured":"Kakade , S. , Kearns , M. , &amp; Langford , J. ( 2003 ). Exploration in metric state spaces . Proceedings of the International Conference on Machine Learning (pp. 306--312) . Kakade, S., Kearns, M., &amp; Langford, J. (2003). Exploration in metric state spaces. Proceedings of the International Conference on Machine Learning (pp. 306--312)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the International Joint Conference on Artificial Intelligence (pp. 740--747)","author":"Kearns M.","year":"1999","unstructured":"Kearns , M. 
, &amp; Koller , D. ( 1999 ). Efficient reinforcement learning in factored MDPs . Proceedings of the International Joint Conference on Artificial Intelligence (pp. 740--747) . Kearns, M., &amp; Koller, D. (1999). Efficient reinforcement learning in factored MDPs. Proceedings of the International Joint Conference on Artificial Intelligence (pp. 740--747)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553441"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143932"},{"key":"e_1_3_2_1_15_1","volume-title":"Markov decision processes: Discrete stochastic dynamic programming","author":"Puterman M. L.","year":"2005","unstructured":"Puterman , M. L. ( 2005 ). Markov decision processes: Discrete stochastic dynamic programming . Wiley . Puterman, M. L. (2005). Markov decision processes: Discrete stochastic dynamic programming. Wiley."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1214\/aop\/1176995801"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"e_1_3_2_1_19_1","volume-title":"Online linear regression and its application to model-based reinforcement learning. Neural Information Processing Systems (pp. 1417--1424)","author":"Strehl A. L.","year":"2008","unstructured":"Strehl , A. L. , &amp; Littman , M. L. ( 2008 b). Online linear regression and its application to model-based reinforcement learning. Neural Information Processing Systems (pp. 1417--1424) . Strehl, A. L., &amp; Littman, M. L. (2008b). Online linear regression and its application to model-based reinforcement learning. Neural Information Processing Systems (pp. 1417--1424)."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the International Conference on Machine Learning (pp. 
943--950)","author":"Strens M. J.","year":"2000","unstructured":"Strens , M. J. ( 2000 ). A Bayesian framework for reinforcement learning . Proceedings of the International Conference on Machine Learning (pp. 943--950) . Strens, M. J. (2000). A Bayesian framework for reinforcement learning. Proceedings of the International Conference on Machine Learning (pp. 943--950)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102472"}],"event":{"name":"ICML '09: The 26th Annual International Conference on Machine Learning held in conjunction with the 2007 International Conference on Inductive Logic Programming","location":"Montreal Quebec Canada","acronym":"ICML '09","sponsor":["NSF","Microsoft Research","MITACS"]},"container-title":["Proceedings of the 26th Annual International Conference on Machine Learning"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1553374.1553441","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1553374.1553441","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T13:29:35Z","timestamp":1750253375000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1553374.1553441"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,6,14]]},"references-count":20,"alternative-id":["10.1145\/1553374.1553441","10.1145\/1553374"],"URL":"https:\/\/doi.org\/10.1145\/1553374.1553441","relation":{},"subject":[],"published":{"date-parts":[[2009,6,14]]},"assertion":[{"value":"2009-06-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}