{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T03:03:01Z","timestamp":1772679781925,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":8,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540729259","type":"print"},{"value":"9783540729273","type":"electronic"}],"license":[{"start":{"date-parts":[[2007,1,1]],"date-time":"2007-01-01T00:00:00Z","timestamp":1167609600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2007]]},"DOI":"10.1007\/978-3-540-72927-3_20","type":"book-chapter","created":{"date-parts":[[2007,6,11]],"date-time":"2007-06-11T22:30:27Z","timestamp":1181601027000},"page":"263-277","source":"Crossref","is-referenced-by-count":11,"title":["Bounded Parameter Markov Decision Processes with Average Reward Criterion"],"prefix":"10.1007","author":[{"given":"Ambuj","family":"Tewari","sequence":"first","affiliation":[]},{"given":"Peter L.","family":"Bartlett","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"20_CR1","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/S0004-3702(00)00047-3","volume":"122","author":"R. Givan","year":"2000","unstructured":"Givan, R., Leach, S., Dean, T.: Bounded-parameter Markov decision processes. Artificial Intelligence\u00a0122, 71\u2013109 (2000)","journal-title":"Artificial Intelligence"},{"key":"20_CR2","first-page":"857","volume-title":"Proceedings of the Twenty-Second International Conference on Machine Learning","author":"A.L. Strehl","year":"2005","unstructured":"Strehl, A.L., Littman, M.: A theoretical analysis of model-based interval estimation. In: Proceedings of the Twenty-Second International Conference on Machine Learning, pp. 857\u2013864. ACM Press, New York (2005)"},{"key":"20_CR3","volume-title":"Advances in Neural Information Processing Systems 19","author":"P. Auer","year":"2007","unstructured":"Auer, P., Ortner, R.: Logarithmic online regret bounds for undiscounted reinforcement learning. In: Advances in Neural Information Processing Systems 19, MIT Press, Cambridge (2007) (to appear)"},{"key":"20_CR4","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1162\/153244303765208377","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-MAX \u2013 a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research\u00a03, 213\u2013231 (2002)","journal-title":"Journal of Machine Learning Research"},{"key":"20_CR5","first-page":"1499","volume-title":"Advances in Neural Information Processing Systems 14","author":"E. Even-Dar","year":"2001","unstructured":"Even-Dar, E., Mansour, Y.: Convergence of optimistic and incremental Q-learning. In: Advances in Neural Information Processing Systems 14, pp. 1499\u20131506. MIT Press, Cambridge (2001)"},{"key":"20_CR6","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1287\/opre.1050.0216","volume":"53","author":"A. Nilim","year":"2005","unstructured":"Nilim, A., El Ghaoui, L.: Robust control of Markov decision processes with uncertain transition matrices. Operations Research\u00a053, 780\u2013798 (2005)","journal-title":"Operations Research"},{"key":"20_CR7","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control. Vol. 2. Athena Scientific, Belmont, MA (1995)"},{"key":"20_CR8","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1287\/moor.22.1.222","volume":"22","author":"A.N. Burnetas","year":"1997","unstructured":"Burnetas, A.N., Katehakis, M.N.: Optimal adaptive policies for Markov decision processes. Mathematics of Operations Research\u00a022, 222\u2013255 (1997)","journal-title":"Mathematics of Operations Research"}],"container-title":["Lecture Notes in Computer Science","Learning Theory"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-72927-3_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,19]],"date-time":"2019-05-19T09:56:32Z","timestamp":1558259792000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-72927-3_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007]]},"ISBN":["9783540729259","9783540729273"],"references-count":8,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-72927-3_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007]]}}}