{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T11:23:46Z","timestamp":1725708226548},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642351006"},{"type":"electronic","value":"9783642351013"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-35101-3_2","type":"book-chapter","created":{"date-parts":[[2012,11,29]],"date-time":"2012-11-29T11:40:16Z","timestamp":1354189216000},"page":"15-26","source":"Crossref","is-referenced-by-count":5,"title":["Optimistic Agents Are Asymptotically Optimal"],"prefix":"10.1007","author":[{"given":"Peter","family":"Sunehag","sequence":"first","affiliation":[]},{"given":"Marcus","family":"Hutter","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"doi-asserted-by":"crossref","unstructured":"Auer, P., Ortner, R.: Logarithmic online regret bounds for undiscounted reinforcement learning. In: Proceedings of NIPS 2006, pp. 49\u201356 (2006)","key":"2_CR1","DOI":"10.7551\/mitpress\/7503.003.0011"},{"issue":"3","key":"2_CR2","doi-asserted-by":"publisher","first-page":"882","DOI":"10.1214\/aoms\/1177704456","volume":"33","author":"D. Blackwell","year":"1962","unstructured":"Blackwell, D., Dubins, L.: Merging of Opinions with Increasing Information. The Annals of Mathematical Statistics\u00a033(3), 882\u2013886 (1962)","journal-title":"The Annals of Mathematical Statistics"},{"key":"2_CR3","volume-title":"Stochastic processes","author":"J. Doob","year":"1953","unstructured":"Doob, J.: Stochastic processes. Wiley, New York (1953)"},{"unstructured":"Even-Dar, E., Kakade, S., Mansour, Y.: Reinforcement learning in pomdps without resets. In: Proceedings of IJCAI 2005, pp. 690\u2013695 (2005)","key":"2_CR4"},{"key":"2_CR5","doi-asserted-by":"crossref","DOI":"10.1007\/b138233","volume-title":"Universal Articial Intelligence: Sequential Decisions based on Algorithmic Probability","author":"M. Hutter","year":"2005","unstructured":"Hutter, M.: Universal Articial Intelligence: Sequential Decisions based on Algorithmic Probability. Springer, Berlin (2005)"},{"unstructured":"Hutter, M.: Discrete MDL predicts in total variation. In: Advances in Neural Information Processing Systems, NIPS 2009, vol.\u00a022, pp. 817\u2013825 (2009)","key":"2_CR6"},{"unstructured":"Kearns, M.J., Singh, S.: Near-optimal reinforcement learning in polynomial time. In: Proceedings of the 15 nd International Conference on Machine Learning (ICML 1998), pp. 260\u2013268 (1998)","key":"2_CR7"},{"key":"2_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1007\/978-3-642-24412-4_29","volume-title":"Algorithmic Learning Theory","author":"T. Lattimore","year":"2011","unstructured":"Lattimore, T., Hutter, M.: Asymptotically Optimal Agents. In: Kivinen, J., Szepesv\u00e1ri, C., Ukkonen, E., Zeugmann, T. (eds.) ALT 2011. LNCS, vol.\u00a06925, pp. 368\u2013382. Springer, Heidelberg (2011)"},{"key":"2_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1007\/978-3-642-24412-4_30","volume-title":"Algorithmic Learning Theory","author":"T. Lattimore","year":"2011","unstructured":"Lattimore, T., Hutter, M.: Time Consistent Discounting. In: Kivinen, J., Szepesv\u00e1ri, C., Ukkonen, E., Zeugmann, T. (eds.) ALT 2011. LNCS, vol.\u00a06925, pp. 383\u2013397. Springer, Heidelberg (2011)"},{"key":"2_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1007\/978-3-642-34106-9_26","volume-title":"Algorithmic Learning Theory","author":"T. Lattimore","year":"2012","unstructured":"Lattimore, T., Hutter, M.: PAC Bounds for Discounted MDPs. In: Bshouty, N.H., Stoltz, G., Vayatis, N., Zeugmann, T. (eds.) ALT 2012. LNCS, vol.\u00a07568, pp. 320\u2013334. Springer, Heidelberg (2012)"},{"unstructured":"Maillard, O.-A., Munos, R., Ryabko, D.: Selecting the state-representation in reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS 2011), vol.\u00a024, pp. 2627\u20132635 (2011)","key":"2_CR11"},{"key":"2_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/978-3-642-16108-7_28","volume-title":"Algorithmic Learning Theory","author":"L. Orseau","year":"2010","unstructured":"Orseau, L.: Optimality Issues of Universal Greedy Agents with Static Priors. In: Hutter, M., Stephan, F., Vovk, V., Zeugmann, T. (eds.) Algorithmic Learning Theory. LNCS, vol.\u00a06331, pp. 345\u2013359. Springer, Heidelberg (2010)"},{"issue":"3","key":"2_CR13","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1016\/j.tcs.2008.06.039","volume":"405","author":"D. Ryabko","year":"2008","unstructured":"Ryabko, D., Hutter, M.: On the possibility of learning in reactive environments with arbitrary dependence. Theor. C.S.\u00a0405(3), 274\u2013284 (2008)","journal-title":"Theor. C.S."},{"key":"2_CR14","volume-title":"Artificial Intelligence: A Modern Approach","author":"S.J. Russell","year":"2010","unstructured":"Russell, S.J., Norvig, P.: Artificial Intelligence: A Modern Approach, 3rd edn. Prentice Hall, Englewood Cliffs (2010)","edition":"3"},{"unstructured":"Rudin, W.: Principles of mathematical analysis. McGraw-Hill (1976)","key":"2_CR15"},{"doi-asserted-by":"crossref","unstructured":"Strehl, A., Littman, M.: A theoretical analysis of model-based interval estimation. In: Proceedings of ICML 2005, pp. 856\u2013863 (2005)","key":"2_CR16","DOI":"10.1145\/1102351.1102459"},{"doi-asserted-by":"crossref","unstructured":"Strehl, A., Littman, M.: A theoretical analysis of model-based interval estimation. In: Proceedings of ICML 2005, pp. 856\u2013863 (2005)","key":"2_CR17","DOI":"10.1145\/1102351.1102459"}],"container-title":["Lecture Notes in Computer Science","AI 2012: Advances in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-35101-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,27]],"date-time":"2023-06-27T02:22:26Z","timestamp":1687832546000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-35101-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642351006","9783642351013"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-35101-3_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}