{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T09:06:34Z","timestamp":1743066394812,"version":"3.40.3"},"publisher-location":"Cham","reference-count":14,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319213644"},{"type":"electronic","value":"9783319213651"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-21365-1_19","type":"book-chapter","created":{"date-parts":[[2015,7,14]],"date-time":"2015-07-14T04:07:08Z","timestamp":1436846828000},"page":"177-186","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Using Localization and Factorization to Reduce the Complexity of Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Peter","family":"Sunehag","sequence":"first","affiliation":[]},{"given":"Marcus","family":"Hutter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,7,15]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Diuk, C., Li, L., Leffer, B.R.: The adaptive k-meteorologists problem and its application to structure learning and feature selection in reinforcement learning. In: Danyluk, A.P., Bottou, L., Littman, M.L. (eds.) ICML. ACM International Conference Proceeding Series, vol. 382 (2009)","DOI":"10.1145\/1553374.1553406"},{"key":"19_CR2","doi-asserted-by":"crossref","DOI":"10.1007\/b138233","volume-title":"Universal Articial Intelligence: Sequential Decisions based on Algorithmic Probability","author":"M Hutter","year":"2005","unstructured":"Hutter, M.: Universal Articial Intelligence: Sequential Decisions based on Algorithmic Probability. Springer, Berlin (2005)"},{"key":"19_CR3","unstructured":"Lattimore, T.: Theory of General Reinforcement Learning. Ph.D. thesis, Australian National University (2014)"},{"key":"19_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1007\/978-3-642-34106-9_26","volume-title":"Algorithmic Learning Theory","author":"T Lattimore","year":"2012","unstructured":"Lattimore, T., Hutter, M.: PAC bounds for discounted MDPs. In: Bshouty, N.H., Stoltz, G., Vayatis, N., Zeugmann, T. (eds.) ALT 2012. LNCS, vol. 7568, pp. 320\u2013334. Springer, Heidelberg (2012)"},{"issue":"3","key":"19_CR5","first-page":"28","volume":"28","author":"T Lattimore","year":"2013","unstructured":"Lattimore, T., Hutter, M., Sunehag, P.: The sample-complexity of general reinforcement learning. Journal of Machine Learning Research, W&CP: ICML 28(3), 28\u201336 (2013)","journal-title":"Journal of Machine Learning Research, W&CP: ICML"},{"key":"19_CR6","unstructured":"Russell, S.J., Norvig, P.: Artificial Intelligence: A Modern Approach, 3rd edn. Prentice Hall, Englewood Clifs (2010)"},{"key":"19_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1007\/978-3-642-24412-4_27","volume-title":"Algorithmic Learning Theory","author":"P Sunehag","year":"2011","unstructured":"Sunehag, P., Hutter, M.: Axioms for rational reinforcement learning. In: Kivinen, J., Szepesv\u00e1ri, C., Ukkonen, E., Zeugmann, T. (eds.) ALT 2011. LNCS, vol. 6925, pp. 338\u2013352. Springer, Heidelberg (2011)"},{"key":"19_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-35101-3_2","volume-title":"AI 2012: Advances in Artificial Intelligence","author":"P Sunehag","year":"2012","unstructured":"Sunehag, P., Hutter, M.: Optimistic agents are asymptotically optimal. In: Thielscher, M., Zhang, D. (eds.) AI 2012. LNCS, vol. 7691, pp. 15\u201326. Springer, Heidelberg (2012)"},{"key":"19_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1007\/978-3-642-35506-6_32","volume-title":"Artificial General Intelligence","author":"P Sunehag","year":"2012","unstructured":"Sunehag, P., Hutter, M.: Optimistic AIXI. In: Bach, J., Goertzel, B., Ikl\u00e9, M. (eds.) AGI 2012. LNCS, vol. 7716, pp. 312\u2013321. Springer, Heidelberg (2012)"},{"key":"19_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1007\/978-3-642-39521-5_16","volume-title":"Artificial General Intelligence","author":"P Sunehag","year":"2013","unstructured":"Sunehag, P., Hutter, M.: Learning agents with evolving hypothesis classes. In: K\u00fchnberger, K.-U., Rudolph, S., Wang, P. (eds.) AGI 2013. LNCS, vol. 7999, pp. 150\u2013159. Springer, Heidelberg (2013)"},{"key":"19_CR11","unstructured":"Sunehag, P., Hutter, M.: A dual process theory of optimistic cognition. In: Annual Conference of the Cognitive Science Society, CogSci 2014 (2014)"},{"key":"19_CR12","unstructured":"Sunehag, P., Hutter, M.: Rationality, Optimism and Guarantees in General Reinforcement Learning. Journal of Machine Learning Reserch (to appear, 2015)"},{"issue":"1","key":"19_CR13","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1613\/jair.3125","volume":"40","author":"J Veness","year":"2011","unstructured":"Veness, J., Ng, K.S., Hutter, M., Uther, W., Silver, D.: A Monte-Carlo AIXI approximation. Journal of Artifiicial Intelligence Research 40(1), 95\u2013142 (2011)","journal-title":"Journal of Artifiicial Intelligence Research"},{"key":"19_CR14","doi-asserted-by":"publisher","first-page":"653","DOI":"10.1109\/18.382012","volume":"41","author":"F Willems","year":"1995","unstructured":"Willems, F., Shtarkov, Y., Tjalkens, T.: The context tree weighting method: Basic properties. IEEE Transactions on Information Theory 41, 653\u2013664 (1995)","journal-title":"IEEE Transactions on Information Theory"}],"container-title":["Lecture Notes in Computer Science","Artificial General Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-21365-1_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,28]],"date-time":"2023-01-28T12:12:27Z","timestamp":1674907947000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-21365-1_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319213644","9783319213651"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-21365-1_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"15 July 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}