{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T04:58:00Z","timestamp":1760245080463},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540466499"},{"type":"electronic","value":"9783540466505"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11894841_27","type":"book-chapter","created":{"date-parts":[[2006,10,4]],"date-time":"2006-10-04T11:29:53Z","timestamp":1159961393000},"page":"334-347","source":"Crossref","is-referenced-by-count":4,"title":["Asymptotic Learnability of Reinforcement Problems with Arbitrary Dependence"],"prefix":"10.1007","author":[{"given":"Daniil","family":"Ryabko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcus","family":"Hutter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"27_CR1","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4684-0489-0","volume-title":"Nonparametric Statistics for Stochastic Processes","author":"D. Bosq","year":"1996","unstructured":"Bosq, D.: Nonparametric Statistics for Stochastic Processes. Springer, Heidelberg (1996)"},{"unstructured":"Brafman, R.I., Tennenholtz, M.: A general polynomial time algorithm for near-optimal reinforcement learning. In: Proc. 17th International Joint Conference on Artificial Intelligence (IJCAI 2001), pp. 734\u2013739 (1999)","key":"27_CR2"},{"key":"27_CR3","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511546921","volume-title":"Prediction, Learning, and Games","author":"N. Cesa-Bianchi","year":"2006","unstructured":"Cesa-Bianchi, N., Lugosi, G.: Prediction, Learning, and Games. Cambridge University Press, Cambridge (in preparation, 2006)"},{"doi-asserted-by":"crossref","unstructured":"Csiszar, I., Shields, P.C.: Notes on information theory and statistics. In: Foundations and Trends in Communications and Information Theory (2004)","key":"27_CR4","DOI":"10.1561\/0100000004"},{"key":"27_CR5","volume-title":"Stochastic Processes","author":"J.L. Doob","year":"1953","unstructured":"Doob, J.L.: Stochastic Processes. John Wiley & Sons, New York (1953)"},{"unstructured":"Even-Dar, E., Kakade, S.M., Mansour, Y.: Reinforcement learning in POMDPs without resets. In: IJCAI, pp. 690\u2013695 (2005)","key":"27_CR6"},{"key":"27_CR7","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/978-3-540-30215-5_22","volume-title":"Algorithmic Learning Theory","author":"M. Hutter","year":"2004","unstructured":"Hutter, M., Poland, J.: Prediction with expert advice by following the perturbed leader for general weights. In: Ben-David, S., Case, J., Maruoka, A. (eds.) ALT 2004. LNCS (LNAI), vol.\u00a03244, pp. 279\u2013293. Springer, Heidelberg (2004)"},{"key":"27_CR8","series-title":"LNAI","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1007\/3-540-45435-7_25","volume-title":"Computational Learning Theory","author":"M. Hutter","year":"2002","unstructured":"Hutter, M.: Self-optimizing and pareto-optimal policies in general environments based on bayes-mixtures. In: Kivinen, J., Sloan, R.H. (eds.) COLT 2002. LNCS (LNAI), vol.\u00a02375, pp. 364\u2013379. Springer, Heidelberg (2002)"},{"key":"27_CR9","doi-asserted-by":"publisher","first-page":"971","DOI":"10.1162\/jmlr.2003.4.6.971","volume":"4","author":"M. Hutter","year":"2003","unstructured":"Hutter, M.: Optimality of universal Bayesian prediction for general loss and alphabet. Journal of Machine Learning Research\u00a04, 971\u20131000 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"27_CR10","doi-asserted-by":"crossref","first-page":"300","DOI":"10.1007\/b138233","volume-title":"Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability","author":"M. Hutter","year":"2005","unstructured":"Hutter, M.: Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability, p. 300. Springer, Berlin (2005), http:\/\/www.idsia.ch\/~marcus\/ai\/uaibook.htm"},{"key":"27_CR11","volume-title":"Stochastic Systems: Estimation, Identification, and Adaptive Control","author":"P.R. Kumar","year":"1986","unstructured":"Kumar, P.R., Varaiya, P.P.: Stochastic Systems: Estimation, Identification, and Adaptive Control. Prentice Hall, Englewood Cliffs, NJ (1986)"},{"key":"27_CR12","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1007\/11564089_28","volume-title":"Algorithmic Learning Theory","author":"J. Poland","year":"2005","unstructured":"Poland, J., Hutter, M.: Defensive universal learning with experts. In: Jain, S., Simon, H.U., Tomita, E. (eds.) ALT 2005. LNCS (LNAI), vol.\u00a03734, pp. 356\u2013370. Springer, Heidelberg (2005)"},{"unstructured":"Poland, J., Hutter, M.: Universal learning of repeated matrix games. In: Conference Benelearn 2006 and GTDT workshop at AAMAS 2006, Ghent (2006)","key":"27_CR13"},{"key":"27_CR14","volume-title":"Advances in Neural Information Processing Systems 16","author":"D.P. de Farias","year":"2004","unstructured":"de Farias, D.P., Megiddo, N.: How to combine expert (and novice) advice when actions impact the environment? In: Thrun, S., Saul, L., Sch\u00f6lkopf, B. (eds.) Advances in Neural Information Processing Systems 16, MIT Press, Cambridge, MA (2004)"},{"key":"27_CR15","volume-title":"Artificial Intelligence. A Modern Approach","author":"S.J. Russell","year":"1995","unstructured":"Russell, S.J., Norvig, P.: Artificial Intelligence. A Modern Approach. Prentice-Hall, Englewood Cliffs (1995)"},{"key":"27_CR16","volume-title":"Reinforcement learning: An introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement learning: An introduction. MIT Press, Cambridge, MA (1998)"}],"container-title":["Lecture Notes in Computer Science","Algorithmic Learning Theory"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11894841_27.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T19:55:35Z","timestamp":1605642935000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11894841_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540466499","9783540466505"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/11894841_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}