{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:22:05Z","timestamp":1774120925142,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":77,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_19","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"613-630","source":"Crossref","is-referenced-by-count":17,"title":["Conclusions, Future Directions and Outlook"],"prefix":"10.1007","author":[{"given":"Marco","family":"Wiering","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martijn","family":"van Otterlo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"19_CR1","unstructured":"Alpaydin, E.: Introduction to Machine learning. The MIT Press (2010)"},{"key":"19_CR2","unstructured":"Azar, M.G., Munos, R., Ghavamzadaeh, M., Kappen, H.J.: Speedy Q-learning. Advances in Neural Information Processing Systems (2011)"},{"key":"19_CR3","unstructured":"Bakker, B., Schmidhuber, J.: Hierarchical reinforcement learning based on subgoal discovery and subpolicy specialization. In: Proceedings of the 8th Conference on Intelligent Autonomous Systems, IAS-8, pp. 438\u2013445 (2004)"},{"key":"19_CR4","unstructured":"Baxter, J., Tridgell, A., Weaver, L.: Knightcap: A chess program that learns by combining TD(\u03bb) with minimax search. Tech. rep., Australian National University, Canberra (1997)"},{"key":"19_CR5","unstructured":"Berliner, H.: Experiences in evaluation with BKG - a program that plays backgammon. In: Proceedings of IJCAI, pp. 428\u2013433 (1977)"},{"key":"19_CR6","volume-title":"Neuro-dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"19_CR7","volume-title":"Pattern Recognition and Machine learning","author":"C. Bishop","year":"2006","unstructured":"Bishop, C.: Pattern Recognition and Machine learning. Springer, Heidelberg (2006)"},{"issue":"7","key":"19_CR8","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1145\/1538788.1538812","volume":"52","author":"A. Coates","year":"2009","unstructured":"Coates, A., Abbeel, P., Ng, A.: Apprenticeship learning for helicopter control. Commun. ACM\u00a052(7), 97\u2013105 (2009)","journal-title":"Commun. ACM"},{"key":"19_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-540-75538-8_7","volume-title":"Computers and Games","author":"R. Coulom","year":"2007","unstructured":"Coulom, R.: Efficient Selectivity and Backup Operators in Monte-carlo Tree Search. In: van den Herik, H.J., Ciancarini, P., Donkers, H.H.L.M(J.) (eds.) CG 2006. LNCS, vol.\u00a04630, pp. 72\u201383. Springer, Heidelberg (2007)"},{"key":"19_CR10","unstructured":"Cramer, N.L.: A representation for the adaptive generation of simple sequential programs. In: Grefenstette, J. (ed.) Proceedings of an International Conference on Genetic Algorithms and Their Applications, pp. 183\u2013187 (1985)"},{"key":"19_CR11","unstructured":"Crites, R., Barto, A.: Improving elevator performance using reinforcement learning. In: Touretzky, D., Mozer, M., Hasselmo, M. (eds.) Advances in Neural Information Processing Systems, Cambridge, MA, vol.\u00a08, pp. 1017\u20131023 (1996)"},{"key":"19_CR12","unstructured":"Di Caro, G., Dorigo, M.: An adaptive multi-agent routing algorithm inspired by ants behavior. In: Proceedings of PART 1998 - Fifth Annual Australasian Conference on Parallel and Real-Time Systems (1998)"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Dietterich, T., Wang, X.: Batch value function approximation via support vectors. In: Advances in Neural Information Processing Systems, vol.\u00a014, pp. 1491\u20131498 (2002)","DOI":"10.7551\/mitpress\/1120.001.0001"},{"issue":"1","key":"19_CR14","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/4235.585892","volume":"1","author":"M. Dorigo","year":"1997","unstructured":"Dorigo, M., Gambardella, L.M.: Ant colony system: A cooperative learning approach to the traveling salesman problem. Evolutionary Computation\u00a01(1), 53\u201366 (1997)","journal-title":"Evolutionary Computation"},{"issue":"1","key":"19_CR15","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/3477.484436","volume":"26","author":"M. Dorigo","year":"1996","unstructured":"Dorigo, M., Maniezzo, V., Colorni, A.: The ant system: Optimization by a colony of cooperating agents. IEEE Transactions on Systems, Man, and Cybernetics-Part B\u00a026(1), 29\u201341 (1996)","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics-Part B"},{"key":"19_CR16","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"19_CR17","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1057\/palgrave.jors.2600676","volume":"50","author":"L.M. Gambardella","year":"1999","unstructured":"Gambardella, L.M., Taillard, E., Dorigo, M.: Ant colonies for the qadratic assignement problem. Journal of the Operational Research Society\u00a050, 167\u2013176 (1999)","journal-title":"Journal of the Operational Research Society"},{"key":"19_CR18","unstructured":"van Hasselt, H.: Double Q-learning. In: Advances in Neural Information Processing Systems, vol.\u00a023, pp. 2613\u20132621 (2010)"},{"key":"19_CR19","volume-title":"Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability","author":"M. Hutter","year":"2004","unstructured":"Hutter, M.: Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability. Springer, Berlin (2004)"},{"issue":"1-2","key":"19_CR20","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s10994-010-5223-6","volume":"84","author":"J. Kober","year":"2011","unstructured":"Kober, J., Peters, J.: Policy Search for Motor Primitives in Robotics. Machine Learning\u00a084(1-2), 171\u2013203 (2011)","journal-title":"Machine Learning"},{"key":"19_CR21","first-page":"1","volume":"1","author":"A. Kolmogorov","year":"1965","unstructured":"Kolmogorov, A.: Three approaches to the quantitative definition of information. Problems of Information Transmission\u00a01, 1\u201311 (1965)","journal-title":"Problems of Information Transmission"},{"key":"19_CR22","unstructured":"Koza, J.R.: Genetic evolution and co-evolution of computer programs. In: Langton, C., Taylor, C., Farmer, J.D., Rasmussen, S. (eds.) Artificial Life II, pp. 313\u2013324. Addison Wesley Publishing Company (1992)"},{"key":"19_CR23","unstructured":"Koza, J.R.: Genetic Programming II \u2013 Automatic Discovery of Reusable Programs. MIT Press (1994)"},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Li, M., Vit\u00e1nyi, P.M.B.: An introduction to Kolmogorov complexity and its applications. In: van Leeuwen, J. (ed.) Handbook of Theoretical Computer Science, pp. 188\u2013254. Elsevier Science Publishers B.V (1990)","DOI":"10.1016\/B978-0-444-88071-0.50009-6"},{"key":"19_CR25","unstructured":"Littman, M., Boyan, J.: A distributed reinforcement learning scheme for network routing. In: Alspector, J., Goodman, R., Brown, T. (eds.) Proceedings of the First International Workshop on Applications of Neural Networks to Telecommunication, Hillsdale, New Jersey, pp. 45\u201351 (1993)"},{"key":"19_CR26","doi-asserted-by":"crossref","unstructured":"Lowe, D.: Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 315\u2013333 (2004)","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"19_CR27","unstructured":"Maei, H., Szepesvari, C., Bhatnagar, S., Sutton, R.: Toward off-policy learning control with function approximation. In: Proceedings of the International Conference on Machine Learning, pp. 719\u2013726 (2010)"},{"key":"19_CR28","first-page":"387","volume-title":"Machine Learning: Proceedings of the Twelfth International Conference","author":"R.A. McCallum","year":"1995","unstructured":"McCallum, R.A.: Instance-based utile distinctions for reinforcement learning with hidden state. In: Prieditis, A., Russell, S. (eds.) Machine Learning: Proceedings of the Twelfth International Conference, pp. 387\u2013395. Morgan Kaufmann Publishers, San Francisco (1995)"},{"key":"19_CR29","unstructured":"McGovern, A., Andrew, G., Barto, E.M.: Scheduling straight-line code using reinforcement learning and rollouts. In: Proceedings of Neural Information Processing Systems. MIT Press (1999)"},{"key":"19_CR30","volume-title":"Machine learning","author":"T.M. Mitchell","year":"1996","unstructured":"Mitchell, T.M.: Machine learning. McGraw Hill, New York (1996)"},{"key":"19_CR31","first-page":"103","volume":"13","author":"A.W. Moore","year":"1993","unstructured":"Moore, A.W., Atkeson, C.G.: Prioritized sweeping: Reinforcement learning with less data and less time. Machine Learning\u00a013, 103\u2013130 (1993)","journal-title":"Machine Learning"},{"key":"19_CR32","first-page":"11","volume":"22","author":"D.E. Moriarty","year":"1996","unstructured":"Moriarty, D.E., Miikkulainen, R.: Efficient reinforcement learning through symbiotic evolution. Machine Learning\u00a022, 11\u201332 (1996)","journal-title":"Machine Learning"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Nevmyvaka, Y., Feng, Y., Kearns, M.: Reinforcement learning for optimized trade execution. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 673\u2013680 (2006)","DOI":"10.1145\/1143844.1143929"},{"issue":"1","key":"19_CR34","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/s10994-010-5202-y","volume":"81","author":"A. Nouri","year":"2010","unstructured":"Nouri, A., Littman, M.: Dimension reduction and its application to model-based exploration in continuous spaces. Machine Learning\u00a081(1), 85\u201398 (2010)","journal-title":"Machine Learning"},{"key":"19_CR35","unstructured":"van Otterlo, M.: Efficient reinforcement learning using relational aggregation. Proceedings of the Sixth European Workshop on Reinforcement Learning, EWRL-6 (2003)"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Peters, J., M\u00fclling, K., Altun, Y.: Relative entropy policy search. In: Fox, M., Poole, D. (eds.) Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2010 (2010)","DOI":"10.1609\/aaai.v24i1.7727"},{"key":"19_CR37","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Reinforcement Learning for Humanoid Robotics. In: IEEE-RAS International Conference on Humanoid Robots, Humanoids (2003)"},{"issue":"4","key":"19_CR38","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Reinforcement Learning of Motor Skills with Policy Gradients. Neural Networks\u00a021(4), 682\u2013697 (2008), doi:10.1016\/j.neunet.2008.02.003","journal-title":"Neural Networks"},{"key":"19_CR39","unstructured":"Poland, J., Hutter, M.: Universal learning of repeated matrix games. In: Proc. 15th Annual Machine Learning Conf. of Belgium and The Netherlands (Benelearn 2006), Ghent, pp. 7\u201314 (2006)"},{"key":"19_CR40","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"19_CR41","unstructured":"Riedmiller, S., Riedmiller, M.: A neural reinforcement learning approach to learn local dispatching policies in production scheduling. In: Proceedings of International Joint Conference on Artificial Intelligence (IJCAI 1999) (1999)"},{"key":"19_CR42","unstructured":"Ring, M.: Continual learning in reinforcement environments. PhD thesis, University of Texas, Austin, Texas (1994)"},{"issue":"2","key":"19_CR43","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1162\/evco.1997.5.2.123","volume":"5","author":"R.P. Sa\u0142ustowicz","year":"1997","unstructured":"Sa\u0142ustowicz, R.P., Schmidhuber, J.H.: Probabilistic incremental program evolution. Evolutionary Computation\u00a05(2), 123\u2013141 (1997)","journal-title":"Evolutionary Computation"},{"key":"19_CR44","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1007\/3-540-45435-7_15","volume-title":"Computational Learning Theory","author":"J. Schmidhuber","year":"2002","unstructured":"Schmidhuber, J.: The Speed Prior: A New Simplicity Measure Yielding Near-Optimal Computable Predictions. In: Kivinen, J., Sloan, R.H. (eds.) COLT 2002. LNCS (LNAI), vol.\u00a02375, pp. 216\u2013228. Springer, Heidelberg (2002)"},{"key":"19_CR45","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1023\/B:MACH.0000015880.99707.b2","volume":"54","author":"J. Schmidhuber","year":"2004","unstructured":"Schmidhuber, J.: Optimal ordered problem solver. Machine Learning\u00a054, 211\u2013254 (2004)","journal-title":"Machine Learning"},{"issue":"2","key":"19_CR46","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/s12559-009-9014-y","volume":"1","author":"J. Schmidhuber","year":"2009","unstructured":"Schmidhuber, J.: Ultimate cognition \u00e0 la G\u00f6del. Cognitive Computation\u00a01(2), 177\u2013193 (2009)","journal-title":"Cognitive Computation"},{"issue":"3","key":"19_CR47","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1109\/TAMD.2010.2056368","volume":"2","author":"J. Schmidhuber","year":"2010","unstructured":"Schmidhuber, J.: Formal theory of creativity, fun, and intrinsic motivation (1990-2010). IEEE Transactions on Autonomous Mental Development\u00a02(3), 230\u2013247 (2010)","journal-title":"IEEE Transactions on Autonomous Mental Development"},{"key":"19_CR48","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J., Zhao, J., Schraudolph, N.: Reinforcement learning with self-modifying policies. In: Thrun, S., Pratt, L. (eds.) Learning to Learn, pp. 293\u2013309. Kluwer (1997a)","DOI":"10.1007\/978-1-4615-5529-2_12"},{"key":"19_CR49","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J., Zhao, J., Schraudolph, N.N.: Reinforcement learning with self-modifying policies. In: Thrun, S., Pratt, L. (eds.) Learning to Learn. Kluwer (1997b)","DOI":"10.1007\/978-1-4615-5529-2_12"},{"key":"19_CR50","unstructured":"Schmidhuber, J.H.: Temporal-difference-driven learning in recurrent networks. In: Eckmiller, R., Hartmann, G., Hauske, G. (eds.) Parallel Processing in Neural Systems and Computers, pp. 209\u2013212. North-Holland (1990)"},{"key":"19_CR51","first-page":"1458","volume-title":"Proceedings of the International Joint Conference on Neural Networks","author":"J.H. Schmidhuber","year":"1991","unstructured":"Schmidhuber, J.H.: Curious model-building control systems. In: Proceedings of the International Joint Conference on Neural Networks, vol.\u00a02, pp. 1458\u20131463. IEEE, Singapore (1991a)"},{"key":"19_CR52","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.H.: A possibility for implementing curiosity and boredom in model-building neural controllers. In: Meyer, J.A., Wilson, S.W. (eds.) Proceedings of the International Conference on Simulation of Adaptive Behavior: From Animals to Animats, pp. 222\u2013227. MIT Press\/Bradford Books (1991b)","DOI":"10.7551\/mitpress\/3115.003.0030"},{"key":"19_CR53","volume-title":"Evolutionary Computation: Theory and Applications","author":"J.H. Schmidhuber","year":"1996","unstructured":"Schmidhuber, J.H.: A general method for incremental self-improvement and multi-agent learning in unrestricted environments. In: Yao, X. (ed.) Evolutionary Computation: Theory and Applications. Scientific Publ. Co., Singapore (1996)"},{"key":"19_CR54","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1023\/A:1007383707642","volume":"28","author":"J.H. Schmidhuber","year":"1997","unstructured":"Schmidhuber, J.H., Zhao, J., Wiering, M.A.: Shifting inductive bias with success-story algorithm, adaptive Levin search, and incremental self-improvement. Machine Learning\u00a028, 105\u2013130 (1997c)","journal-title":"Machine Learning"},{"key":"19_CR55","unstructured":"Schoknecht, R.: Optimality of reinforcement learning algorithms with linear function approximation. In: Becker, S., Thrun, S., Obermayer, K. (eds.) Advances in Neural Information Processing Systems, NIPS 2002, pp. 1555\u20131562 (2002)"},{"key":"19_CR56","first-page":"2045","volume":"12","author":"H. van Seijen","year":"2011","unstructured":"van Seijen, H., Whiteson, S., van Hasselt, H., Wiering, M.: Exploiting best-match equations for efficient reinforcement learning. Journal of Machine Learning Research\u00a012, 2045\u20132094 (2011)","journal-title":"Journal of Machine Learning Research"},{"key":"19_CR57","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1145\/1449955.1449811","volume":"43","author":"C. Simpkins","year":"2008","unstructured":"Simpkins, C., Bhat, S., Isbell Jr., C., Mateas, M.: Towards adaptive programming: integrating reinforcement learning into a programming language. SIGPLAN Not.\u00a043, 603\u2013614 (2008)","journal-title":"SIGPLAN Not."},{"key":"19_CR58","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1613\/jair.859","volume":"16","author":"S. Singh","year":"2002","unstructured":"Singh, S., Litman, D., Kearns, M., Walker, M.: Optimizing dialogue management with reinforcement learning: Experiments with the NJFun system. Journal of Artificial Intelligence Research\u00a016, 105\u2013133 (2002)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"19_CR59","doi-asserted-by":"crossref","unstructured":"Smart, W., Kaelbling, L.: Effective reinforcement learning for mobile robots. In: Proceedings of the IEEE International Conference on Robotics and Automation, pp. 3404\u20133410 (2002)","DOI":"10.1109\/ROBOT.2002.1014237"},{"key":"19_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0019-9958(64)90223-2","volume":"7","author":"R. Solomonoff","year":"1964","unstructured":"Solomonoff, R.: A formal theory of inductive inference. Part I. Information and Control\u00a07, 1\u201322 (1964)","journal-title":"Information and Control"},{"issue":"5","key":"19_CR61","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1109\/TIT.1978.1055913","volume":"IT-24","author":"R. Solomonoff","year":"1978","unstructured":"Solomonoff, R.: Complexity-based induction systems. IEEE Transactions on Information Theory\u00a0IT-24(5), 422\u2013432 (1978)","journal-title":"IEEE Transactions on Information Theory"},{"key":"19_CR62","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the methods of temporal differences. Machine Learning\u00a03, 9\u201344 (1988)","journal-title":"Machine Learning"},{"key":"19_CR63","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning and reacting based on dynamic programming. In: Machine Learning: Proceedings of the Seventh International Workshop (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"19_CR64","unstructured":"Sutton, R.S., Precup, D., Singh, S.P.: Between MDPs and semi-MDPs: Learning, planning, learning and sequential decision making. Tech. Rep. COINS 89-95, University of Massachusetts, Amherst (1998)"},{"key":"19_CR65","first-page":"2133","volume":"10","author":"B. Tanner","year":"2009","unstructured":"Tanner, B., White, A.: RL-Glue: Language-independent software for reinforcement-learning experiments. Journal of Machine Learning Research\u00a010, 2133\u20132136 (2009)","journal-title":"Journal of Machine Learning Research"},{"key":"19_CR66","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G. Tesauro","year":"1995","unstructured":"Tesauro, G.: Temporal difference learning and TD-Gammon. Communications of the ACM\u00a038, 58\u201368 (1995)","journal-title":"Communications of the ACM"},{"key":"19_CR67","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2440-0","volume-title":"The Nature of Statistical Learning Theory","author":"V. Vapnik","year":"1995","unstructured":"Vapnik, V.: The Nature of Statistical Learning Theory. Springer, Heidelberg (1995)"},{"key":"19_CR68","unstructured":"Veness, J., Silver, D., Uther, W., Blair, A.: Bootstrapping from game tree search. In: Bengio, Y., Schuurmans, D., Lafferty, J., Williams, C.K.I., Culotta, A. (eds.) Advances in Neural Information Processing Systems, vol.\u00a022, pp. 1937\u20131945 (2009)"},{"key":"19_CR69","doi-asserted-by":"crossref","unstructured":"Veness, J., Ng, K., Hutter, M., Uther, W., Silver, D.: A Monte-carlo AIXI approximation. Journal of Artificial Intelligence Research (2011)","DOI":"10.1613\/jair.3125"},{"key":"19_CR70","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. PhD thesis, King\u2019s College, Cambridge, England (1989)"},{"key":"19_CR71","first-page":"279","volume":"8","author":"C.J.C.H. Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"19_CR72","doi-asserted-by":"crossref","unstructured":"Westra, J.: Organizing adaptation using agents in serious games. PhD thesis, Utrecht University (2011)","DOI":"10.1007\/978-3-642-32326-3_7"},{"issue":"2","key":"19_CR73","doi-asserted-by":"publisher","first-page":"57","DOI":"10.4236\/jilsa.2010.22009","volume":"2","author":"M. Wiering","year":"2010","unstructured":"Wiering, M.: Self-play and using an expert to learn to play backgammon with temporal difference learning. Journal of Intelligent Learning Systems and Applications\u00a02(2), 57\u201368 (2010)","journal-title":"Journal of Intelligent Learning Systems and Applications"},{"key":"19_CR74","doi-asserted-by":"crossref","unstructured":"Wiering, M., van Hasselt, H.: Ensemble algorithms in reinforcement learning. IEEE Transactions, SMC Part B, Special Issue on Adaptive Dynamic Programming and Reinforcement Learning in Feedback Control (2008)","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"19_CR75","doi-asserted-by":"crossref","unstructured":"Wiering, M., van Hasselt, H.: The QV family compared to other reinforcement learning algorithms. In: Proceedings of the IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL 2009), pp. 101\u2013108 (2009)","DOI":"10.1109\/ADPRL.2009.4927532"},{"key":"19_CR76","unstructured":"Wiering, M.A.: Multi-agent reinforcement learning for traffic light control. In: Langley, P. (ed.) Proceedings of the Seventeenth International Conference on Machine Learning, pp. 1151\u20131158 (2000)"},{"key":"19_CR77","first-page":"534","volume-title":"Machine Learning: Proceedings of the Thirteenth International Conference","author":"M.A. Wiering","year":"1996","unstructured":"Wiering, M.A., Schmidhuber, J.H.: Solving POMDPs with Levin search and EIRA. In: Saitta, L. (ed.) Machine Learning: Proceedings of the Thirteenth International Conference, pp. 534\u2013542. Morgan Kaufmann Publishers, San Francisco (1996)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:01:54Z","timestamp":1742648514000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":77,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_19","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}