{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T07:14:32Z","timestamp":1771917272140,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":111,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_17","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"539-577","source":"Crossref","is-referenced-by-count":29,"title":["Reinforcement Learning in Games"],"prefix":"10.1007","author":[{"given":"Istv\u00e1n","family":"Szita","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Aha, D.W., Molineaux, M., Ponsen, M.: Learning to win: Case-based plan selection in a real-time strategy game. Case-Based Reasoning Research and Development, 5\u201320 (2005)","DOI":"10.1007\/11536406_4"},{"issue":"3","key":"17_CR2","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1007\/s10994-006-6225-2","volume":"63","author":"A. Amit","year":"2006","unstructured":"Amit, A., Markovitch, S.: Learning to bid in bridge. Machine Learning\u00a063(3), 287\u2013327 (2006)","journal-title":"Machine Learning"},{"key":"17_CR3","unstructured":"Andrade, G., Santana, H., Furtado, A., Leit\u00e3o, A., Ramalho, G.: Online adaptation of computer games agents: A reinforcement learning approach. Scientia\u00a015(2) (2004)"},{"key":"17_CR4","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning\u00a047, 235\u2013256 (2002)","journal-title":"Machine Learning"},{"key":"17_CR5","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1016\/j.ic.2009.09.001","volume":"208","author":"G. Bart\u00f3k","year":"2010","unstructured":"Bart\u00f3k, G., Szepesv\u00e1ri, C., Zilles, S.: Models of active learning in group-structured state spaces. Information and Computation\u00a0208, 364\u2013384 (2010)","journal-title":"Information and Computation"},{"issue":"3","key":"17_CR6","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1023\/A:1007634325138","volume":"40","author":"J. Baxter","year":"2000","unstructured":"Baxter, J., Tridgell, A., Weaver, L.: Learning to play chess using temporal-differences. Machine learning\u00a040(3), 243\u2013263 (2000)","journal-title":"Machine learning"},{"key":"17_CR7","unstructured":"Baxter, J., Tridgell, A., Weaver, L.: Reinforcement learning and chess. In: Machines that learn to play games, pp. 91\u2013116. Nova Science Publishers, Inc. (2001)"},{"issue":"3","key":"17_CR8","first-page":"147","volume":"20","author":"D. Beal","year":"1997","unstructured":"Beal, D., Smith, M.C.: Learning piece values using temporal differences. ICCA Journal\u00a020(3), 147\u2013151 (1997)","journal-title":"ICCA Journal"},{"key":"17_CR9","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"17_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/11674399_2","volume-title":"Computers and Games","author":"D. Billings","year":"2006","unstructured":"Billings, D., Davidson, A., Schauenberg, T., Burch, N., Bowling, M., Holte, R.C., Schaeffer, J., Szafron, D.: Game-Tree Search with Adaptation in Stochastic Imperfect-Information Games. In: van den Herik, H.J., Bj\u00f6rnsson, Y., Netanyahu, N.S. (eds.) CG 2004. LNCS, vol.\u00a03846, pp. 21\u201334. Springer, Heidelberg (2006)"},{"issue":"1","key":"17_CR11","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/TCIAIG.2009.2018702","volume":"1","author":"Y. Bj\u00f6rnsson","year":"2009","unstructured":"Bj\u00f6rnsson, Y., Finnsson, H.: Cadiaplayer: A simulation-based general game player. IEEE Transactions on Computational Intelligence and AI in Games\u00a01(1), 4\u201315 (2009)","journal-title":"IEEE Transactions on Computational Intelligence and AI in Games"},{"key":"17_CR12","unstructured":"B\u00f6hm, N., K\u00f3kai, G., Mandl, S.: Evolving a heuristic function for the game of tetris. In: Proc. Lernen, Wissensentdeckung und Adaptivit\u00e4t LWA, pp. 118\u2013122 (2004)"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Boumaza, A.: On the evolution of artificial Tetris players. In: IEEE Symposium on Computational Intelligence and Games (2009)","DOI":"10.1109\/CIG.2009.5286451"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Bouzy, B., Helmstetter, B.: Monte Carlo Go developments. In: Advances in Computer Games, pp. 159\u2013174 (2003)","DOI":"10.1007\/978-0-387-35706-5_11"},{"key":"17_CR15","unstructured":"Bowling, M.: Convergence and no-regret in multiagent learning. In: Neural Information Processing Systems, pp. 209\u2013216 (2004)"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Buro, M.: From simple features to sophisticated evaluation functions. In: International Conference on Computers and Games, pp. 126\u2013145 (1998)","DOI":"10.1007\/3-540-48957-6_8"},{"key":"17_CR17","unstructured":"Buro, M., Furtak, T.: RTS games as test-bed for real-time research. JCIS, 481\u2013484 (2003)"},{"key":"17_CR18","unstructured":"Buro, M., Lanctot, M., Orsten, S.: The second annual real-time strategy game AI competition. In: GAME-ON NA (2007)"},{"issue":"3","key":"17_CR19","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1142\/S1793005708001094","volume":"4","author":"G. Chaslot","year":"2008","unstructured":"Chaslot, G., Winands, M., Herik, H., Uiterwijk, J., Bouzy, B.: Progressive strategies for monte-carlo tree search. New Mathematics and Natural Computation\u00a04(3), 343 (2008)","journal-title":"New Mathematics and Natural Computation"},{"key":"17_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-12993-3_1","volume-title":"Advances in Computer Games","author":"G. Chaslot","year":"2010","unstructured":"Chaslot, G., Fiter, C., Hoock, J.B., Rimmel, A., Teytaud, O.: Adding Expert Knowledge and Exploration in Monte-Carlo Tree Search. In: van den Herik, H.J., Spronck, P. (eds.) ACG 2009. LNCS, vol.\u00a06048, pp. 1\u201313. Springer, Heidelberg (2010)"},{"key":"17_CR21","unstructured":"Chatriot, L., Gelly, S., Jean-Baptiste, H., Perez, J., Rimmel, A., Teytaud, O.: Including expert knowledge in bandit-based Monte-Carlo planning, with application to computer-Go. In: European Workshop on Reinforcement Learning (2008)"},{"key":"17_CR22","unstructured":"Coquelin, P.A., Munos, R.: Bandit algorithms for tree search. In: Uncertainty in Artificial Intelligence (2007)"},{"key":"17_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-540-75538-8_7","volume-title":"Computers and Games","author":"R. Coulom","year":"2007","unstructured":"Coulom, R.: Efficient Selectivity and Backup Operators in Monte-carlo Tree Search. In: van den Herik, H.J., Ciancarini, P., Donkers, H.H.L.M(J.) (eds.) CG 2006. LNCS, vol.\u00a04630, pp. 72\u201383. Springer, Heidelberg (2007)"},{"issue":"4","key":"17_CR24","doi-asserted-by":"crossref","first-page":"198","DOI":"10.3233\/ICG-2007-30403","volume":"30","author":"R. Coulom","year":"2007","unstructured":"Coulom, R.: Computing Elo ratings of move patterns in the game of go. ICGA Journal\u00a030(4), 198\u2013208 (2007)","journal-title":"ICGA Journal"},{"key":"17_CR25","unstructured":"Dahl, F.A.: Honte, a Go-playing program using neural nets. In: Machines that learn to play games, pp. 205\u2013223. Nova Science Publishers (2001)"},{"key":"17_CR26","unstructured":"Davidson, A.: Opponent modeling in poker: Learning and acting in a hostile and uncertain environment. Master\u2019s thesis, University of Alberta (2002)"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Diuk, C., Cohen, A., Littman, M.L.: An object-oriented representation for efficient reinforcement learning. In: International Conference on Machine Learning, pp. 240\u2013247 (2008)","DOI":"10.1145\/1390156.1390187"},{"key":"17_CR28","unstructured":"Droste, S., F\u00fcrnkranz, J.: Learning of piece values for chess variants. Tech. Rep. TUD\u2013KE\u20132008-07, Knowledge Engineering Group, TU Darmstadt (2008)"},{"issue":"1-2","key":"17_CR29","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S. D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., Raedt, L.D., Driessens, K.: Relational reinforcement learning. Machine Learning\u00a043(1-2), 7\u201352 (2001)","journal-title":"Machine Learning"},{"key":"17_CR30","first-page":"251","volume":"15","author":"S.L. Epstein","year":"1994","unstructured":"Epstein, S.L.: Toward an ideal trainer. Machine Learning\u00a015, 251\u2013277 (1994)","journal-title":"Machine Learning"},{"key":"17_CR31","volume-title":"Probabilistic and Randomized Methods for Design Under Uncertainty","author":"V.F. Farias","year":"2006","unstructured":"Farias, V.F., van Roy, B.: Tetris: A Study of Randomized Constraint Sampling. In: Probabilistic and Randomized Methods for Design Under Uncertainty. Springer, UK (2006)"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Fawcett, T., Utgoff, P.: Automatic feature generation for problem solving systems. In: International Conference on Machine Learning, pp. 144\u2013153 (1992)","DOI":"10.1016\/B978-1-55860-247-2.50024-3"},{"key":"17_CR33","first-page":"100","volume":"21","author":"L. Finkelstein","year":"1998","unstructured":"Finkelstein, L., Markovitch, S.: Learning to play chess selectively by acquiring move patterns. ICCA Journal\u00a021, 100\u2013119 (1998)","journal-title":"ICCA Journal"},{"key":"17_CR34","unstructured":"Fudenberg, D., Levine, D.K.: The theory of learning in games. MIT Press (1998)"},{"key":"17_CR35","unstructured":"F\u00fcrnkranz, J.: Machine learning in games: a survey. In: Machines that Learn to Play Games, pp. 11\u201359. Nova Science Publishers (2001)"},{"key":"17_CR36","unstructured":"F\u00fcrnkranz, J.: Recent advances in machine learning and game playing. Tech. rep., TU Darmstadt (2007)"},{"issue":"2","key":"17_CR37","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/s10462-009-9112-y","volume":"29","author":"L. Galway","year":"2008","unstructured":"Galway, L., Charles, D., Black, M.: Machine learning in digital games: a survey. Artificial Intelligence Review\u00a029(2), 123\u2013161 (2008)","journal-title":"Artificial Intelligence Review"},{"key":"17_CR38","unstructured":"Gelly, S., Silver, D.: Achieving master-level play in 9x9 computer go. In: AAAI, pp. 1537\u20131540 (2008)"},{"key":"17_CR39","unstructured":"Gelly, S., Wang, Y., Munos, R., Teytaud, O.: Modification of UCT with patterns in Monte-Carlo go. Tech. rep., INRIA (2006)"},{"key":"17_CR40","unstructured":"Gherrity, M.: A game-learning machine. PhD thesis, University of California, San Diego, CA (1993)"},{"key":"17_CR41","unstructured":"Ghory, I.: Reinforcement learning in board games. Tech. rep., Department of Computer Science, University of Bristol (2004)"},{"key":"17_CR42","unstructured":"Gilgenbach, M.: Fun game AI design for beginners. In: AI Game Programming Wisdom, vol.\u00a03. Charles River Media, Inc. (2006)"},{"issue":"5","key":"17_CR43","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1145\/1284320.1284324","volume":"54","author":"A. Gilpin","year":"2007","unstructured":"Gilpin, A., Sandholm, T.: Lossless abstraction of imperfect information games. Journal of the ACM\u00a054(5), 25 (2007)","journal-title":"Journal of the ACM"},{"key":"17_CR44","unstructured":"Gilpin, A., Sandholm, T., S\u00f8rensen, T.B.: Potential-aware automated abstraction of sequential games, and holistic equilibrium analysis of Texas Hold\u2019em poker. In: AAAI, vol.\u00a022, pp. 50\u201357 (2007)"},{"key":"17_CR45","first-page":"313","volume":"14","author":"M.L. Ginsberg","year":"2002","unstructured":"Ginsberg, M.L.: Gib: Imperfect information in a computationally challenging game. Journal of Artificial Intelligence Research\u00a014, 313\u2013368 (2002)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"17_CR46","unstructured":"Gould, J., Levinson, R.: Experience-based adaptive search. Tech. Rep. UCSC-CRL-92-10, University of California at Santa Cruz (1992)"},{"key":"17_CR47","unstructured":"G\u00fcnther, M.: Automatic feature construction for general game playing. PhD thesis, Dresden University of Technology (2008)"},{"key":"17_CR48","doi-asserted-by":"crossref","unstructured":"Hagelb\u00e4ck, J., Johansson, S.J.: Measuring player experience on runtime dynamic difficulty scaling in an RTS game. In: International Conference on Computational Intelligence and Games (2009)","DOI":"10.1109\/CIG.2009.5286494"},{"key":"17_CR49","unstructured":"Hartley, T., Mehdi, Q., Gough, N.: Online learning from observation for interactive computer games. In: International Conference on Computer Games: Artificial Intelligence and Mobile Systems, pp. 27\u201330 (2005)"},{"key":"17_CR50","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1016\/S0004-3702(01)00152-7","volume":"134","author":"H.J. Herik van den","year":"2002","unstructured":"van den Herik, H.J., Uiterwijk, J.W.H.M., van Rijswijck, J.: Games solved: Now and in the future. Artificial Intelligence\u00a0134, 277\u2013311 (2002)","journal-title":"Artificial Intelligence"},{"key":"17_CR51","volume-title":"Behind Deep Blue: Building the Computer that Defeated the World Chess Champion","author":"F.H. Hsu","year":"2002","unstructured":"Hsu, F.H.: Behind Deep Blue: Building the Computer that Defeated the World Chess Champion. Princeton University Press, Princeton (2002)"},{"key":"17_CR52","unstructured":"Hunicke, R., Chapman, V.: AI for dynamic difficult adjustment in games. In: Challenges in Game AI Workshop (2004)"},{"key":"17_CR53","unstructured":"Kakade, S.: A natural policy gradient. In: Advances in Neural Information Processing Systems, vol.\u00a014, pp. 1531\u20131538 (2001)"},{"key":"17_CR54","doi-asserted-by":"crossref","unstructured":"Kalles, D., Kanellopoulos, P.: On verifying game designs and playing strategies using reinforcement learning. In: ACM Symposium on Applied Computing, pp. 6\u201311 (2001)","DOI":"10.1145\/372202.372204"},{"key":"17_CR55","unstructured":"Kerbusch, P.: Learning unit values in Wargus using temporal differences. BSc thesis (2005)"},{"key":"17_CR56","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"Machine Learning: ECML 2006","author":"L. Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit Based Monte-Carlo Planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol.\u00a04212, pp. 282\u2013293. Springer, Heidelberg (2006)"},{"key":"17_CR57","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/11922155_4","volume-title":"Advances in Computer Games","author":"L. Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C., Winands, M.H.M.: RSPSA: Enhanced Parameter Optimization in Games. In: van den Herik, H.J., Hsu, S.-C., Hsu, T.-s., Donkers, H.H.L.M(J.) (eds.) CG 2005. LNCS, vol.\u00a04250, pp. 39\u201356. Springer, Heidelberg (2006)"},{"key":"17_CR58","unstructured":"Kok, E.: Adaptive reinforcement learning agents in RTS games. Master\u2019s thesis, University of Utrecht, The Netherlands (2008)"},{"key":"17_CR59","unstructured":"Koza, J.: Genetic programming: on the programming of computers by means of natural selection. MIT Press (1992)"},{"key":"17_CR60","unstructured":"Kuhlmann, G.J.: Automated domain analysis and transfer learning in general game playing. PhD thesis, University of Texas at Austin (2010)"},{"key":"17_CR61","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/3-540-46014-4_23","volume-title":"Methods and Applications of Artificial Intelligence","author":"M.G. Lagoudakis","year":"2002","unstructured":"Lagoudakis, M.G., Parr, R., Littman, M.L.: Least-Squares Methods in Reinforcement Learning for Control. In: Vlahavas, I.P., Spyropoulos, C.D. (eds.) SETN 2002. LNCS (LNAI), vol.\u00a02308, pp. 249\u2013260. Springer, Heidelberg (2002)"},{"key":"17_CR62","unstructured":"Laursen, R., Nielsen, D.: Investigating small scale combat situations in real time strategy computer games. Master\u2019s thesis, University of Aarhus (2005)"},{"key":"17_CR63","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/3-540-45579-5_9","volume-title":"Computers and Games","author":"R. Levinson","year":"2002","unstructured":"Levinson, R., Weber, R.: Chess Neighborhoods, Function Combination, and Reinforcement Learning. In: Marsland, T., Frank, I. (eds.) CG 2001. LNCS, vol.\u00a02063, pp. 133\u2013150. Springer, Heidelberg (2002)"},{"key":"17_CR64","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"749","DOI":"10.1007\/978-3-540-30140-0_66","volume-title":"Algorithms \u2013 ESA 2004","author":"U. Lorenz","year":"2004","unstructured":"Lorenz, U.: Beyond Optimal Play in Two-Person-Zerosum Games. In: Albers, S., Radzik, T. (eds.) ESA 2004. LNCS, vol.\u00a03221, pp. 749\u2013759. Springer, Heidelberg (2004)"},{"key":"17_CR65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-11678-0","volume-title":"Knowledge-Free and Learning-Based Methods in Intelligent Game Playing","author":"J. Ma\u0144dziuk","year":"2010","unstructured":"Ma\u0144dziuk, J.: Knowledge-Free and Learning-Based Methods in Intelligent Game Playing. Springer, Heidelberg (2010)"},{"key":"17_CR66","unstructured":"Marthi, B., Russell, S., Latham, D.: Writing Stratagus-playing agents in concurrent alisp. In: IJCAI Workshop on Reasoning, Representation, and Learning in Computer Games, pp. 67\u201371 (2005)"},{"key":"17_CR67","unstructured":"McGlinchey, S.J.: Learning of AI players from game observation data. In: GAME-ON, pp. 106\u2013110 (2003)"},{"key":"17_CR68","unstructured":"Molineaux, M., Aha, D.W., Ponsen, M.: Defeating novel opponents in a real-time strategy game. In: IJCAI Workshop on Reasoning, Representation, and Learning in Computer Games, pp. 72\u201377 (2005)"},{"key":"17_CR69","first-page":"195","volume":"7","author":"D.E. Moriarty","year":"1995","unstructured":"Moriarty, D.E., Miikkulainen, R.: Discovering complex Othello strategies through evolutionary neural networks. Connection Science\u00a07, 195\u2013209 (1995)","journal-title":"Connection Science"},{"issue":"4","key":"17_CR70","doi-asserted-by":"crossref","first-page":"219","DOI":"10.3233\/ICG-2002-25405","volume":"25","author":"M. M\u00fcller","year":"2002","unstructured":"M\u00fcller, M.: Position evaluation in computer go. ICGA Journal\u00a025(4), 219\u2013228 (2002)","journal-title":"ICGA Journal"},{"key":"17_CR71","unstructured":"Naddaf, Y.: Game-independent AI agents for playing Atari 2600 console games. Master\u2019s thesis, University of Alberta (2010)"},{"key":"17_CR72","unstructured":"Pollack, J.B., Blair, A.D.: Why did TD-Gammon work? In: Neural Information Processing Systems, vol.\u00a09, pp. 10\u201316 (1997)"},{"key":"17_CR73","unstructured":"Ponsen, M., Spronck, P.: Improving adaptive game AI with evolutionary learning. In: Computer Games: Artificial Intelligence, Design and Education (2004)"},{"key":"17_CR74","unstructured":"Ponsen, M., Mu\u00f1oz-Avila, H., Spronck, P., Aha, D.W.: Automatically acquiring adaptive real-time strategy game opponents using evolutionary learning. In: Proceedings of the 17th Innovative Applications of Artificial Intelligence Conference (2005)"},{"key":"17_CR75","unstructured":"Ponsen, M., Spronck, P., Tuyls, K.: Hierarchical reinforcement learning in computer games. In: Adaptive Learning Agents and Multi-Agent Systems, pp. 49\u201360 (2006)"},{"key":"17_CR76","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-11814-2_1","volume-title":"Adaptive and Learning Agents","author":"M. Ponsen","year":"2010","unstructured":"Ponsen, M., Taylor, M.E., Tuyls, K.: Abstraction and Generalization in Reinforcement Learning: A Summary and Framework. In: Taylor, M.E., Tuyls, K. (eds.) ALA 2009. LNCS, vol.\u00a05924, pp. 1\u201333. Springer, Heidelberg (2010)"},{"key":"17_CR77","doi-asserted-by":"crossref","unstructured":"Ramanujan, R., Sabharwal, A., Selman, B.: Adversarial search spaces and sampling-based planning. In: International Conference on Automated Planning and Scheduling (2010)","DOI":"10.1609\/icaps.v20i1.13437"},{"key":"17_CR78","unstructured":"Risk, N., Szafron, D.: Using counterfactual regret minimization to create competitive multiplayer poker agents. In: International Conference on Autonomous Agents and Multiagent Systems, pp. 159\u2013166 (2010)"},{"issue":"5-6","key":"17_CR79","doi-asserted-by":"publisher","first-page":"958","DOI":"10.1016\/j.artint.2010.12.005","volume":"175","author":"J. Rubin","year":"2011","unstructured":"Rubin, J., Watson, I.: Computer poker: A review. Artificial Intelligence\u00a0175(5-6), 958\u2013987 (2011)","journal-title":"Artificial Intelligence"},{"key":"17_CR80","doi-asserted-by":"crossref","unstructured":"Schaeffer, J.: The games computers (and people) play. In: Zelkowitz, M. (ed.) Advances in Computers, vol.\u00a050, pp. 89\u2013266. Academic Press (2000)","DOI":"10.1016\/S0065-2458(00)80019-4"},{"key":"17_CR81","unstructured":"Schaeffer, J., Hlynka, M., Jussila, V.: Temporal difference learning applied to a high-performance game-playing program. In: International Joint Conference on Artificial Intelligence, pp. 529\u2013534 (2001)"},{"key":"17_CR82","unstructured":"Schnizlein, D., Bowling, M., Szafron, D.: Probabilistic state translation in extensive games with large action sets. In: International Joint Conference on Artificial Intelligence, pp. 278\u2013284 (2009)"},{"key":"17_CR83","series-title":"Studies in Fuzziness and Soft Computing","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/978-3-7908-1833-8_4","volume-title":"Computational Intelligence in Games","author":"N.N. Schraudolph","year":"2001","unstructured":"Schraudolph, N.N., Dayan, P., Sejnowski, T.J.: Learning to evaluate go positions via temporal difference methods. In: Computational Intelligence in Games. Studies in Fuzziness and Soft Computing, ch. 4, vol.\u00a062, pp. 77\u201398. Springer, Heidelberg (2001)"},{"key":"17_CR84","unstructured":"Scott, B.: The illusion of intelligence. In: AI Game Programming Wisdom, pp. 16\u201320. Charles River Media (2002)"},{"key":"17_CR85","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1007\/978-3-540-40031-8_4","volume-title":"Computers and Games","author":"A. Shapiro","year":"2003","unstructured":"Shapiro, A., Fuchs, G., Levinson, R.: Learning a Game Strategy Using Pattern-Weights and Self-Play. In: Schaeffer, J., M\u00fcller, M., Bj\u00f6rnsson, Y. (eds.) CG 2002. LNCS, vol.\u00a02883, pp. 42\u201360. Springer, Heidelberg (2003)"},{"key":"17_CR86","doi-asserted-by":"crossref","unstructured":"Sharifi, A.A., Zhao, R., Szafron, D.: Learning companion behaviors using reinforcement learning in games. In: AIIDE (2010)","DOI":"10.1609\/aiide.v6i1.12392"},{"key":"17_CR87","doi-asserted-by":"crossref","unstructured":"Sharma, S., Kobti, Z., Goodwin, S.: General game playing: An overview and open problems. In: International Conference on Computing, Engineering and Information, pp. 257\u2013260 (2009)","DOI":"10.1109\/ICC.2009.50"},{"key":"17_CR88","doi-asserted-by":"crossref","unstructured":"Silver, D., Tesauro, G.: Monte-carlo simulation balancing. In: International Conference on Machine Learning (2009)","DOI":"10.1145\/1553374.1553495"},{"key":"17_CR89","doi-asserted-by":"crossref","unstructured":"Silver, D., Sutton, R., Mueller, M.: Sample-based learning and search with permanent and transient memories. In: ICML (2008)","DOI":"10.1145\/1390156.1390278"},{"key":"17_CR90","unstructured":"Spronck, P., Sprinkhuizen-Kuyper, I., Postma, E.: Difficulty scaling of game AI. In: GAME-ON 2004: 5th International Conference on Intelligent Games and Simulation (2004)"},{"issue":"3","key":"17_CR91","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/s10994-006-6205-6","volume":"63","author":"P. Spronck","year":"2006","unstructured":"Spronck, P., Ponsen, M., Sprinkhuizen-Kuyper, I., Postma, E.: Adaptive game AI with dynamic scripting. Machine Learning\u00a063(3), 217\u2013248 (2006)","journal-title":"Machine Learning"},{"issue":"6","key":"17_CR92","doi-asserted-by":"publisher","first-page":"653","DOI":"10.1109\/TEVC.2005.856210","volume":"9","author":"K.O. Stanley","year":"2005","unstructured":"Stanley, K.O., Bryant, B.D., Miikkulainen, R.: Real-time neuroevolution in the NERO video game. IEEE Transactions on Evolutionary Computation\u00a09(6), 653\u2013668 (2005)","journal-title":"IEEE Transactions on Evolutionary Computation"},{"key":"17_CR93","doi-asserted-by":"crossref","unstructured":"Sturtevant, N., White, A.: Feature construction for reinforcement learning in Hearts. In: Advances in Computers and Games, pp. 122\u2013134 (2007)","DOI":"10.1007\/978-3-540-75538-8_11"},{"key":"17_CR94","unstructured":"Szczepa\u0144ski, T., Aamodt, A.: Case-based reasoning for improved micromanagement in real-time strategy games. In: Workshop on Case-Based Reasoning for Computer Games, 8th International Conference on Case-Based Reasoning, pp. 139\u2013148 (2009)"},{"issue":"12","key":"17_CR95","doi-asserted-by":"publisher","first-page":"2936","DOI":"10.1162\/neco.2006.18.12.2936","volume":"18","author":"I. Szita","year":"2006","unstructured":"Szita, I., L\u0151rincz, A.: Learning Tetris using the noisy cross-entropy method. Neural Computation\u00a018(12), 2936\u20132941 (2006a)","journal-title":"Neural Computation"},{"key":"17_CR96","doi-asserted-by":"crossref","first-page":"659","DOI":"10.1613\/jair.2368","volume":"30","author":"I. Szita","year":"2006","unstructured":"Szita, I., L\u0151rincz, A.: Learning to play using low-complexity rule-based policies: Illustrations through Ms. Pac-Man. Journal of Articial Intelligence Research\u00a030, 659\u2013684 (2006b)","journal-title":"Journal of Articial Intelligence Research"},{"key":"17_CR97","unstructured":"Szita, I., Szepesv\u00e1ri, C.: Sz-tetris as a benchmark for studying key problems of rl. In: ICML 2010 Workshop on Machine Learning and Games (2010)"},{"key":"17_CR98","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-642-12993-3_3","volume-title":"Advances in Computer Games","author":"I. Szita","year":"2010","unstructured":"Szita, I., Chaslot, G., Spronck, P.: Monte-Carlo Tree Search in Settlers of Catan. In: van den Herik, H.J., Spronck, P. (eds.) ACG 2009. LNCS, vol.\u00a06048, pp. 21\u201332. Springer, Heidelberg (2010)"},{"key":"17_CR99","first-page":"257","volume":"8","author":"G. Tesauro","year":"1992","unstructured":"Tesauro, G.: Practical issues in temporal difference learning. Machine Learning\u00a08, 257\u2013277 (1992)","journal-title":"Machine Learning"},{"issue":"3","key":"17_CR100","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G. Tesauro","year":"1995","unstructured":"Tesauro, G.: Temporal difference learning and TD-gammon. Communications of the ACM\u00a038(3), 58\u201368 (1995)","journal-title":"Communications of the ACM"},{"issue":"3","key":"17_CR101","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1023\/A:1007469231743","volume":"32","author":"G. Tesauro","year":"1998","unstructured":"Tesauro, G.: Comments on co-evolution in the successful learning of backgammon strategy\u2019. Machine Learning\u00a032(3), 241\u2013243 (1998)","journal-title":"Machine Learning"},{"issue":"1-2","key":"17_CR102","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(01)00110-2","volume":"134","author":"G. Tesauro","year":"2002","unstructured":"Tesauro, G.: Programming backgammon using self-teaching neural nets. Artificial Intelligence\u00a0134(1-2), 181\u2013199 (2002)","journal-title":"Artificial Intelligence"},{"issue":"1","key":"17_CR103","doi-asserted-by":"crossref","first-page":"3","DOI":"10.3233\/ICG-2009-32102","volume":"32","author":"C. Thiery","year":"2009","unstructured":"Thiery, C., Scherrer, B.: Building controllers for Tetris. ICGA Journal\u00a032(1), 3\u201311 (2009)","journal-title":"ICGA Journal"},{"key":"17_CR104","unstructured":"Thrun, S.: Learning to play the game of chess. In: Neural Information Processing Systems, vol.\u00a07, pp. 1069\u20131076 (1995)"},{"key":"17_CR105","unstructured":"Utgoff, P.: Feature construction for game playing. In: F\u00fcrnkranz, J., Kubat, M. (eds.) Machines that Learn to Play Games, pp. 131\u2013152. Nova Science Publishers (2001)"},{"key":"17_CR106","doi-asserted-by":"crossref","unstructured":"Utgoff, P., Precup, D.: Constructive function approximation. In: Liu, H., Motoda, H. (eds.) Feature Extraction, Construction and Selection: A Data Mining Perspective, vol.\u00a0453, pp. 219\u2013235. Kluwer Academic Publishers (1998)","DOI":"10.1007\/978-1-4615-5725-8_14"},{"key":"17_CR107","unstructured":"Veness, J., Silver, D., Uther, W., Blair, A.: Bootstrapping from game tree search. In: Neural Information Processing Systems, vol.\u00a022, pp. 1937\u20131945 (2009)"},{"key":"17_CR108","doi-asserted-by":"crossref","unstructured":"Weber, B.G., Mateas, M.: Case-based reasoning for build order in real-time strategy games. In: Artificial Intelligence and Interactive Digital Entertainment, pp. 1313\u20131318 (2009)","DOI":"10.1609\/aiide.v5i1.12360"},{"key":"17_CR109","doi-asserted-by":"crossref","unstructured":"Wender, S., Watson, I.: Using reinforcement learning for city site selection in the turn-based strategy game Civilization IV. In: Computational Intelligence and Games, pp. 372\u2013377 (2009)","DOI":"10.1109\/CIG.2008.5035664"},{"key":"17_CR110","doi-asserted-by":"publisher","first-page":"57","DOI":"10.4236\/jilsa.2010.22009","volume":"2","author":"M.A. Wiering","year":"2010","unstructured":"Wiering, M.A.: Self-play and using an expert to learn to play backgammon with temporal difference learning. Journal of Intelligent Learning Systems and Applications\u00a02, 57\u201368 (2010)","journal-title":"Journal of Intelligent Learning Systems and Applications"},{"key":"17_CR111","unstructured":"Zinkevich, M., Johanson, M., Bowling, M., Piccione, C.: Regret minimization in games with incomplete information. In: Neural Information Processing Systems, pp. 1729\u20131736 (2008)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:02:51Z","timestamp":1742648571000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":111,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_17","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}