{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T10:55:38Z","timestamp":1752231338194},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540628583"},{"type":"electronic","value":"9783540687085"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[1997]]},"DOI":"10.1007\/3-540-62858-4_89","type":"book-chapter","created":{"date-parts":[[2012,2,26]],"date-time":"2012-02-26T22:52:34Z","timestamp":1330296754000},"page":"242-249","source":"Crossref","is-referenced-by-count":3,"title":["Learning and exploitation do not conflict under minimax optimality"],"prefix":"10.1007","author":[{"given":"Csaba","family":"Szepesv\u00e1ri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2005,6,2]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/0004-3702(94)00011-O","volume":"72","author":"A.G. Barto","year":"1995","unstructured":"A.G. Barto, S.J. Bradtke, and S.P. Singh. Learning to act using real-time dynamic programming. Artificial Intelligence, 72:91\u2013138, 1995. Technical Report 91-57, Computer Science Department, University of Massachusetts, Vol. 59., 1991.","journal-title":"Artificial Intelligence"},{"key":"23_CR2","volume-title":"Master's thesis","author":"J. A. Boyan","year":"1992","unstructured":"Justin A. Boyan. Modular Neural Networks for Learning Context-Dependent Game Strategies. Master's thesis, Department of Engineering and Computer Laboratory, University of Cambridge, Cambridge, UK, August 1992."},{"key":"23_CR3","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4615-6746-2","volume-title":"Controlled Markov Processes","author":"E.B. Dynkin","year":"1979","unstructured":"E.B. 
Dynkin and A.A. Yushkevich. Controlled Markov Processes. Springer-Verlag, Berlin, 1979."},{"key":"23_CR4","volume-title":"PhD thesis","author":"M. Heger","year":"1996","unstructured":"M. Heger. Risk-sensitive decision making. PhD thesis, Zentrum f\u00fcr Kognitionwissenschaften, Universit\u00e4t Bremen, FB3 Informatik, Postfach 330 440, 28334 Bremen, Germany, 1996."},{"key":"23_CR5","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1016\/0004-3702(90)90054-4","volume":"42","author":"R.E. Korf","year":"1990","unstructured":"R.E. Korf. Real-time heuristic search. Artificial Intelligence, 42:189\u2013211, 1990.","journal-title":"Artificial Intelligence"},{"key":"23_CR6","unstructured":"M.L. Littman and Cs. Szepesv\u00e1ri. A Generalized Reinforcement Learning Model: convergence and applications. In Int. Conf. on Machine Learning, 1996. http:\/\/iserv.iki.kfki.hu\/asl-publs.html."},{"key":"23_CR7","volume-title":"Advances in Neural Information Processing Systems 6","author":"N. N. Schraudolph","year":"1994","unstructured":"Nicol N. Schraudolph, Peter Dayan, and Terrence J. Sejnowski. Using the TD(\u03bb) algorithm to learn an evaluation function for the game of Go. In Advances in Neural Information Processing Systems 6, Morgan Kaufmann, San Mateo, CA, 1994."},{"key":"23_CR8","doi-asserted-by":"crossref","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","volume":"39","author":"L.S. Shapley","year":"1953","unstructured":"L.S. Shapley. Stochastic games. Proceedings of the National Academy of Sciences of the United States of America, 39:1095\u20131100, 1953.","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"C. Stein. A two-sample test for a linear hypothesis whose power is independent of variance. Ann. Math. Statist., 16, 1945.","DOI":"10.1214\/aoms\/1177731088"},{"key":"23_CR10","volume-title":"Technical Report 96\u2013101","author":"C. 
Szepesv\u00e1ri","year":"1996","unstructured":"Cs. Szepesv\u00e1ri. Certainty equivalence policies are self-optimizing under minimax optimality. Technical Report 96\u2013101, Research Group on Artificial Intelligence, JATE-MTA, Szeged 6720, Aradi vrt tere 1., HUNGARY, August 1996. URL: http:\/\/www.inf.u-szeged.hu\/\u223crgai."},{"key":"23_CR11","volume-title":"Technical Report 96\u2013100","author":"C. Szepesv\u00e1ri","year":"1996","unstructured":"Cs. Szepesv\u00e1ri. Some basic facts concerning minimax sequential decision problems. Technical Report 96\u2013100, Research Group on Artificial Intelligence, JATE-MTA, Szeged 6720, Aradi vrt tere 1., HUNGARY, August 1996. URL: http:\/\/www.inf.u-szeged.hu\/\u223crgai."},{"key":"23_CR12","unstructured":"Cs. Szepesv\u00e1ri and M. Littman. Generalized Markov Decision Processes: Dynamic programming and reinforcement learning algorithms. Operations Research, 1996. in preparation."},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Gerald Tesauro. Temporal difference learning and TD-Gammon. Communications of the ACM, 58\u201367, March 1995.","DOI":"10.1145\/203330.203343"},{"key":"23_CR14","unstructured":"Sebastian Thrun. Learning to play the game of chess. 
In Neural Information Processing Systems 7, 1995."}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML-97"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-62858-4_89.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T21:14:35Z","timestamp":1605647675000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-62858-4_89"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1997]]},"ISBN":["9783540628583","9783540687085"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/3-540-62858-4_89","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[1997]]}}}