{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T09:50:05Z","timestamp":1743069005473,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":70,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642276446"},{"type":"electronic","value":"9783642276453"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_6","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"175-204","source":"Crossref","is-referenced-by-count":4,"title":["Sample Complexity Bounds of Exploration"],"prefix":"10.1007","author":[{"given":"Lihong","family":"Li","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Exploration and apprenticeship learning in reinforcement learning. In: Proceedings of the Twenty-Second International Conference on Machine Learning (ICML-2005), pp. 1\u20138 (2005)","DOI":"10.1145\/1102351.1102352"},{"key":"6_CR2","unstructured":"Asmuth, J., Li, L., Littman, M.L., Nouri, A., Wingate, D.: A Bayesian sampling approach to exploration in reinforcement learning. In: Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI-2009), pp. 19\u201326 (2009)"},{"key":"6_CR3","unstructured":"Bartlett, P.L., Tewari, A.: REGAL: A regularization based algorithm for reinforcement learning in weakly communicating MDPs. In: Proceedings of the Twenty-Fifth Annual Conference on Uncertainty in Artificial Intelligence (UAI-2009), pp. 35\u201342 (2009)"},{"issue":"1-2","key":"6_CR4","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/0004-3702(94)00011-O","volume":"72","author":"A.G. Barto","year":"1995","unstructured":"Barto, A.G., Bradtke, S.J., Singh, S.P.: Learning to act using real-time dynamic programming. Artificial Intelligence\u00a072(1-2), 81\u2013138 (1995)","journal-title":"Artificial Intelligence"},{"key":"6_CR5","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"6_CR6","first-page":"213","volume":"3","author":"R.I. Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max\u2014a general polynomial time algorithm for near-optimal reinforcement learning. Journal of Machine Learning Research\u00a03, 213\u2013231 (2002)","journal-title":"Journal of Machine Learning Research"},{"key":"6_CR7","first-page":"1955","volume":"10","author":"E. Brunskill","year":"2009","unstructured":"Brunskill, E., Leffler, B.R., Li, L., Littman, M.L., Roy, N.: Provably efficient learning with typed parametric models. Journal of Machine Learning Research\u00a010, 1955\u20131988 (2009)","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"6_CR8","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1287\/moor.22.1.222","volume":"22","author":"A.N. Burnetas","year":"1997","unstructured":"Burnetas, A.N., Katehakis, M.N.: Optimal adaptive policies for Markov decision processes. Mathematics of Operations Research\u00a022(1), 222\u2013255 (1997)","journal-title":"Mathematics of Operations Research"},{"key":"6_CR9","unstructured":"Dearden, R., Friedman, N., Andre, D.: Model based Bayesian exploration. In: Proceedings of the Fifteenth Conference on Uncertainty in Artificial Intelligence (UAI-1999), pp. 150\u2013159 (1999)"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Diuk, C., Cohen, A., Littman, M.L.: An object-oriented representation for efficient reinforcement learning. In: Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML-2008), pp. 240\u2013247 (2008)","DOI":"10.1145\/1390156.1390187"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Diuk, C., Li, L., Leffler, B.R.: The adaptive k-meteorologists problem and its application to structure discovery and feature selection in reinforcement learning. In: Proceedings of the Twenty-Sixth International Conference on Machine Learning (ICML-2009), pp. 249\u2013256 (2009)","DOI":"10.1145\/1553374.1553406"},{"key":"6_CR12","unstructured":"Duff, M.O.: Optimal learning: Computational procedures for Bayes-adaptive Markov decision processes. PhD thesis, University of Massachusetts, Amherst, MA (2002)"},{"key":"6_CR13","first-page":"1","volume":"5","author":"E. Even-Dar","year":"2003","unstructured":"Even-Dar, E., Mansour, Y.: Learning rates for Q-learning. Journal of Machine Learning Research\u00a05, 1\u201325 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"6_CR14","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/3-540-45435-7_18","volume-title":"Computational Learning Theory","author":"E. Even-Dar","year":"2002","unstructured":"Even-Dar, E., Mannor, S., Mansour, Y.: Multi-Armed Bandit and Markov Decision Processes. In: Kivinen, J., Sloan, R.H. (eds.) COLT 2002. LNCS (LNAI), vol.\u00a02375, pp. 255\u2013270. Springer, Heidelberg (2002)"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Fiechter, C.N.: Efficient reinforcement learning. In: Proceedings of the Seventh Annual ACM Conference on Computational Learning Theory (COLT-1994), pp. 88\u201397 (1994)","DOI":"10.1145\/180139.181019"},{"key":"6_CR16","unstructured":"Fiechter, C.N.: Expected mistake bound model for on-line reinforcement learning. In: Proceedings of the Fourteenth International Conference on Machine Learning (ICML-1997), pp. 116\u2013124 (1997)"},{"key":"6_CR17","unstructured":"Guestrin, C., Patrascu, R., Schuurmans, D.: Algorithm-directed exploration for model-based reinforcement learning in factored MDPs. In: Proceedings of the Nineteenth International Conference on Machine Learning (ICML-2002), pp. 235\u2013242 (2002)"},{"issue":"6","key":"6_CR18","doi-asserted-by":"publisher","first-page":"1185","DOI":"10.1162\/neco.1994.6.6.1185","volume":"6","author":"T. Jaakkola","year":"1994","unstructured":"Jaakkola, T., Jordan, M.I., Singh, S.P.: On the convergence of stochastic iterative dynamic programming algorithms. Neural Computation\u00a06(6), 1185\u20131201 (1994)","journal-title":"Neural Computation"},{"key":"6_CR19","first-page":"1563","volume":"11","author":"T. Jaksch","year":"2010","unstructured":"Jaksch, T., Ortner, R., Auer, P.: Near-optimal regret bounds for reinforcement learning. Journal of Machine Learning Research\u00a011, 1563\u20131600 (2010)","journal-title":"Journal of Machine Learning Research"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Jong, N.K., Stone, P.: Model-based function approximation in reinforcement learning. In: Proceedings of the Sixth International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS-2007), pp. 670\u2013677 (2007)","DOI":"10.1145\/1329125.1329242"},{"key":"6_CR21","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4168.001.0001","volume-title":"Learning in Embedded Systems","author":"L.P. Kaelbling","year":"1993","unstructured":"Kaelbling, L.P.: Learning in Embedded Systems. MIT Press, Cambridge (1993)"},{"key":"6_CR22","unstructured":"Kakade, S.: On the sample complexity of reinforcement learning. PhD thesis, Gatsby Computational Neuroscience Unit, University College London, UK (2003)"},{"key":"6_CR23","unstructured":"Kakade, S., Kearns, M.J., Langford, J.: Exploration in metric state spaces. In: Proceedings of the Twentieth International Conference on Machine Learning (ICML-2003), pp. 306\u2013312 (2003)"},{"key":"6_CR24","unstructured":"Kearns, M.J., Koller, D.: Efficient reinforcement learning in factored MDPs. In: Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence (IJCAI-1999), pp. 740\u2013747 (1999)"},{"key":"6_CR25","unstructured":"Kearns, M.J., Singh, S.P.: Finite-sample convergence rates for Q-learning and indirect algorithms. In: Advances in Neural Information Processing Systems (NIPS-1998), vol.\u00a011, pp. 996\u20131002 (1999)"},{"issue":"2-3","key":"6_CR26","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1023\/A:1017984413808","volume":"49","author":"M.J. Kearns","year":"2002","unstructured":"Kearns, M.J., Singh, S.P.: Near-optimal reinforcement learning in polynomial time. Machine Learning\u00a049(2-3), 209\u2013232 (2002)","journal-title":"Machine Learning"},{"issue":"2-3","key":"6_CR27","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1023\/A:1017932429737","volume":"49","author":"M.J. Kearns","year":"2002","unstructured":"Kearns, M.J., Mansour, Y., Ng, A.Y.: A sparse sampling algorithm for near-optimal planning in large Markov decision processes. Machine Learning\u00a049(2-3), 193\u2013208 (2002)","journal-title":"Machine Learning"},{"key":"6_CR28","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"Machine Learning: ECML 2006","author":"L. Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit Based Monte-Carlo Planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol.\u00a04212, pp. 282\u2013293. Springer, Heidelberg (2006)"},{"issue":"1-3","key":"6_CR29","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/BF00114729","volume":"22","author":"S. Koenig","year":"1996","unstructured":"Koenig, S., Simmons, R.G.: The effect of representation and knowledge on goal-directed exploration with reinforcement-learning algorithms. Machine Learning\u00a022(1-3), 227\u2013250 (1996)","journal-title":"Machine Learning"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Ng, A.Y.: Near Bayesian exploration in polynomial time. In: Proceedings of the Twenty-Sixth International Conference on Machine Learning (ICML-2009), pp. 513\u2013520 (2009)","DOI":"10.1145\/1553374.1553441"},{"key":"6_CR31","unstructured":"Li, L.: A unifying framework for computational reinforcement learning theory. PhD thesis, Rutgers University, New Brunswick, NJ (2009)"},{"issue":"3-4","key":"6_CR32","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/s10472-010-9201-2","volume":"58","author":"L. Li","year":"2010","unstructured":"Li, L., Littman, M.L.: Reducing reinforcement learning to KWIK online regression. Annals of Mathematics and Artificial Intelligence\u00a058(3-4), 217\u2013237 (2010)","journal-title":"Annals of Mathematics and Artificial Intelligence"},{"key":"6_CR33","unstructured":"Li, L., Littman, M.L., Mansley, C.R.: Online exploration in least-squares policy iteration. In: Proceedings of the Eighteenth International Conference on Agents and Multiagent Systems (AAMAS-2009), pp. 733\u2013739 (2009)"},{"issue":"3","key":"6_CR34","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1007\/s10994-010-5225-4","volume":"82","author":"L. Li","year":"2011","unstructured":"Li, L., Littman, M.L., Walsh, T.J., Strehl, A.L.: Knows what it knows: A framework for self-aware learning. Machine Learning\u00a082(3), 399\u2013443 (2011)","journal-title":"Machine Learning"},{"issue":"4","key":"6_CR35","first-page":"285","volume":"2","author":"N. Littlestone","year":"1987","unstructured":"Littlestone, N.: Learning quickly when irrelevant attributes abound: A new linear-threshold algorithms. Machine Learning\u00a02(4), 285\u2013318 (1987)","journal-title":"Machine Learning"},{"issue":"2","key":"6_CR36","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1023\/A:1007541107674","volume":"35","author":"N. Meuleau","year":"1999","unstructured":"Meuleau, N., Bourgine, P.: Exploration of multi-state environments: Local measures and back-propagation of uncertainty. Machine Learning\u00a035(2), 117\u2013154 (1999)","journal-title":"Machine Learning"},{"issue":"1","key":"6_CR37","first-page":"103","volume":"13","author":"A.W. Moore","year":"1993","unstructured":"Moore, A.W., Atkeson, C.G.: Prioritized sweeping: Reinforcement learning with less data and less time. Machine Learning\u00a013(1), 103\u2013130 (1993)","journal-title":"Machine Learning"},{"key":"6_CR38","unstructured":"Neu, G., Gy\u00f6rgy, A., Szepesv\u00e1ri, C., Antos, A.: Online Markov decision processes under bandit feedback. In: Advances in Neural Information Processing Systems 23 (NIPS-2010), pp. 1804\u20131812 (2011)"},{"key":"6_CR39","unstructured":"Ng, A.Y., Harada, D., Russell, S.J.: Policy invariance under reward transformations: Theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning (ICML-1999), pp. 278\u2013287 (1999)"},{"key":"6_CR40","unstructured":"Nouri, A., Littman, M.L.: Multi-resolution exploration in continuous spaces. In: Advances in Neural Information Processing Systems 21 (NIPS-2008), pp. 1209\u20131216 (2009)"},{"issue":"1","key":"6_CR41","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/s10994-010-5202-y","volume":"81","author":"A. Nouri","year":"2010","unstructured":"Nouri, A., Littman, M.L.: Dimension reduction and its application to model-based exploration in continuous spaces. Machine Learning\u00a081(1), 85\u201398 (2010)","journal-title":"Machine Learning"},{"key":"6_CR42","doi-asserted-by":"crossref","unstructured":"Poupart, P., Vlassis, N., Hoey, J., Regan, K.: An analytic solution to discrete Bayesian reinforcement learning. In: Proceedings of the Twenty-Third International Conference on Machine Learning (ICML-2006), pp. 697\u2013704 (2006)","DOI":"10.1145\/1143844.1143932"},{"key":"6_CR43","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley-Interscience, New York (1994)"},{"key":"6_CR44","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/978-3-540-39857-8_29","volume-title":"Machine Learning: ECML 2003","author":"B. Ratitch","year":"2003","unstructured":"Ratitch, B., Precup, D.: Using MDP Characteristics to Guide Exploration in Reinforcement Learning. In: Lavra\u010d, N., Gamberger, D., Todorovski, L., Blockeel, H. (eds.) ECML 2003. LNCS (LNAI), vol.\u00a02837, pp. 313\u2013324. Springer, Heidelberg (2003)"},{"issue":"5","key":"6_CR45","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1090\/S0002-9904-1952-09620-8","volume":"58","author":"H. Robbins","year":"1952","unstructured":"Robbins, H.: Some aspects of the sequential design of experiments. Bulletin of the American Mathematical Society\u00a058(5), 527\u2013535 (1952)","journal-title":"Bulletin of the American Mathematical Society"},{"key":"6_CR46","unstructured":"Sayedi, A., Zadimoghaddam, M., Blum, A.: Trading off mistakes and don\u2019t-know predictions. In: Advances in Neural Information Processing Systems 23 (NIPS-2010), pp. 2092\u20132100 (2011)"},{"issue":"3","key":"6_CR47","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"38","author":"S.P. Singh","year":"2000","unstructured":"Singh, S.P., Jaakkola, T., Littman, M.L., Szepesv\u00e1ri, C.: Convergence results for single-step on-policy reinforcement-learning algorithms. Machine Learning\u00a038(3), 287\u2013308 (2000)","journal-title":"Machine Learning"},{"key":"6_CR48","doi-asserted-by":"crossref","unstructured":"Strehl, A.L.: Model-based reinforcement learning in factored-state MDPs. In: Proceedings of the IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, pp. 103\u2013110 (2007a)","DOI":"10.1109\/ADPRL.2007.368176"},{"key":"6_CR49","unstructured":"Strehl, A.L.: Probably approximately correct (PAC) exploration in reinforcement learning. PhD thesis, Rutgers University, New Brunswick, NJ (2007b)"},{"issue":"8","key":"6_CR50","doi-asserted-by":"publisher","first-page":"1309","DOI":"10.1016\/j.jcss.2007.08.009","volume":"74","author":"A.L. Strehl","year":"2008","unstructured":"Strehl, A.L., Littman, M.L.: An analysis of model-based interval estimation for Markov decision processes. Journal of Computer and System Sciences\u00a074(8), 1309\u20131331 (2008a)","journal-title":"Journal of Computer and System Sciences"},{"key":"6_CR51","unstructured":"Strehl, A.L., Littman, M.L.: Online linear regression and its application to model-based reinforcement learning. In: Advances in Neural Information Processing Systems 20 (NIPS-2007), pp. 1417\u20131424 (2008b)"},{"key":"6_CR52","unstructured":"Strehl, A.L., Li, L., Littman, M.L.: Incremental model-based learners with formal learning-time guarantees. In: Proceedings of the Twenty-Second Conference on Uncertainty in Artificial Intelligence (UAI-2006), pp. 485\u2013493 (2006a)"},{"key":"6_CR53","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Li, L., Wiewiora, E., Langford, J., Littman, M.L.: PAC model-free reinforcement learning. In: Proceedings of the Twenty-Third International Conference on Machine Learning (ICML-2006), pp. 881\u2013888 (2006b)","DOI":"10.1145\/1143844.1143955"},{"key":"6_CR54","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Diuk, C., Littman, M.L.: Efficient structure learning in factored-state MDPs. In: Proceedings of the Twenty-Second AAAI Conference on Artificial Intelligence (AAAI-2007), pp. 645\u2013650 (2007)","DOI":"10.1109\/ADPRL.2007.368176"},{"key":"6_CR55","first-page":"2413","volume":"10","author":"A.L. Strehl","year":"2009","unstructured":"Strehl, A.L., Li, L., Littman, M.L.: Reinforcement learning in finite MDPs: PAC analysis. Journal of Machine Learning Research\u00a010, 2413\u20132444 (2009)","journal-title":"Journal of Machine Learning Research"},{"key":"6_CR56","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"6_CR57","doi-asserted-by":"crossref","unstructured":"Szita, I., L\u0151rincz, A.: The many faces of optimism: A unifying approach. In: Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML-2008), pp. 1048\u20131055 (2008)","DOI":"10.1145\/1390156.1390288"},{"key":"6_CR58","unstructured":"Szita, I., Szepesv\u00e1ri, C.: Model-based reinforcement learning with nearly tight exploration complexity bounds. In: Proceedings of the Twenty-Seventh International Conference on Machine Learning (ICML-2010), pp. 1031\u20131038 (2010)"},{"key":"6_CR59","unstructured":"Szita, I., Szepesv\u00e1ri, C.: Agnostic KWIK learning and efficient approximate reinforcement learning. In: Proceedings of the Twenty-Fourth Annual Conference on Learning Theory, COLT-2011 (2011)"},{"key":"6_CR60","unstructured":"Tewari, A., Bartlett, P.L.: Optimistic linear programming gives logarithmic regret for irreducible MDPs. In: Advances in Neural Information Processing Systems 20 (NIPS-2007), pp. 1505\u20131512 (2008)"},{"key":"6_CR61","unstructured":"Thrun, S.: The role of exploration in learning control. In: White, D.A., Sofge, D.A. (eds.) Handbook of Intelligent Control: Neural, Fuzzy and Adaptive Approaches, pp. 527\u2013559. Van Nostrand Reinhold (1992)"},{"issue":"11","key":"6_CR62","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1145\/1968.1972","volume":"27","author":"L.G. Valiant","year":"1984","unstructured":"Valiant, L.G.: A theory of the learnable. Communications of the ACM\u00a027(11), 1134\u20131142 (1984)","journal-title":"Communications of the ACM"},{"key":"6_CR63","unstructured":"Walsh, T.J.: Efficient learning of relational models for sequential decision making. PhD thesis, Rutgers University, New Brunswick, NJ (2010)"},{"key":"6_CR64","unstructured":"Walsh, T.J., Szita, I., Diuk, C., Littman, M.L.: Exploring compact reinforcement-learning representations with linear regression. In: Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI-2009), pp. 591\u2013598 (2009); corrected version as Technical Report DCS-tr-660, Department of Computer Science, Rutgers University"},{"key":"6_CR65","doi-asserted-by":"crossref","unstructured":"Walsh, T.J., Goschin, S., Littman, M.L.: Integrating sample-based planning and model-based reinforcement learning. In: Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence (AAAI-2010), pp. 612\u2013617 (2010a)","DOI":"10.1609\/aaai.v24i1.7689"},{"key":"6_CR66","unstructured":"Walsh, T.J., Subramanian, K., Littman, M.L., Diuk, C.: Generalizing apprenticeship learning across hypothesis classes. In: Proceedings of the Twenty-Seventh International Conference on Machine Learning (ICML-2010), pp. 1119\u20131126 (2010b)"},{"key":"6_CR67","unstructured":"Walsh, T.J., Hewlett, D., Morrison, C.T.: Blending autonomous and apprenticeship learning. In: Advances in Neural Information Processing Systems 24, NIPS-2011 (2012)"},{"key":"6_CR68","first-page":"279","volume":"8","author":"C.J. Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Machine Learning\u00a08, 279\u2013292 (1992)","journal-title":"Machine Learning"},{"key":"6_CR69","doi-asserted-by":"crossref","unstructured":"Whitehead, S.D.: Complexity and cooperation in Q-learning. In: Proceedings of the Eighth International Workshop on Machine Learning (ICML-1991), pp. 363\u2013367 (1991)","DOI":"10.1016\/B978-1-55860-200-7.50075-1"},{"key":"6_CR70","doi-asserted-by":"crossref","unstructured":"Wiering, M., Schmidhuber, J.: Efficient model-based exploration. In: Proceedings of the Fifth International Conference on Simulation of Adaptive Behavior: From Animals to Animats 5 (SAB-1998), pp. 223\u2013228 (1998)","DOI":"10.7551\/mitpress\/3119.003.0034"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,21]],"date-time":"2024-04-21T03:12:50Z","timestamp":1713669170000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":70,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_6","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"type":"print","value":"1867-4534"},{"type":"electronic","value":"1867-4542"}],"subject":[],"published":{"date-parts":[[2012]]}}}