{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T07:10:12Z","timestamp":1774941012554,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":60,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_4","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"111-141","source":"Crossref","is-referenced-by-count":13,"title":["Learning and Using Models"],"prefix":"10.1007","author":[{"given":"Todd","family":"Hester","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"4_CR1","unstructured":"Asmuth, J., Li, L., Littman, M., Nouri, A., Wingate, D.: A Bayesian sampling approach to exploration in reinforcement learning. In: Proceedings of the 25th Conference on Uncertainty in Artificial Intelligence, UAI (2009)"},{"key":"4_CR2","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1023\/A:1006511328852","volume":"11","author":"C. Atkeson","year":"1997","unstructured":"Atkeson, C., Moore, A., Schaal, S.: Locally weighted learning for control. Artificial Intelligence Review\u00a011, 75\u2013113 (1997)","journal-title":"Artificial Intelligence Review"},{"issue":"2","key":"4_CR3","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P. Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Machine Learning\u00a047(2), 235\u2013256 (2002)","journal-title":"Machine Learning"},{"issue":"3","key":"4_CR4","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1109\/TAMD.2009.2037513","volume":"1","author":"A. Baranes","year":"2009","unstructured":"Baranes, A., Oudeyer, P.Y.: R-IAC: Robust Intrinsically Motivated Exploration and Active Learning. IEEE Transactions on Autonomous Mental Development\u00a01(3), 155\u2013169 (2009)","journal-title":"IEEE Transactions on Autonomous Mental Development"},{"key":"4_CR5","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/S0004-3702(00)00033-3","volume":"121","author":"C. Boutilier","year":"2000","unstructured":"Boutilier, C., Dearden, R., Goldszmidt, M.: Stochastic dynamic programming with factored representations. Artificial Intelligence\u00a0121, 49\u2013107 (2000)","journal-title":"Artificial Intelligence"},{"key":"4_CR6","unstructured":"Brafman, R., Tennenholtz, M.: R-Max - a general polynomial time algorithm for near-optimal reinforcement learning. In: Proceedings of the Seventeenth International Joint Conference on Artificial Intelligence (IJCAI), pp. 953\u2013958 (2001)"},{"issue":"1","key":"4_CR7","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L. Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Machine Learning\u00a045(1), 5\u201332 (2001)","journal-title":"Machine Learning"},{"key":"4_CR8","unstructured":"Chakraborty, D., Stone, P.: Structure learning in ergodic factored MDPs without knowledge of the transition function\u2019s in-degree. In: Proceedings of the Twenty-Eighth International Conference on Machine Learning, ICML (2011)"},{"key":"4_CR9","unstructured":"Dearden, R., Friedman, N., Andre, D.: Model based Bayesian exploration. In: Proceedings of the Fifteenth Conference on Uncertainty in Artificial Intelligence (UAI), pp. 150\u2013159 (1999)"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Degris, T., Sigaud, O., Wuillemin, P.H.: Learning the structure of factored Markov Decision Processes in reinforcement learning problems. In: Proceedings of the Twenty-Third International Conference on Machine Learning (ICML), pp. 257\u2013264 (2006)","DOI":"10.1145\/1143844.1143877"},{"key":"4_CR11","unstructured":"Deisenroth, M., Rasmussen, C.: PILCO: A model-based and data-efficient approach to policy search. In: Proceedings of the Twenty-Eighth International Conference on Machine Learning, ICML (2011)"},{"key":"4_CR12","unstructured":"Dietterich, T.: The MAXQ method for hierarchical reinforcement learning. In: Proceedings of the Fifteenth International Conference on Machine Learning (ICML), pp. 118\u2013126 (1998)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Diuk, C., Cohen, A., Littman, M.: An object-oriented representation for efficient reinforcement learning. In: Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML), pp. 240\u2013247 (2008)","DOI":"10.1145\/1390156.1390187"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Diuk, C., Li, L., Leffler, B.: The adaptive-meteorologists problem and its application to structure learning and feature selection in reinforcement learning. In: Proceedings of the Twenty-Sixth International Conference on Machine Learning (ICML), p. 32 (2009)","DOI":"10.1145\/1553374.1553406"},{"key":"4_CR15","unstructured":"Duff, M.: Design for an optimal probe. In: Proceedings of the Twentieth International Conference on Machine Learning (ICML), pp. 131\u2013138 (2003)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Even-dar, E., Mansour, Y.: Learning rates for q-learning. Journal of Machine Learning Research, 1\u201325 (2001)","DOI":"10.1007\/3-540-44581-1_39"},{"key":"4_CR17","unstructured":"Fikes, R., Nilsson, N.: Strips: A new approach to the application of theorem proving to problem solving. Tech. Rep. 43r, AI Center, SRI International, 333 Ravenswood Ave, Menlo Park, CA 94025, SRI Project 8259 (1971)"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Gordon, G.: Stable function approximation in dynamic programming. In: Proceedings of the Twelfth International Conference on Machine Learning, ICML (1995)","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"4_CR19","unstructured":"Guestrin, C., Patrascu, R., Schuurmans, D.: Algorithm-directed exploration for model-based reinforcement learning in factored MDPs. In: Proceedings of the Nineteenth International Conference on Machine Learning (ICML), pp. 235\u2013242 (2002)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"van Hasselt, H., Wiering, M.: Reinforcement learning in continuous action spaces. In: IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning (ADPRL), pp. 272\u2013279 (2007)","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"4_CR21","unstructured":"Hester, T., Stone, P.: Generalized model learning for reinforcement learning in factored domains. In: Proceedings of the Eight International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS (2009)"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Hester, T., Stone, P.: Real time targeted exploration in large domains. In: Proceedings of the Ninth International Conference on Development and Learning, ICDL (2010)","DOI":"10.1109\/DEVLRN.2010.5578845"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Hester, T., Quinlan, M., Stone, P.: Generalized model learning for reinforcement learning on a humanoid robot. In: Proceedings of the 2010 IEEE International Conference on Robotics and Automation, ICRA (2010)","DOI":"10.1109\/ROBOT.2010.5509181"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Hester, T., Quinlan, M., Stone, P.: A real-time model-based reinforcement learning architecture for robot control. ArXiv e-prints 11051749 (2011)","DOI":"10.1109\/ICRA.2012.6225072"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Jong, N., Stone, P.: Model-based function approximation for reinforcement learning. In: Proceedings of the Sixth International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS (2007)","DOI":"10.1145\/1329125.1329242"},{"key":"4_CR26","unstructured":"Kakade, S.: On the sample complexity of reinforcement learning. PhD thesis, University College London (2003)"},{"key":"4_CR27","unstructured":"Kearns, M., Koller, D.: Efficient reinforcement learning in factored MDPs. In: Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence (IJCAI), pp. 740\u2013747 (1999)"},{"key":"4_CR28","unstructured":"Kearns, M., Singh, S.: Near-optimal reinforcement learning in polynomial time. In: Proceedings of the Fifteenth International Conference on Machine Learning (ICML), pp. 260\u2013268 (1998)"},{"key":"4_CR29","unstructured":"Kearns, M., Mansour, Y., Ng, A.: A sparse sampling algorithm for near-optimal planning in large Markov Decision Processes. In: Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence (IJCAI), pp. 1324\u20131331 (1999)"},{"key":"4_CR30","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"282","DOI":"10.1007\/11871842_29","volume-title":"Machine Learning: ECML 2006","author":"L. Kocsis","year":"2006","unstructured":"Kocsis, L., Szepesv\u00e1ri, C.: Bandit based monte-carlo planning. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol.\u00a04212, pp. 282\u2013293. Springer, Heidelberg (2006)"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Ng, A.: Near-Bayesian exploration in polynomial time. In: Proceedings of the Twenty-Sixth International Conference on Machine Learning (ICML), pp. 513\u2013520 (2009)","DOI":"10.1145\/1553374.1553441"},{"key":"4_CR32","first-page":"1107","volume":"4","author":"M. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Li, L., Littman, M., Walsh, T.: Knows what it knows: a framework for self-aware learning. In: Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML), pp. 568\u2013575 (2008)","DOI":"10.1145\/1390156.1390228"},{"key":"4_CR34","unstructured":"Li, L., Littman, M., Mansley, C.: Online exploration in least-squares policy iteration. In: Proceedings of the Eight International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS), pp. 733\u2013739 (2009)"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"McCallum, A.: Learning to use selective attention and short-term memory in sequential tasks. In: From Animals to Animats 4: Proceedings of the Fourth International Conference on Simulation of Adaptive Behavior (1996)","DOI":"10.7551\/mitpress\/3118.003.0039"},{"key":"4_CR36","first-page":"103","volume":"13","author":"A. Moore","year":"1993","unstructured":"Moore, A., Atkeson, C.: Prioritized sweeping: Reinforcement learning with less data and less real time. Machine Learning\u00a013, 103\u2013130 (1993)","journal-title":"Machine Learning"},{"key":"4_CR37","first-page":"199","volume":"21","author":"A. Moore","year":"1995","unstructured":"Moore, A., Atkeson, C.: The parti-game algorithm for variable resolution reinforcement learning in multidimensional state-spaces. Machine Learning\u00a021, 199\u2013233 (1995)","journal-title":"Machine Learning"},{"key":"4_CR38","unstructured":"Ng, A., Kim, H.J., Jordan, M., Sastry, S.: Autonomous helicopter flight via reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS), vol.\u00a016 (2003)"},{"issue":"1","key":"4_CR39","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/s10994-010-5202-y","volume":"81","author":"A. Nouri","year":"2010","unstructured":"Nouri, A., Littman, M.: Dimension reduction and its application to model-based exploration in continuous spaces. Mach. Learn.\u00a081(1), 85\u201398 (2010)","journal-title":"Mach. Learn."},{"issue":"2","key":"4_CR40","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1023\/A:1017928328829","volume":"49","author":"D. Ormnoneit","year":"2002","unstructured":"Ormnoneit, D., Sen, \u015a.: Kernel-based reinforcement learning. Machine Learning\u00a049(2), 161\u2013178 (2002)","journal-title":"Machine Learning"},{"key":"4_CR41","unstructured":"Pasula, H., Zettlemoyer, L., Kaelbling, L.P.: Learning probabilistic relational planning rules. In: Proceedings of the 14th International Conference on Automated Planning and Scheduling, ICAPS (2004)"},{"key":"4_CR42","doi-asserted-by":"crossref","unstructured":"Pazis, J., Lagoudakis, M.: Binary action search for learning continuous-action control policies. In: Proceedings of the Twenty-Sixth International Conference on Machine Learning (ICML), p. 100 (2009)","DOI":"10.1145\/1553374.1553476"},{"key":"4_CR43","doi-asserted-by":"crossref","unstructured":"Poupart, P., Vlassis, N., Hoey, J., Regan, K.: An analytic solution to discrete Bayesian reinforcement learning. In: Proceedings of the Twenty-Third International Conference on Machine Learning (s), pp. 697\u2013704 (2006)","DOI":"10.1145\/1143844.1143932"},{"key":"4_CR44","doi-asserted-by":"publisher","first-page":"997","DOI":"10.1109\/72.623201","volume":"8","author":"D. Prokhorov","year":"1997","unstructured":"Prokhorov, D., Wunsch, D.: Adaptive critic designs. IEEE Transactions on Neural Networks\u00a08, 997\u20131007 (1997)","journal-title":"IEEE Transactions on Neural Networks"},{"key":"4_CR45","unstructured":"Rasmussen, C., Kuss, M.: Gaussian processes in reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS), vol.\u00a016 (2004)"},{"issue":"1","key":"4_CR46","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1109\/37.257895","volume":"14","author":"S. Schaal","year":"1994","unstructured":"Schaal, S., Atkeson, C.: Robot juggling: implementation of memory-based learning. IEEE Control Systems Magazine\u00a014(1), 57\u201371 (1994)","journal-title":"IEEE Control Systems Magazine"},{"key":"4_CR47","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: Curious model-building control systems. In: Proceedings of the International Joint Conference on Neural Networks, pp. 1458\u20131463. IEEE (1991)","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"4_CR48","doi-asserted-by":"crossref","unstructured":"Silver, D., Sutton, R., M\u00fcller, M.: Sample-based learning and search with permanent and transient memories. In: Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML), pp. 968\u2013975 (2008)","DOI":"10.1145\/1390156.1390278"},{"key":"4_CR49","doi-asserted-by":"crossref","unstructured":"Strehl, A., Littman, M.: A theoretical analysis of model-based interval estimation. In: Proceedings of the Twenty-Second International Conference on Machine Learning (ICML), pp. 856\u2013863 (2005)","DOI":"10.1145\/1102351.1102459"},{"key":"4_CR50","unstructured":"Strehl, A., Diuk, C., Littman, M.: Efficient structure learning in factored-state MDPs. In: Proceedings of the Twenty-Second AAAI Conference on Artificial Intelligence, pp. 645\u2013650 (2007)"},{"key":"4_CR51","unstructured":"Strens, M.: A Bayesian framework for reinforcement learning. In: Proceedings of the Seventeenth International Conference on Machine Learning (ICML), pp. 943\u2013950 (2000)"},{"key":"4_CR52","doi-asserted-by":"crossref","unstructured":"Sutton, R.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the Seventh International Conference on Machine Learning (ICML), pp. 216\u2013224 (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"issue":"4","key":"4_CR53","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"R. Sutton","year":"1991","unstructured":"Sutton, R.: Dyna, an integrated architecture for learning, planning, and reacting. SIGART Bulletin\u00a02(4), 160\u2013163 (1991)","journal-title":"SIGART Bulletin"},{"key":"4_CR54","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems (NIPS), vol.\u00a012, pp. 1057\u20131063 (1999)"},{"issue":"3","key":"4_CR55","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1109\/TNN.2002.1000146","volume":"13","author":"G. Venayagamoorthy","year":"2002","unstructured":"Venayagamoorthy, G., Harley, R., Wunsch, D.: Comparison of heuristic dynamic programming and dual heuristic programming adaptive critics for neurocontrol of a turbogenerator. IEEE Transactions on Neural Networks\u00a013(3), 764\u2013773 (2002)","journal-title":"IEEE Transactions on Neural Networks"},{"key":"4_CR56","doi-asserted-by":"crossref","unstructured":"Walsh, T., Goschin, S., Littman, M.: Integrating sample-based planning and model-based reinforcement learning. In: Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence (2010)","DOI":"10.1609\/aaai.v24i1.7689"},{"key":"4_CR57","doi-asserted-by":"crossref","unstructured":"Wang, T., Lizotte, D., Bowling, M., Schuurmans, D.: Bayesian sparse sampling for on-line reward optimization. In: Proceedings of the Twenty-Second International Conference on Machine Learning (ICML), pp. 956\u2013963 (2005)","DOI":"10.1145\/1102351.1102472"},{"key":"4_CR58","doi-asserted-by":"crossref","unstructured":"Wang, Y., Gelly, S.: Modifications of UCT and sequence-like simulations for Monte-Carlo Go. In: IEEE Symposium on Computational Intelligence and Games (2007)","DOI":"10.1109\/CIG.2007.368095"},{"key":"4_CR59","unstructured":"Weinstein, A., Mansley, C., Littman, M.: Sample-based planning for continuous action Markov Decision Processes. In: ICML 2010 Workshop on Reinforcement Learning and Search in Very Large Spaces (2010)"},{"key":"4_CR60","doi-asserted-by":"crossref","first-page":"223","DOI":"10.7551\/mitpress\/3119.003.0034","volume-title":"From Animals to Animats 5: Proceedings of the Fifth International Conference on Simulation of Adaptive Behavior","author":"M. Wiering","year":"1998","unstructured":"Wiering, M., Schmidhuber, J.: Efficient model-based exploration. In: From Animals to Animats 5: Proceedings of the Fifth International Conference on Simulation of Adaptive Behavior, pp. 223\u2013228. MIT Press, Cambridge (1998)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,21]],"date-time":"2024-04-21T03:12:29Z","timestamp":1713669149000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_4","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}