{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T14:33:25Z","timestamp":1759674805917,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540688464"},{"type":"electronic","value":"9783540688471"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-68847-1_15","type":"book-chapter","created":{"date-parts":[[2008,7,17]],"date-time":"2008-07-17T08:26:35Z","timestamp":1216283195000},"page":"171-183","source":"Crossref","is-referenced-by-count":6,"title":["Model-Based Reinforcement Learning in a Complex Domain"],"prefix":"10.1007","author":[{"given":"Shivaram","family":"Kalyanakrishnan","sequence":"first","affiliation":[]},{"given":"Peter","family":"Stone","sequence":"additional","affiliation":[]},{"given":"Yaxin","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"15_CR1","unstructured":"Albus, J.S.: Brains, Behavior, and Robotics. BYTE Books, Peterborough (1981)"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Atkeson, C., Santamar\u00eda, J.: A comparison of direct and model-based reinforcement learning. In: IEEE International Conference on Robotics and Automation, vol.\u00a04, pp. 3557\u20133564 (April 1997)","DOI":"10.1109\/ROBOT.1997.606886"},{"key":"15_CR3","doi-asserted-by":"crossref","unstructured":"Boone, G.: Efficient reinforcement learning: model-based acrobot control. In: IEEE International Conference on Robotics and Automation, vol.\u00a01, pp. 229\u2013234 (April 1997)","DOI":"10.1109\/ROBOT.1997.620043"},{"key":"15_CR4","unstructured":"Bradtke, S.J., Duff, M.O.: Reinforcement learning methods for continuous-time Markov decision problems. In: Tesauro, G., Touretzky, D., Leen, T. (eds.) Advances in Neural Information Processing Systems, vol.\u00a07, pp. 393\u2013400. The MIT Press (1995)"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Kalyanakrishnan, S., Liu, Y., Stone, P.: Half field offense in RoboCup soccer: A multiagent reinforcement learning case study. In: Proceedings of the RoboCup International Symposium 2006 (June 2006)","DOI":"10.1007\/978-3-540-74024-7_7"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Kalyanakrishnan, S., Stone, P.: Batch reinforcement learning in a complex domain. In: The Sixth International Joint Conference on Autonomous Agents and Multiagent Systems (May 2007)","DOI":"10.1145\/1329125.1329241"},{"key":"15_CR7","first-page":"293","volume":"8","author":"L.-J. Lin","year":"1992","unstructured":"Lin, L.-J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning\u00a08, 293\u2013321 (1992)","journal-title":"Machine Learning"},{"key":"15_CR8","unstructured":"Chen, M., Foroughi, E., Heintz, F., Huang, Z., Kapetanakis, S., Kostiadis, K., Kummeneje, J., Noda, I., Obst, O., Riley, P., Steffens, T., Wang, Y., Yin, X.: Users manual: RoboCup soccer server \u2014 for soccer server version 7.07 and later. In: The RoboCup Federation (August 2002)"},{"key":"15_CR9","volume-title":"Advances in Neural Information Processing Systems 16","author":"A.Y. Ng","year":"2004","unstructured":"Ng, A.Y., Kim, H.J., Jordan, M.I., Sastry, S.: Autonomous helicopter flight via reinforcement learning. In: Thrun, S., Saul, L., Sch\u00f6lkopf, B. (eds.) Advances in Neural Information Processing Systems 16, MIT Press, Cambridge (2004)"},{"key":"15_CR10","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley and Sons, New York (1994)"},{"issue":"3","key":"15_CR11","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P. Stone","year":"2005","unstructured":"Stone, P., Sutton, R.S., Kuhlmann, G.: Reinforcement learning for RoboCup-soccer keepaway. Adaptive Behavior\u00a013(3), 165\u2013188 (2005)","journal-title":"Adaptive Behavior"},{"key":"15_CR12","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"issue":"1-2","key":"15_CR13","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R.S. Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.P.: Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence\u00a0112(1-2), 181\u2013211 (1999)","journal-title":"Artificial Intelligence"},{"key":"15_CR14","unstructured":"Tesauro, G.: Practical issues in temporal difference learning. In: Moody, J.E., Hanson, S.J., Lippmann, R.P. (eds.) Advances in Neural Information Processing Systems, vol.\u00a04, pp. 259\u2013266. Morgan Kaufmann Publishers, Inc. (1992)"},{"issue":"1-3","key":"15_CR15","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/BF00114724","volume":"22","author":"J.N. Tsitsiklis","year":"1996","unstructured":"Tsitsiklis, J.N., Roy, B.V.: Feature-based methods for large scale dynamic programming. Machine Learning\u00a022(1-3), 59\u201394 (1996)","journal-title":"Machine Learning"},{"issue":"3-4","key":"15_CR16","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"C.J.C.H. Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Machine Learning\u00a08(3-4), 279\u2013292 (1992)","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","RoboCup 2007: Robot Soccer World Cup XI"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-68847-1_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T01:48:25Z","timestamp":1738288105000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-68847-1_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540688464","9783540688471"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-68847-1_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]}}}