{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:37:32Z","timestamp":1776886652665,"version":"3.51.2"},"publisher-location":"Berlin, Heidelberg","reference-count":43,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_2","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T17:18:12Z","timestamp":1330967892000},"page":"45-73","source":"Crossref","is-referenced-by-count":192,"title":["Batch Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Sascha","family":"Lange","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Gabel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Riedmiller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"2_CR1","unstructured":"Antos, A., Munos, R., Szepesvari, C.: Fitted Q-iteration in continuous action-space MDPs. In: Advances in Neural Information Processing Systems, vol.\u00a020, pp. 9\u201316 (2008)"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Baird, L.: Residual algorithms: Reinforcement learning with function approximation. In: Proc. of the Twelfth International Conference on Machine Learning, pp. 30\u201337 (1995)","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"issue":"4","key":"2_CR3","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"D. Bernstein","year":"2002","unstructured":"Bernstein, D., Givan, D., Immerman, N., Zilberstein, S.: The Complexity of Decentralized Control of Markov Decision Processes. Mathematics of Operations Research\u00a027(4), 819\u2013840 (2002)","journal-title":"Mathematics of Operations Research"},{"key":"2_CR4","volume-title":"Neuro-dynamic programming","author":"D. Bertsekas","year":"1996","unstructured":"Bertsekas, D., Tsitsiklis, J.: Neuro-dynamic programming. Athena Scientific, Belmont (1996)"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Bonarini, A., Caccia, C., Lazaric, A., Restelli, M.: Batch reinforcement learning for controlling a mobile wheeled pendulum robot. In: IFIP AI, pp. 151\u2013160 (2008)","DOI":"10.1007\/978-0-387-09695-7_15"},{"key":"2_CR6","volume-title":"Complex Scheduling","author":"P. Brucker","year":"2005","unstructured":"Brucker, P., Knust, S.: Complex Scheduling. Springer, Berlin (2005)"},{"issue":"7-9","key":"2_CR7","doi-asserted-by":"publisher","first-page":"1508","DOI":"10.1016\/j.neucom.2008.12.019","volume":"72","author":"M.P. Deisenroth","year":"2009","unstructured":"Deisenroth, M.P., Rasmussen, C.E., Peters, J.: Gaussian Process Dynamic Programming. Neurocomputing\u00a072(7-9), 1508\u20131524 (2009)","journal-title":"Neurocomputing"},{"issue":"1","key":"2_CR8","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-Based Batch Mode Reinforcement Learning. Journal of Machine Learning Research\u00a06(1), 503\u2013556 (2005a)","journal-title":"Journal of Machine Learning Research"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Ernst, D., Glavic, M., Geurts, P., Wehenkel, L.: Approximate Value Iteration in the Reinforcement Learning Context. Application to Electrical Power System Control. International Journal of Emerging Electric Power Systems\u00a03(1) (2005b)","DOI":"10.2202\/1553-779X.1066"},{"issue":"2","key":"2_CR10","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1109\/TSMCB.2008.2007630","volume":"39","author":"D. Ernst","year":"2009","unstructured":"Ernst, D., Glavic, M., Capitanescu, F., Wehenkel, L.: Reinforcement learning versus model predictive control: a comparison on a power system problem. IEEE Transactions on Systems, Man, and Cybernetics, Part B\u00a039(2), 517\u2013529 (2009)","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part B"},{"key":"2_CR11","unstructured":"Gabel, T., Riedmiller, M.: Adaptive Reactive Job-Shop Scheduling with Reinforcement Learning Agents. International Journal of Information Technology and Intelligent Computing\u00a024(4) (2008a)"},{"key":"2_CR12","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1007\/978-3-540-89722-4_7","volume-title":"Recent Advances in Reinforcement Learning","author":"T. Gabel","year":"2008","unstructured":"Gabel, T., Riedmiller, M.: Evaluation of Batch-Mode Reinforcement Learning Methods for Solving DEC-MDPs with Changing Action Sets. In: Girgin, S., Loth, M., Munos, R., Preux, P., Ryabko, D. (eds.) EWRL 2008. LNCS (LNAI), vol.\u00a05323, pp. 82\u201395. Springer, Heidelberg (2008)"},{"key":"2_CR13","unstructured":"Gabel, T., Riedmiller, M.: Reinforcement Learning for DEC-MDPs with Changing Action Sets and Partially Ordered Dependencies. In: Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS 2008), IFAAMAS, Estoril, Portugal, pp. 1333\u20131336 (2008)"},{"key":"2_CR14","first-page":"261","volume-title":"Proc. of the Twelfth International Conference on Machine Learning","author":"G.J. Gordon","year":"1995","unstructured":"Gordon, G.J.: Stable Function Approximation in Dynamic Programming. In: Proc. of the Twelfth International Conference on Machine Learning, pp. 261\u2013268. Morgan Kaufmann, Tahoe City (1995a)"},{"key":"2_CR15","unstructured":"Gordon, G.J.: Stable function approximation in dynamic programming. Tech. rep., CMU-CS-95-103, CMU School of Computer Science, Pittsburgh, PA (1995b)"},{"key":"2_CR16","unstructured":"Gordon, G.J.: Chattering in SARSA (\u03bb). Tech. rep. (1996)"},{"key":"2_CR17","unstructured":"Guez, A., Vincent, R.D., Avoli, M., Pineau, J.: Adaptive treatment of epilepsy via batch-mode reinforcement learning. In: AAAI, pp. 1671\u20131678 (2008)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Hafner, R., Riedmiller, M.: Reinforcement Learning in Feedback Control \u2014 challenges and benchmarks from technical process control. Machine Learning (accepted for publication, 2011), doi:10.1007\/s10994-011-5235-x","DOI":"10.1007\/s10994-011-5235-x"},{"issue":"5786","key":"2_CR19","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"G. Hinton","year":"2006","unstructured":"Hinton, G., Salakhutdinov, R.: Reducing the Dimensionality of Data with Neural Networks. Science\u00a0313(5786), 504\u2013507 (2006)","journal-title":"Science"},{"key":"2_CR20","first-page":"650","volume-title":"The Sixth International Joint Conference on Autonomous Agents and Multiagent Systems","author":"S. Kalyanakrishnan","year":"2007","unstructured":"Kalyanakrishnan, S., Stone, P.: Batch reinforcement learning in a complex domain. In: The Sixth International Joint Conference on Autonomous Agents and Multiagent Systems, pp. 650\u2013657. ACM, New York (2007)"},{"key":"2_CR21","volume-title":"Proceedings of the Int. Conference on Machine Learning Applications (ICMLA 2009)","author":"T. Kietzmann","year":"2009","unstructured":"Kietzmann, T., Riedmiller, M.: The Neuro Slot Car Racer: Reinforcement Learning in a Real World Setting. In: Proceedings of the Int. Conference on Machine Learning Applications (ICMLA 2009). Springer, Miami (2009)"},{"key":"2_CR22","unstructured":"Lagoudakis, M., Parr, R.: Model-Free Least-Squares Policy Iteration. In: Advances in Neural Information Processing Systems, vol.\u00a014, pp. 1547\u20131554 (2001)"},{"key":"2_CR23","first-page":"1107","volume":"4","author":"M. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., Parr, R.: Least-Squares Policy Iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"2_CR24","unstructured":"Lange, S.: Tiefes Reinforcement Lernen auf Basis visueller Wahrnehmungen. Dissertation, Universit\u00e4t Osnabr\u00fcck (2010)"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Lange, S., Riedmiller, M.: Deep auto-encoder neural networks in reinforcement learning. In: International Joint Conference on Neural Networks (IJCNN 2010), Barcelona, Spain (2010a)","DOI":"10.1109\/IJCNN.2010.5596468"},{"key":"2_CR26","unstructured":"Lange, S., Riedmiller, M.: Deep learning of visual control policies. In: European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2010), Brugge, Belgium (2010b)"},{"key":"2_CR27","first-page":"535","volume-title":"Proceedings of the Seventeenth International Conference on Machine Learning (ICML 2000)","author":"M. Lauer","year":"2000","unstructured":"Lauer, M., Riedmiller, M.: An Algorithm for Distributed Reinforcement Learning in Cooperative Multi-Agent Systems. In: Proceedings of the Seventeenth International Conference on Machine Learning (ICML 2000), pp. 535\u2013542. Morgan Kaufmann, Stanford (2000)"},{"issue":"3","key":"2_CR28","first-page":"293","volume":"8","author":"L. Lin","year":"1992","unstructured":"Lin, L.: Self-Improving Reactive Agents Based on Reinforcement Learning, Planning and Teaching. Machine Learning\u00a08(3), 293\u2013321 (1992)","journal-title":"Machine Learning"},{"key":"2_CR29","unstructured":"Ormoneit, D., Glynn, P.: Kernel-based reinforcement learning in average-cost problems: An application to optimal portfolio choice. In: Advances in Neural Information Processing Systems, vol.\u00a013, pp. 1068\u20131074 (2001)"},{"issue":"10","key":"2_CR30","doi-asserted-by":"publisher","first-page":"1624","DOI":"10.1109\/TAC.2002.803530","volume":"47","author":"D. Ormoneit","year":"2002","unstructured":"Ormoneit, D., Glynn, P.: Kernel-based reinforcement learning in average-cost problems. IEEE Transactions on Automatic Control\u00a047(10), 1624\u20131636 (2002)","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"2","key":"2_CR31","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1023\/A:1017928328829","volume":"49","author":"D. Ormoneit","year":"2002","unstructured":"Ormoneit, D., Sen, \u015a.: Kernel-based reinforcement learning. Machine Learning\u00a049(2), 161\u2013178 (2002)","journal-title":"Machine Learning"},{"key":"2_CR32","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"2_CR33","unstructured":"Riedmiller, M., Braun, H.: A direct adaptive method for faster backpropagation learning: The RPROP algorithm. In: Ruspini, H. (ed.) Proceedings of the IEEE International Conference on Neural Networks (ICNN), San Francisco, pp. 586\u2013591 (1993)"},{"key":"2_CR34","volume-title":"Proceedings of the FBIT 2007 Conference","author":"M. Riedmiller","year":"2007","unstructured":"Riedmiller, M., Montemerlo, M., Dahlkamp, H.: Learning to Drive in 20 Minutes. In: Proceedings of the FBIT 2007 Conference. Springer, Jeju (2007)"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Hafner, R., Lange, S., Lauer, M.: Learning to dribble on a real robot by success and failure. In: Proc. of the IEEE International Conference on Robotics and Automation, pp. 2207\u20132208 (2008)","DOI":"10.1109\/ROBOT.2008.4543536"},{"issue":"1","key":"2_CR36","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M. Riedmiller","year":"2009","unstructured":"Riedmiller, M., Gabel, T., Hafner, R., Lange, S.: Reinforcement Learning for Robot Soccer. Autonomous Robots\u00a027(1), 55\u201374 (2009)","journal-title":"Autonomous Robots"},{"issue":"6088","key":"2_CR37","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"D. Rumelhart","year":"1986","unstructured":"Rumelhart, D., Hinton, G., Williams, R.: Learning representations by back-propagating errors. Nature\u00a0323(6088), 533\u2013536 (1986)","journal-title":"Nature"},{"key":"2_CR38","unstructured":"Schoknecht, R., Merke, A.: Convergent combinations of reinforcement learning with linear function approximation. In: Advances in Neural Information Processing Systems, vol.\u00a015, pp. 1611\u20131618 (2003)"},{"key":"2_CR39","unstructured":"Singh, S., Jaakkola, T., Jordan, M.: Reinforcement learning with soft state aggregation. In: Advances in Neural Information Processing Systems, vol.\u00a07, pp. 361\u2013368 (1995)"},{"key":"2_CR40","volume-title":"Reinforcement Learning. An Introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. An Introduction. MIT Press\/A Bradford Book, Cambridge, USA (1998)"},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Timmer, S., Riedmiller, M.: Fitted Q Iteration with CMACs. In: Proceedings of the IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2007), Honolulu, USA (2007)","DOI":"10.1109\/ADPRL.2007.368162"},{"key":"2_CR42","doi-asserted-by":"crossref","unstructured":"Tognetti, S., Savaresi, S., Spelta, C., Restelli, M.: Batch reinforcement learning for semi-active suspension control, pp. 582\u2013587 (2009)","DOI":"10.1109\/CCA.2009.5281070"},{"key":"2_CR43","unstructured":"Werbos, P.: Beyond regression: New tools for prediction and analysis in the behavioral sciences. PhD thesis, Harvard University (1974)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,4,28]],"date-time":"2019-04-28T07:21:03Z","timestamp":1556436063000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_2","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}