{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T10:23:50Z","timestamp":1780482230114,"version":"3.54.1"},"publisher-location":"Berlin, Heidelberg","reference-count":132,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642276446","type":"print"},{"value":"9783642276453","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-27645-3_18","type":"book-chapter","created":{"date-parts":[[2012,3,5]],"date-time":"2012-03-05T22:18:12Z","timestamp":1330985892000},"page":"579-610","source":"Crossref","is-referenced-by-count":107,"title":["Reinforcement Learning in Robotics: A Survey"],"prefix":"10.1007","author":[{"given":"Jens","family":"Kober","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Quigley, M., Ng, A.Y.: Using inaccurate models in reinforcement learning. In: International Conference on Machine Learning, ICML (2006)","DOI":"10.1145\/1143844.1143845"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Coates, A., Quigley, M., Ng, A.Y.: An application of reinforcement learning to aerobatic helicopter flight. In: Advances in Neural Information Processing Systems, NIPS (2007)","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Dolgov, D., Ng, A.Y., Thrun, S.: Apprenticeship learning for motion planning with application to parking lot navigation. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2008)","DOI":"10.1109\/IROS.2008.4651222"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Argall, B.D., Browning, B., Veloso, M.: Learning robot motion control with demonstration and advice-operators. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2008)","DOI":"10.1109\/IROS.2008.4651020"},{"key":"18_CR5","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"B.D. Argall","year":"2009","unstructured":"Argall, B.D., Chernova, S., Veloso, M., Browning, B.: A survey of robot learning from demonstration. Robotics and Autonomous Systems\u00a057, 469\u2013483 (2009)","journal-title":"Robotics and Autonomous Systems"},{"issue":"2-3","key":"18_CR6","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00117447","volume":"23","author":"M. Asada","year":"1996","unstructured":"Asada, M., Noda, S., Tawaratsumida, S., Hosoda, K.: Purposive behavior acquisition for a real robot by vision-based reinforcement learning. Machine Learning\u00a023(2-3), 279\u2013303 (1996)","journal-title":"Machine Learning"},{"key":"18_CR7","first-page":"75","volume":"11","author":"C. Atkeson","year":"1997","unstructured":"Atkeson, C., Moore, A., Stefan, S.: Locally weighted learning for control. AI Review\u00a011, 75\u2013113 (1997)","journal-title":"AI Review"},{"key":"18_CR8","unstructured":"Atkeson, C.G.: Using local trajectory optimizers to speed up global optimization in dynamic programming. In: Advances in Neural Information Processing Systems, NIPS (1994)"},{"key":"18_CR9","unstructured":"Atkeson, C.G.: Nonparametric model-based reinforcement learning. In: Advances in Neural Information Processing Systems, NIPS (1998)"},{"key":"18_CR10","unstructured":"Atkeson, C.G., Schaal, S.: Robot learning from demonstration. In: International Conference on Machine Learning, ICML (1997)"},{"key":"18_CR11","unstructured":"Bagnell, J.A., Schneider, J.C.: Autonomous helicopter control using reinforcement learning policy search methods. In: IEEE International Conference on Robotics and Automation, ICRA (2001)"},{"key":"18_CR12","unstructured":"Bakker, B., Zhumatiy, V., Gruener, G., Schmidhuber, J.: A robot that reinforcement-learns to identify and memorize important previous observations. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2003)"},{"key":"18_CR13","unstructured":"Bakker, B., Zhumatiy, V., Gruener, G., Schmidhuber, J.: Quasi-online reinforcement learning for robots. In: IEEE International Conference on Robotics and Automation, ICRA (2006)"},{"issue":"4","key":"18_CR14","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"A.G. Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discrete Event Dynamic Systems\u00a013(4), 341\u2013379 (2003)","journal-title":"Discrete Event Dynamic Systems"},{"key":"18_CR15","volume-title":"Dynamic Programming","author":"R.E. Bellman","year":"1957","unstructured":"Bellman, R.E.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"key":"18_CR16","volume-title":"Introduction to the Mathematical Theory of Control Processes","author":"R.E. Bellman","year":"1967","unstructured":"Bellman, R.E.: Introduction to the Mathematical Theory of Control Processes, vol.\u00a040-I. Academic Press, New York (1967)"},{"key":"18_CR17","volume-title":"Introduction to the Mathematical Theory of Control Processes","author":"R.E. Bellman","year":"1971","unstructured":"Bellman, R.E.: Introduction to the Mathematical Theory of Control Processes, vol.\u00a040-II. Academic Press, New York (1971)"},{"issue":"3-4","key":"18_CR18","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1016\/S0921-8890(97)00043-2","volume":"22","author":"H. Benbrahim","year":"1997","unstructured":"Benbrahim, H., Franklin, J.A.: Biped dynamic walking using reinforcement learning. Robotics and Autonomous Systems\u00a022(3-4), 283\u2013302 (1997)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR19","unstructured":"Benbrahim, H., Doleac, J., Franklin, J., Selfridge, O.: Real-time learning: a ball on a beam. In: International Joint Conference on Neural Networks, IJCNN (1992)"},{"key":"18_CR20","unstructured":"Bentivegna, D.C.: Learning from observation using primitives. PhD thesis, Georgia Institute of Technology (2004)"},{"key":"18_CR21","volume-title":"Advances in Design and Control","author":"J.T. Betts","year":"2001","unstructured":"Betts, J.T.: Practical methods for optimal control using nonlinear programming. In: Advances in Design and Control, vol.\u00a03. Society for Industrial and Applied Mathematics (SIAM), Philadelphia (2001)"},{"key":"18_CR22","unstructured":"Birdwell, N., Livingston, S.: Reinforcement learning in sensor-guided aibo robots. Tech. rep., University of Tennesse, Knoxville, advised by Dr. Itamar Elhanany (2007)"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Bitzer, S., Howard, M., Vijayakumar, S.: Using dimensionality reduction to exploit constraints in reinforcement learning. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2010)","DOI":"10.1109\/IROS.2010.5650243"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Buchli, J., Stulp, F., Theodorou, E., Schaal, S.: Learning variable impedance control. International Journal of Robotics Research Online First (2011)","DOI":"10.1177\/0278364911402527"},{"key":"18_CR25","volume-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators","author":"L. Bu\u015foniu","year":"2010","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., De Schutter, B., Ernst, D.: Reinforcement Learning and Dynamic Programming Using Function Approximators. CRC Press, Boca Raton (2010)"},{"issue":"7","key":"18_CR26","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1145\/1538788.1538812","volume":"52","author":"A. Coates","year":"2009","unstructured":"Coates, A., Abbeel, P., Ng, A.Y.: Apprenticeship learning for helicopter control. Commun. ACM\u00a052(7), 97\u2013105 (2009)","journal-title":"Commun. ACM"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Cocora, A., Kersting, K., Plagemann, C., Burgard, W., Raedt, L.D.: Learning relational navigation policies. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2006)","DOI":"10.1109\/IROS.2006.282061"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Conn, K., Peters II, R.A.: Reinforcement learning with a supervisor for a mobile robot in a real-world environment. In: IEEE International Symposium on Computational Intelligence in Robotics and Automation, CIRA (2007)","DOI":"10.1109\/CIRA.2007.382878"},{"issue":"2","key":"18_CR29","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1162\/neco.1997.9.2.271","volume":"9","author":"P. Dayan","year":"1997","unstructured":"Dayan, P., Hinton, G.E.: Using expectation-maximization for reinforcement learning. Neural Computation\u00a09(2), 271\u2013278 (1997)","journal-title":"Neural Computation"},{"key":"18_CR30","unstructured":"Deisenroth, M.P., Rasmussen, C.E.: A practical and conceptual framework for learning in control. Tech. Rep. UW-CSE-10-06-01, Department of Computer Science & Engineering, University of Washington, USA (2010)"},{"issue":"3","key":"18_CR31","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1109\/3477.499790","volume":"26","author":"J.Y. Donnart","year":"1996","unstructured":"Donnart, J.Y., Meyer, J.A.: Learning reactive and planning rules in a motivationally autonomous animat. IEEE Transactions on Systems, Man, and Cybernetics, Part B: Cybernetics\u00a026(3), 381\u2013395 (1996)","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part B: Cybernetics"},{"key":"18_CR32","unstructured":"Dorigo, M., Colombetti, M.: Robot shaping: Developing situated agents through learning. Tech. rep., International Computer Science Institute, Berkeley, CA (1993)"},{"issue":"7","key":"18_CR33","doi-asserted-by":"publisher","first-page":"936","DOI":"10.1016\/j.engappai.2007.01.003","volume":"20","author":"Y. Duan","year":"2007","unstructured":"Duan, Y., Liu, Q., Xu, X.: Application of reinforcement learning in robot soccer. Engineering Applications of Artificial Intelligence\u00a020(7), 936\u2013950 (2007)","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"18_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/978-3-540-87732-5_44","volume-title":"Advances in Neural Networks - ISNN 2008","author":"Y. Duan","year":"2008","unstructured":"Duan, Y., Cui, B., Yang, H.: Robot Navigation Based on Fuzzy RL Algorithm. In: Sun, F., Zhang, J., Tan, Y., Cao, J., Yu, W. (eds.) ISNN 2008, Part I. LNCS, vol.\u00a05263, pp. 391\u2013399. Springer, Heidelberg (2008)"},{"issue":"2","key":"18_CR35","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1177\/0278364907084980","volume":"27","author":"G. Endo","year":"2008","unstructured":"Endo, G., Morimoto, J., Matsubara, T., Nakanishi, J., Cheng, G.: Learning CPG-based biped locomotion with a policy gradient method: Application to a humanoid robot. I. J. Robotic Res.\u00a027(2), 213\u2013228 (2008)","journal-title":"I. J. Robotic Res."},{"issue":"3","key":"18_CR36","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/j.robot.2007.08.001","volume":"56","author":"M.S. Erden","year":"2008","unstructured":"Erden, M.S., Leblebicio\u011flu, K.: Free gait generation with reinforcement learning for a six-legged robot. Robot. Auton. Syst.\u00a056(3), 199\u2013212 (2008)","journal-title":"Robot. Auton. Syst."},{"key":"18_CR37","unstructured":"Fagg, A.H., Lotspeich, D.L., Hoff, J., Bekey, G.A.: Rapid reinforcement learning for reactive control policy design for autonomous robots. In: Artificial Life in Robotics (1998)"},{"key":"18_CR38","unstructured":"Gaskett, C., Fletcher, L., Zelinsky, A.: Reinforcement learning for a vision based mobile robot. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2000)"},{"key":"18_CR39","unstructured":"Geng, T., Porr, B., W\u00f6rg\u00f6tter, F.: Fast biped walking with a reflexive controller and real-time policy searching. In: Advances in Neural Information Processing Systems, NIPS (2006)"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Glynn, P.: Likelihood ratio gradient estimation: an overview. In: Winter Simulation Conference, WSC (1987)","DOI":"10.21236\/ADA197085"},{"key":"18_CR41","unstructured":"Goldberg, D.E.: Genetic algorithms. Addision Wesley (1989)"},{"key":"18_CR42","unstructured":"Gr\u00e4ve, K., St\u00fcckler, J., Behnke, S.: Learning motion skills from expert demonstrations and own experience using gaussian process regression. In: Joint International Symposium on Robotics (ISR) and German Conference on Robotics, ROBOTIK (2010)"},{"issue":"13","key":"18_CR43","doi-asserted-by":"crossref","first-page":"1521","DOI":"10.1163\/156855307782148550","volume":"21","author":"F. Guenter","year":"2007","unstructured":"Guenter, F., Hersch, M., Calinon, S., Billard, A.: Reinforcement learning for imitating constrained reaching movements. Advanced Robotics\u00a021(13), 1521\u20131544 (2007)","journal-title":"Advanced Robotics"},{"issue":"1","key":"18_CR44","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1109\/37.257890","volume":"14","author":"V. Gullapalli","year":"1994","unstructured":"Gullapalli, V., Franklin, J., Benbrahim, H.: Acquiring robot skills via reinforcement learning. IEEE on Control Systems Magazine\u00a014(1), 13\u201324 (1994)","journal-title":"IEEE on Control Systems Magazine"},{"key":"18_CR45","unstructured":"Hafner, R., Riedmiller, M.: Reinforcement learning on a omnidirectional mobile robot. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2003)"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Hafner, R., Riedmiller, M.: Neural reinforcement learning controllers for a real robot application. In: IEEE International Conference on Robotics and Automation, ICRA (2007)","DOI":"10.1109\/ROBOT.2007.363631"},{"key":"18_CR47","unstructured":"Hailu, G., Sommer, G.: Integrating symbolic knowledge in reinforcement learning. In: IEEE International Conference on Systems, Man and Cybernetics (SMC) (1998)"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Hester, T., Quinlan, M., Stone, P.: Generalized model learning for reinforcement learning on a humanoid robot. In: IEEE International Conference on Robotics and Automation, ICRA (2010)","DOI":"10.1109\/ROBOT.2010.5509181"},{"key":"18_CR49","unstructured":"Huang, X., Weng, J.: Novelty and reinforcement learning in the value system of developmental robots. In: Lund University Cognitive Studies (2002)"},{"key":"18_CR50","unstructured":"Ijspeert, A.J., Nakanishi, J., Schaal, S.: Learning attractor landscapes for learning motor primitives. in: Advances in Neural Information Processing Systems, NIPS (2003)"},{"key":"18_CR51","unstructured":"Ilg, W., Albiez, J., Jedele, H., Berns, K., Dillmann, R.: Adaptive periodic movement control for the four legged walking machine BISAM. In: IEEE International Conference on Robotics and Automation, ICRA (1999)"},{"key":"18_CR52","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: A survey. Journal of Artificial Intelligence Research\u00a04, 237\u2013285 (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"18_CR53","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/3-540-49240-2_3","volume-title":"Learning Robots","author":"Z. Kalm\u00e1r","year":"1998","unstructured":"Kalm\u00e1r, Z., Szepesv\u00e1ri, C., L\u0151rincz, A.: Modular Reinforcement Learning: An Application to a Real Robot Task. In: Birk, A., Demiris, J. (eds.) EWLR 1997. LNCS (LNAI), vol.\u00a01545, pp. 29\u201345. Springer, Heidelberg (1998)"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Kappen, H.: Path integrals and symmetry breaking for optimal control theory. Journal of Statistical Mechanics: Theory and Experiment\u00a011 (2005)","DOI":"10.1088\/1742-5468\/2005\/11\/P11011"},{"key":"18_CR55","doi-asserted-by":"crossref","unstructured":"Katz, D., Pyuro, Y., Brock, O.: Learning to manipulate articulated objects in unstructured environments using a grounded relational representation. In: Robotics: Science and Systems, R:SS (2008)","DOI":"10.15607\/RSS.2008.IV.033"},{"key":"18_CR56","unstructured":"Kimura, H., Yamashita, T., Kobayashi, S.: Reinforcement learning of walking behavior for a four-legged robot. In: IEEE Conference on Decision and Control (CDC) (2001)"},{"key":"18_CR57","unstructured":"Kirchner, F.: Q-learning of complex behaviours on a six-legged walking machine. In: EUROMICRO Workshop on Advanced Mobile Robots (1997)"},{"key":"18_CR58","volume-title":"Optimal control theory","author":"D.E. Kirk","year":"1970","unstructured":"Kirk, D.E.: Optimal control theory. Prentice-Hall, Englewood Cliffs (1970)"},{"key":"18_CR59","doi-asserted-by":"crossref","unstructured":"Ko, J., Klein, D.J., Fox, D., H\u00e4hnel, D.: Gaussian processes and reinforcement learning for identification and control of an autonomous blimp. In: IEEE International Conference on Robotics and Automation (ICRA) (2007)","DOI":"10.1109\/ROBOT.2007.363075"},{"key":"18_CR60","doi-asserted-by":"crossref","unstructured":"Kober, J., Peters, J.: Policy search for motor primitives in robotics. In: Advances in Neural Information Processing Systems, NIPS (2009)","DOI":"10.1109\/ROBOT.2009.5152577"},{"key":"18_CR61","doi-asserted-by":"crossref","unstructured":"Kober, J., Peters, J.: Policy search for motor primitives in robotics. Machine Learning Online First (2010)","DOI":"10.1109\/ROBOT.2009.5152577"},{"key":"18_CR62","doi-asserted-by":"crossref","unstructured":"Kober, J., Mohler, B., Peters, J.: Learning perceptual coupling for motor primitives. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2008)","DOI":"10.1109\/IROS.2008.4650953"},{"key":"18_CR63","doi-asserted-by":"crossref","unstructured":"Kober, J., Oztop, E., Peters, J.: Reinforcement learning to adjust robot movements to new situations. In: Robotics: Science and Systems Conference (R:SS) (2010)","DOI":"10.15607\/RSS.2010.VI.005"},{"key":"18_CR64","doi-asserted-by":"crossref","unstructured":"Kohl, N., Stone, P.: Policy gradient reinforcement learning for fast quadrupedal locomotion. In: IEEE International Conference on Robotics and Automation (ICRA) (2004)","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"18_CR65","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Ng, A.Y.: Policy search via the signed derivative. In: Robotics: Science and Systems (R:SS) (2009)","DOI":"10.7551\/mitpress\/8727.003.0028"},{"key":"18_CR66","unstructured":"Kolter, J.Z., Abbeel, P., Ng, A.Y.: Hierarchical apprenticeship learning with application to quadruped locomotion. In: Advances in Neural Information Processing Systems (NIPS) (2007)"},{"key":"18_CR67","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Coates, A., Ng, A.Y., Gu, Y., DuHadway, C.: Space-indexed dynamic programming: learning to follow trajectories. In: International Conference on Machine Learning (ICML) (2008)","DOI":"10.1145\/1390156.1390218"},{"key":"18_CR68","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Plagemann, C., Jackson, D.T., Ng, A.Y., Thrun, S.: A probabilistic approach to mixed open-loop and closed-loop control, with application to extreme autonomous driving. In: IEEE International Conference on Robotics and Automation (ICRA) (2010)","DOI":"10.1109\/ROBOT.2010.5509562"},{"key":"18_CR69","doi-asserted-by":"crossref","unstructured":"Kroemer, O., Detry, R., Piater, J., Peters, J.: Active learning using mean shift optimization for robot grasping. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) (2009)","DOI":"10.1109\/IROS.2009.5354345"},{"issue":"9","key":"18_CR70","doi-asserted-by":"publisher","first-page":"1105","DOI":"10.1016\/j.robot.2010.06.001","volume":"58","author":"O. Kroemer","year":"2010","unstructured":"Kroemer, O., Detry, R., Piater, J., Peters, J.: Combining active learning and reactive control for robot grasping. Robotics and Autonomous Systems\u00a058(9), 1105\u20131116 (2010)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR71","doi-asserted-by":"crossref","unstructured":"Kuhn, H.W., Tucker, A.W.: Nonlinear programming. In: Berkeley Symposium on Mathematical Statistics and Probability (1950)","DOI":"10.1525\/9780520411586-036"},{"key":"18_CR72","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/978-3-540-74024-7_5","volume-title":"RoboCup 2006","author":"T. Latzke","year":"2007","unstructured":"Latzke, T., Behnke, S., Bennewitz, M.: Imitative Reinforcement Learning for Soccer Playing Robots. In: Lakemeyer, G., Sklar, E., Sorrenti, D.G., Takahashi, T. (eds.) RoboCup 2006. LNCS (LNAI), vol.\u00a04434, pp. 47\u201358. Springer, Heidelberg (2007)"},{"key":"18_CR73","unstructured":"Lizotte, D., Wang, T., Bowling, M., Schuurmans, D.: Automatic gait optimization with gaussian process regression. In: International Joint Conference on Artifical Intelligence (IJCAI) (2007)"},{"issue":"2-3","key":"18_CR74","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/0004-3702(92)90058-6","volume":"55","author":"S. Mahadevan","year":"1992","unstructured":"Mahadevan, S., Connell, J.: Automatic programming of behavior-based robots using reinforcement learning. Artificial Intelligence\u00a055(2-3), 311\u2013365 (1992)","journal-title":"Artificial Intelligence"},{"key":"18_CR75","unstructured":"Mart\u00ednez-Mar\u00edn, T., Duckett, T.: Fast reinforcement learning for vision-guided mobile robots. In: IEEE International Conference on Robotics and Automation (ICRA) (2005)"},{"key":"18_CR76","doi-asserted-by":"crossref","unstructured":"Mataric, M.J.: Reward functions for accelerated learning. In: International Conference on Machine Learning (ICML) (1994)","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"key":"18_CR77","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1023\/A:1008819414322","volume":"4","author":"M.J. Mataric","year":"1997","unstructured":"Mataric, M.J.: Reinforcement learning in the multi-robot domain. Autonomous Robots\u00a04, 73\u201383 (1997)","journal-title":"Autonomous Robots"},{"key":"18_CR78","doi-asserted-by":"crossref","unstructured":"Michels, J., Saxena, A., Ng, A.Y.: High speed obstacle avoidance using monocular vision and reinforcement learning. In: International Conference on Machine Learning (ICML) (2005)","DOI":"10.1145\/1102351.1102426"},{"key":"18_CR79","doi-asserted-by":"crossref","unstructured":"Mitsunaga, N., Smith, C., Kanda, T., Ishiguro, H., Hagita, N.: Robot behavior adaptation for human-robot interaction based on policy gradient reinforcement learning. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) (2005)","DOI":"10.1109\/IROS.2005.1545206"},{"issue":"8","key":"18_CR80","doi-asserted-by":"publisher","first-page":"1281","DOI":"10.1016\/S0893-6080(96)00043-3","volume":"9","author":"H. Miyamoto","year":"1996","unstructured":"Miyamoto, H., Schaal, S., Gandolfo, F., Gomi, H., Koike, Y., Osu, R., Nakano, E., Wada, Y., Kawato, M.: A kendama learning robot based on bi-directional theory. Neural Networks\u00a09(8), 1281\u20131302 (1996)","journal-title":"Neural Networks"},{"issue":"1","key":"18_CR81","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/S0921-8890(01)00113-0","volume":"36","author":"J. Morimoto","year":"2001","unstructured":"Morimoto, J., Doya, K.: Acquisition of stand-up behavior by a real robot using hierarchical reinforcement learning. Robotics and Autonomous Systems\u00a036(1), 37\u201351 (2001)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR82","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1177\/0278364908091463","volume":"27","author":"J. Nakanishi","year":"2008","unstructured":"Nakanishi, J., Cory, R., Mistry, M., Peters, J., Schaal, S.: Operational space control: a theoretical and emprical comparison. International Journal of Robotics Research\u00a027, 737\u2013757 (2008)","journal-title":"International Journal of Robotics Research"},{"key":"18_CR83","doi-asserted-by":"crossref","unstructured":"Nemec, B., Tamo\u0161i\u016bnait\u0117, M., W\u00f6rg\u00f6tter, F., Ude, A.: Task adaptation through exploration and action sequencing. In: IEEE-RAS International Conference on Humanoid Robots, Humanoids (2009)","DOI":"10.1109\/ICHR.2009.5379568"},{"key":"18_CR84","doi-asserted-by":"crossref","unstructured":"Nemec, B., Zorko, M., Zlajpah, L.: Learning of a ball-in-a-cup playing robot. In: International Workshop on Robotics in Alpe-Adria-Danube Region (RAAD) (2010)","DOI":"10.1109\/RAAD.2010.5524570"},{"key":"18_CR85","unstructured":"Ng, A.Y., Coates, A., Diel, M., Ganapathi, V., Schulte, J., Tse, B., Berger, E., Liang, E.: Autonomous inverted helicopter flight via reinforcement learning. In: International Symposium on Experimental Robotics (ISER) (2004a)"},{"key":"18_CR86","unstructured":"Ng, A.Y., Kim, H.J., Jordan, M.I., Sastry, S.: Autonomous helicopter flight via reinforcement learning. In: Advances in Neural Information Processing Systems (NIPS) (2004b)"},{"key":"18_CR87","doi-asserted-by":"crossref","unstructured":"O\u00dfwald, S., Hornung, A., Bennewitz, M.: Learning reliable and efficient navigation with a humanoid. In: IEEE International Conference on Robotics and Automation (ICRA) (2010)","DOI":"10.1109\/ROBOT.2010.5509420"},{"key":"18_CR88","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/978-3-540-74565-5_19","volume-title":"KI 2007: Advances in Artificial Intelligence","author":"L. Paletta","year":"2007","unstructured":"Paletta, L., Fritz, G., Kintzler, F., Irran, J., Dorffner, G.: Perception and Developmental Learning of Affordances in Autonomous Robots. In: Hertzberg, J., Beetz, M., Englert, R. (eds.) KI 2007. LNCS (LNAI), vol.\u00a04667, pp. 235\u2013250. Springer, Heidelberg (2007)"},{"key":"18_CR89","doi-asserted-by":"crossref","unstructured":"Pastor, P., Kalakrishnan, M., Chitta, S., Theodorou, E., Schaal, S.: Skill learning and task outcome prediction for manipulation. In: IEEE International Conference on Robotics and Automation (ICRA) (2011)","DOI":"10.1109\/ICRA.2011.5980200"},{"key":"18_CR90","doi-asserted-by":"crossref","unstructured":"Pendrith, M.: Reinforcement learning in situated agents: Some theoretical problems and practical solutions. In: European Workshop on Learning Robots (EWRL) (1999)","DOI":"10.1007\/3-540-40044-3_6"},{"issue":"2","key":"18_CR91","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1177\/0278364907087548","volume":"27","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Learning to control in operational space. International Journal of Robotics Research\u00a027(2), 197\u2013212 (2008a)","journal-title":"International Journal of Robotics Research"},{"issue":"7-9","key":"18_CR92","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Natural actor-critic. Neurocomputing\u00a071(7-9), 1180\u20131190 (2008b)","journal-title":"Neurocomputing"},{"issue":"4","key":"18_CR93","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Reinforcement learning of motor skills with policy gradients. Neural Networks\u00a021(4), 682\u2013697 (2008c)","journal-title":"Neural Networks"},{"key":"18_CR94","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Linear quadratic regulation as benchmark for policy gradient methods. Tech. rep., University of Southern California (2004)"},{"key":"18_CR95","doi-asserted-by":"crossref","unstructured":"Peters, J., M\u00fclling, K., Altun, Y.: Relative entropy policy search. In: National Conference on Artificial Intelligence (AAAI) (2010a)","DOI":"10.1609\/aaai.v24i1.7727"},{"key":"18_CR96","doi-asserted-by":"crossref","unstructured":"Peters, J., M\u00fclling, K., Kober, J., Nguyen-Tuong, D., Kroemer, O.: Towards motor skill learning for robotics. In: International Symposium on Robotics Research, ISRR (2010b)","DOI":"10.1007\/978-3-642-19457-3_28"},{"key":"18_CR97","doi-asserted-by":"crossref","unstructured":"Piater, J., Jodogne, S., Detry, R., Kraft, D., Kr\u00fcger, N., Kroemer, O., Peters, J.: Learning visual representations for perception-action systems. International Journal of Robotics Research Online First (2010)","DOI":"10.1007\/978-3-642-19457-3_24"},{"key":"18_CR98","unstructured":"Platt, R., Grupen, R.A., Fagg, A.H.: Improving grasp skills using schema structured learning. In: International Conference on Development and Learning (2006)"},{"key":"18_CR99","volume-title":"Adaptive control","author":"K.J. \u00c5str\u00f6m","year":"1989","unstructured":"\u00c5str\u00f6m, K.J., Wittenmark, B.: Adaptive control. Addison-Wesley, Reading (1989)"},{"issue":"1","key":"18_CR100","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M. Riedmiller","year":"2009","unstructured":"Riedmiller, M., Gabel, T., Hafner, R., Lange, S.: Reinforcement learning for robot soccer. Autonomous Robots\u00a027(1), 55\u201373 (2009)","journal-title":"Autonomous Robots"},{"key":"18_CR101","doi-asserted-by":"crossref","unstructured":"Rottmann, A., Plagemann, C., Hilgers, P., Burgard, W.: Autonomous blimp control using model-free reinforcement learning in a continuous state and action space. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS (2007)","DOI":"10.1109\/IROS.2007.4399531"},{"key":"18_CR102","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-540-87481-2_16","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"T. R\u00fcckstie\u00df","year":"2008","unstructured":"R\u00fcckstie\u00df, T., Felder, M., Schmidhuber, J.: State-Dependent Exploration for Policy Gradient Methods. In: Daelemans, W., Goethals, B., Morik, K. (eds.) ECML PKDD 2008, Part II. LNCS (LNAI), vol.\u00a05212, pp. 234\u2013249. Springer, Heidelberg (2008)"},{"key":"18_CR103","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"777","DOI":"10.1007\/3-540-46084-5_126","volume-title":"Artificial Neural Networks - ICANN 2002","author":"M.-A. Sato","year":"2002","unstructured":"Sato, M.-A., Nakamura, Y., Ishii, S.: Reinforcement Learning for Biped Locomotion. In: Dorronsoro, J.R. (ed.) ICANN 2002. LNCS, vol.\u00a02415, pp. 777\u2013782. Springer, Heidelberg (2002)"},{"key":"18_CR104","unstructured":"Schaal, S.: Learning from demonstration. In: Advances in Neural Information Processing Systems, NIPS (1997)"},{"issue":"1","key":"18_CR105","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1109\/37.257895","volume":"14","author":"S. Schaal","year":"1994","unstructured":"Schaal, S., Atkeson, C.G.: Robot juggling: An implementation of memory-based learning. Control Systems Magazine\u00a014(1), 57\u201371 (1994)","journal-title":"Control Systems Magazine"},{"issue":"1","key":"18_CR106","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1023\/A:1015727715131","volume":"17","author":"S. Schaal","year":"2002","unstructured":"Schaal, S., Atkeson, C.G., Vijayakumar, S.: Scalable techniques from nonparameteric statistics for real-time robot learning. Applied Intelligence\u00a017(1), 49\u201360 (2002)","journal-title":"Applied Intelligence"},{"issue":"1","key":"18_CR107","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1016\/S0079-6123(06)65027-9","volume":"165","author":"S. Schaal","year":"2007","unstructured":"Schaal, S., Mohajerian, P., Ijspeert, A.J.: Dynamics systems vs. optimal control - a unifying view. Progress in Brain Research\u00a0165(1), 425\u2013445 (2007)","journal-title":"optimal control - a unifying view. Progress in Brain Research"},{"key":"18_CR108","unstructured":"Smart, W.D., Kaelbling, L.P.: A framework for reinforcement learning on real robots. In: National Conference on Artificial Intelligence\/Innovative Applications of Artificial Intelligence, AAAI\/IAAI (1998)"},{"key":"18_CR109","unstructured":"Smart, W.D., Kaelbling, L.P.: Effective reinforcement learning for mobile robots. In: IEEE International Conference on Robotics and Automation (ICRA) (2002)"},{"key":"18_CR110","unstructured":"Soni, V., Singh, S.: Reinforcement learning of hierarchical skills on the sony aibo robot. In: International Conference on Development and Learning (ICDL) (2006)"},{"key":"18_CR111","unstructured":"Strens, M., Moore, A.: Direct policy search using paired statistical tests. In: International Conference on Machine Learning (ICML) (2001)"},{"key":"18_CR112","volume-title":"Reinforcement Learning","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. MIT Press, Boston (1998)"},{"key":"18_CR113","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: International Machine Learning Conference (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"18_CR114","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems (NIPS) (2000)"},{"key":"18_CR115","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Koop, A., Silver, D.: On the role of tracking in stationary environments. In: International Conference on Machine Learning (ICML) (2007)","DOI":"10.1145\/1273496.1273606"},{"issue":"4","key":"18_CR116","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1016\/S0954-1810(01)00027-9","volume":"15","author":"M.M. Svinin","year":"2001","unstructured":"Svinin, M.M., Yamada, K., Ueda, K.: Emergent synthesis of motion patterns for locomotion robots. Artificial Intelligence in Engineering\u00a015(4), 353\u2013363 (2001)","journal-title":"Artificial Intelligence in Engineering"},{"key":"18_CR117","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1029","DOI":"10.1007\/978-3-642-03040-6_125","volume-title":"Advances in Neuro-Information Processing","author":"T. Tamei","year":"2009","unstructured":"Tamei, T., Shibata, T.: Policy Gradient Learning of Cooperative Interaction with a Robot Using User\u2019s Biological Signals. In: K\u00f6ppen, M., Kasabov, N., Coghill, G. (eds.) ICONIP 2008. LNCS, vol.\u00a05507, pp. 1029\u20131037. Springer, Heidelberg (2009)"},{"key":"18_CR118","doi-asserted-by":"crossref","unstructured":"Tedrake, R.: Stochastic policy gradient reinforcement learning on a simple 3d biped. In: International Conference on Intelligent Robots and Systems (IROS) (2004)","DOI":"10.1109\/IROS.2004.1389841"},{"key":"18_CR119","unstructured":"Tedrake, R., Zhang, T.W., Seung, H.S.: Learning to walk in 20 minutes. In: Yale Workshop on Adaptive and Learning Systems (2005)"},{"key":"18_CR120","doi-asserted-by":"publisher","first-page":"1038","DOI":"10.1177\/0278364910369189","volume":"29","author":"R. Tedrake","year":"2010","unstructured":"Tedrake, R., Manchester, I.R., Tobenkin, M.M., Roberts, J.W.: LQR-trees: Feedback motion planning via sums of squares verification. International Journal of Robotics Research\u00a029, 1038\u20131052 (2010)","journal-title":"International Journal of Robotics Research"},{"key":"18_CR121","doi-asserted-by":"crossref","unstructured":"Theodorou, E.A., Buchli, J., Schaal, S.: Reinforcement learning of motor skills in high dimensions: A path integral approach. In: IEEE International Conference on Robotics and Automation (ICRA) (2010)","DOI":"10.1109\/ROBOT.2010.5509336"},{"key":"18_CR122","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1016\/0921-8890(95)00022-8","volume":"15","author":"S. Thrun","year":"1995","unstructured":"Thrun, S.: An approach to learning mobile robot navigation. Robotics and Autonomous Systems\u00a015, 301\u2013319 (1995)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR123","unstructured":"Tokic, M., Ertel, W., Fessler, J.: The crawler, a class room demonstrator for reinforcement learning. In: International Florida Artificial Intelligence Research Society Conference (FLAIRS) (2009)"},{"key":"18_CR124","doi-asserted-by":"crossref","unstructured":"Toussaint, M., Storkey, A., Harmeling, S.: Expectation-Maximization methods for solving (PO)MDPs and optimal control problems. In: Inference and Learning in Dynamic Models. Cambridge University Press (2010)","DOI":"10.1017\/CBO9780511984679.019"},{"issue":"3-4","key":"18_CR125","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/S0921-8890(97)00042-0","volume":"22","author":"C. Touzet","year":"1997","unstructured":"Touzet, C.: Neural reinforcement learning for behaviour synthesis. Robotics and Autonomous Systems, Special Issue on Learning Robot: the New Wave\u00a022(3-4), 251\u2013281 (1997)","journal-title":"Robotics and Autonomous Systems, Special Issue on Learning Robot: the New Wave"},{"key":"18_CR126","unstructured":"Uchibe, E., Asada, M., Hosoda, K.: Cooperative behavior acquisition in multi mobile robots environment by reinforcement learning based on state vector estimation. In: IEEE International Conference on Robotics and Automation (ICRA) (1998)"},{"issue":"2","key":"18_CR127","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/s10514-009-9132-0","volume":"27","author":"N. Vlassis","year":"2009","unstructured":"Vlassis, N., Toussaint, M., Kontes, G., Piperidis, S.: Learning model-free robot control by a Monte Carlo EM algorithm. Autonomous Robots\u00a027(2), 123\u2013130 (2009)","journal-title":"Autonomous Robots"},{"key":"18_CR128","doi-asserted-by":"crossref","unstructured":"Wang, B., Li, J., Liu, H.: A heuristic reinforcement learning for robot approaching objects. In: IEEE Conference on Robotics, Automation and Mechatronics (2006)","DOI":"10.1109\/RAMECH.2006.252749"},{"key":"18_CR129","unstructured":"Willgoss, R.A., Iqbal, J.: Reinforcement learning of behaviors in mobile robots using noisy infrared sensing. In: Australian Conference on Robotics and Automation (1999)"},{"key":"18_CR130","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"key":"18_CR131","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1007\/978-3-540-69134-1_25","volume-title":"From Animals to Animats 10","author":"T. Yasuda","year":"2008","unstructured":"Yasuda, T., Ohkura, K.: A Reinforcement Learning Technique with an Adaptive Action Generator for a Multi-Robot System. In: Asada, M., Hallam, J.C.T., Meyer, J.-A., Tani, J. (eds.) SAB 2008. LNCS (LNAI), vol.\u00a05040, pp. 250\u2013259. Springer, Heidelberg (2008)"},{"key":"18_CR132","unstructured":"Youssef, S.M.: Neuro-based learning of mobile robots with evolutionary path planning. In: ICGST International Conference on Automation, Robotics and Autonomous Systems (ARAS) (2005)"}],"container-title":["Adaptation, Learning, and Optimization","Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-27645-3_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T13:02:28Z","timestamp":1742648548000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-27645-3_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642276446","9783642276453"],"references-count":132,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-27645-3_18","relation":{},"ISSN":["1867-4534","1867-4542"],"issn-type":[{"value":"1867-4534","type":"print"},{"value":"1867-4542","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}