{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T19:28:23Z","timestamp":1770751703890,"version":"3.50.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2009,5,15]],"date-time":"2009-05-15T00:00:00Z","timestamp":1242345600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Auton Robot"],"published-print":{"date-parts":[[2009,7]]},"DOI":"10.1007\/s10514-009-9120-4","type":"journal-article","created":{"date-parts":[[2009,5,14]],"date-time":"2009-05-14T14:12:01Z","timestamp":1242310321000},"page":"55-73","source":"Crossref","is-referenced-by-count":177,"title":["Reinforcement learning for robot soccer"],"prefix":"10.1007","volume":"27","author":[{"given":"Martin","family":"Riedmiller","sequence":"first","affiliation":[]},{"given":"Thomas","family":"Gabel","sequence":"additional","affiliation":[]},{"given":"Roland","family":"Hafner","sequence":"additional","affiliation":[]},{"given":"Sascha","family":"Lange","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,5,15]]},"reference":[{"issue":"2","key":"9120_CR1","doi-asserted-by":"crossref","first-page":"275","DOI":"10.1016\/S0004-3702(99)00026-0","volume":"110","author":"M. Asada","year":"1999","unstructured":"Asada, M., Uchibe, E., & Hosoda, K. (1999). Cooperative behavior acquisition for mobile robots in dynamically changing real worlds via vision-based reinforcement learning and development. Artificial Intelligence, 110(2), 275\u2013292.","journal-title":"Artificial Intelligence"},{"key":"9120_CR2","first-page":"1615","volume-title":"Proceedings of the 2001 IEEE international conference on robotics and automation (ICRA 2001)","author":"J. Bagnell","year":"2001","unstructured":"Bagnell, J., & Schneider, J. (2001). Autonomous helicopter control using reinforcement learning policy search methods. In Proceedings of the 2001 IEEE international conference on robotics and automation (ICRA 2001) (pp. 1615\u20131620), Seoul, South Korea. New York: IEEE Press."},{"key":"9120_CR3","series-title":"LNCS","first-page":"712","volume-title":"RoboCup 2003: robot soccer world cup VII","author":"S. Behnke","year":"2003","unstructured":"Behnke, S., Egorova, A., Gloye, A., Rojas, R., & Simon, M. (2003). Predicting away robot control latency. In D. Polani, B. Browning, A. Bonarini, & K. Yoshida (Eds.), LNCS. RoboCup 2003: robot soccer world cup VII (pp. 712\u2013719), Padua, Italy. Berlin: Springer."},{"key":"9120_CR4","volume-title":"Dynamic programming","author":"R. Bellman","year":"1957","unstructured":"Bellman, R. (1957). Dynamic programming. Princeton: Princeton University Press."},{"key":"9120_CR5","volume-title":"Neuro dynamic programming","author":"D. Bertsekas","year":"1996","unstructured":"Bertsekas, D., & Tsitsiklis, J. (1996). Neuro dynamic programming. Belmont: Athena Scientific."},{"key":"9120_CR6","volume-title":"Proceedings of the 2004 IEEE\/RSJ international conference on intelligent robots and systems (IROS 2004)","author":"S. Chernova","year":"2004","unstructured":"Chernova, S., & Veloso, M. (2004). An evolutionary approach to gait learning for four-legged robots. In Proceedings of the 2004 IEEE\/RSJ international conference on intelligent robots and systems (IROS 2004), Sendai, Japan. New York: IEEE Press."},{"key":"9120_CR7","first-page":"1017","volume-title":"Advances in neural information processing systems 8 (NIPS 1995)","author":"R. Crites","year":"1995","unstructured":"Crites, R., & Barto, A. (1995). Improving elevator performance using reinforcement learning. In Advances in neural information processing systems 8 (NIPS 1995) (pp. 1017\u20131023), Denver, USA. Cambridge: MIT Press."},{"issue":"1","key":"9120_CR8","first-page":"503","volume":"6","author":"D. Ernst","year":"2006","unstructured":"Ernst, D., Geurts, P., & Wehenkel, L. (2006). Tree-based batch mode reinforcement learning. Journal of Machine Learning Research, 6(1), 503\u2013556.","journal-title":"Journal of Machine Learning Research"},{"key":"9120_CR9","unstructured":"Gabel, T., & Riedmiller, M. (2007). Adaptive reactive job-shop scheduling with learning agents. International Journal of Information Technology and Intelligent Computing, 2(4)."},{"key":"9120_CR10","unstructured":"Gabel, T., Hafner, R., Lange, S., Lauer, M., & Riedmiller, M. (2006). Bridging the gap: learning in the RoboCup simulation and midsize league. In Proceedings of the 7th Portuguese conference on automatic control (Controlo 2006), Porto, Portugal."},{"key":"9120_CR11","volume-title":"LNCS. RoboCup 2008: robot soccer world cup XII","author":"T. Gabel","year":"2008","unstructured":"Gabel, T., Riedmiller, M., & Trost, F. (2008). A case study on improving defense behavior in soccer simulation 2D: the NeuroHassle approach. In Iocchi, L., Matsubara, H., Weitzenfeld, A., & Zhou, C. (Eds.), LNCS. RoboCup 2008: robot soccer world cup XII, Suzhou, China. Berlin: Springer."},{"key":"9120_CR12","first-page":"261","volume-title":"Proceedings of the twelfth international conference on machine learning (ICML\u00a01995)","author":"G. Gordon","year":"1995","unstructured":"Gordon, G., Prieditis, A., & Russell, S. (1995). Stable function approximation in dynamic programming. In Proceedings of the twelfth international conference on machine learning (ICML\u00a01995) (pp.\u00a0261\u2013268), Tahoe City, USA. San Mateo: Morgan Kaufmann."},{"key":"9120_CR13","volume-title":"Proceedings of the IEEE international conference on robotics and automation (ICRA 07)","author":"R. Hafner","year":"2007","unstructured":"Hafner, R., & Riedmiller, M. (2007). Neural reinforcement learning controllers for a real robot application. In Proceedings of the IEEE international conference on robotics and automation (ICRA 07), Rome, Italy. New York: IEEE Press."},{"key":"9120_CR14","series-title":"LNCS","first-page":"310","volume-title":"RoboCup 2004: robot soccer world cup VIII","author":"U. Kaufmann","year":"2004","unstructured":"Kaufmann, U., Mayer, G., Kraetzschmar, G., & Palm, G. (2004). Visual robot detection in RoboCup using neural networks. In D.\u00a0Nardi, M.\u00a0Riedmiller, C.\u00a0Sammut, & J. Santos-Victor (Eds.), LNCS. RoboCup 2004: robot soccer world cup VIII (pp. 310\u2013322), Porto, Portugal. Berlin: Springer."},{"key":"9120_CR15","volume-title":"RoboCup-97: robot soccer world cup I","year":"1997","unstructured":"Kitano, H. (Ed.). (1997). RoboCup-97: robot soccer world cup I. Berlin: Springer."},{"key":"9120_CR16","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/IROS.2008.4650953","volume-title":"Proceedings of the 2008 IEEE\/RSJ international conference on intelligent robots and systems (IROS 2008)","author":"J. Kober","year":"2008","unstructured":"Kober, J., Mohler, B., & Peters, J. (2008). Learning perceptual coupling for motor primitives. In Proceedings of the 2008 IEEE\/RSJ international conference on intelligent robots and systems (IROS 2008) (pp. 834\u2013839), Nice, France. New York: IEEE Press."},{"key":"9120_CR17","first-page":"1107","volume":"4","author":"M. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., & Parr, R. (2003). Least-squares policy iteration. Journal of Machine Learning Research, 4, 1107\u20131149.","journal-title":"Journal of Machine Learning Research"},{"key":"9120_CR18","series-title":"LNCS","first-page":"142","volume-title":"RoboCup 2005: robot soccer world cup IX","author":"M. Lauer","year":"2005","unstructured":"Lauer, M., Lange, S., & Riedmiller, M. (2005). Calculating the perfect match: an efficient and accurate approach for robot self-localization. In A. Bredenfeld, A. Jacoff, I. Noda, & Y. Takahashi (Eds.), LNCS. RoboCup 2005: robot soccer world cup IX (pp. 142\u2013153), Osaka, Japan. Berlin: Springer."},{"issue":"1","key":"9120_CR19","first-page":"11","volume":"20","author":"M. Lauer","year":"2006","unstructured":"Lauer, M., Lange, S., & Riedmiller, M. (2006). Motion estimation of moving objects for autonomous mobile robots. Kunstliche Intelligenz, 20(1), 11\u201317.","journal-title":"Kunstliche Intelligenz"},{"key":"9120_CR20","first-page":"126","volume-title":"The 21st IASTED international multi-conference on applied informatics (AI 2003)","author":"B. Li","year":"2003","unstructured":"Li, B., Hu, H., & Spacek, L. (2003). An adaptive color segmentation algorithm for Sony legged robots. In The 21st IASTED international multi-conference on applied informatics (AI 2003) (pp. 126\u2013131), Innsbruck, Austria. New York: IASTED\/ACTA Press."},{"issue":"3","key":"9120_CR21","first-page":"293","volume":"8","author":"L. Lin","year":"1992","unstructured":"Lin, L. (1992). Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning, 8(3), 293\u2013321.","journal-title":"Machine Learning"},{"key":"9120_CR22","series-title":"LNAI","volume-title":"RoboCup 2008: robot soccer world cup XII","author":"J. Ma","year":"2008","unstructured":"Ma, J., & Cameron, S. (2008). Combining policy search with planning in multi-agent cooperation. In L. Iocchi, H. Matsubara, A.\u00a0Weitzenfeld, & C. Zhou (Eds.), LNAI. RoboCup 2008: robot soccer world cup XII, Suzhou, China. Berlin: Springer."},{"key":"9120_CR23","series-title":"LNAI","volume-title":"RoboCup 2005: robot soccer world cup IX","author":"T. Nakashima","year":"2005","unstructured":"Nakashima, T., Takatani, M., Udo, M., Ishibuchi, H., & Nii, M. (2005). Performance evaluation of an evolutionary method for RoboCup soccer strategies. In A. Bredenfeld, A. Jacoff, I. Noda, & Y. Takahashi (Eds.), LNAI. RoboCup 2005: robot soccer world cup IX, Osaka, Japan. Berlin: Springer."},{"key":"9120_CR24","first-page":"363","volume-title":"Experimental robotics IX, the 9th international symposium on experimental robotics (ISER)","author":"A. Ng","year":"2004","unstructured":"Ng, A., Coates, A., Diel, M., Ganapathi, V., Schulte, J., Tse, B., Berger, E., & Liang, E. (2004). Autonomous inverted helicopter flight via reinforcement learning. In Experimental robotics IX, the 9th international symposium on experimental robotics (ISER) (pp. 363\u2013372), Singapore, China. Berlin: Springer."},{"issue":"2\u20133","key":"9120_CR25","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1080\/088395198117848","volume":"12","author":"I. Noda","year":"1998","unstructured":"Noda, I., Matsubara, H., Hiraki, K., & Frank, I. (1998). Soccer server: a tool for research on multi-agent systems. Applied Artificial Intelligence, 12(2\u20133), 233\u2013250.","journal-title":"Applied Artificial Intelligence"},{"issue":"7","key":"9120_CR26","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1163\/1568553041719519","volume":"18","author":"M. Ogino","year":"2004","unstructured":"Ogino, M., Katoh, Y., Aono, M., Asada, M., & Hosoda, K. (2004). Reinforcement learning of humanoid rhythmic walking parameters based on visual information. Advanced Robotics, 18(7), 677\u2013697.","journal-title":"Advanced Robotics"},{"key":"9120_CR27","first-page":"27","volume-title":"Proceedings of the 20005 IEEE international symposium on computational intelligence in robotics and automation (CIRA 2005)","author":"M. Oubbati","year":"2005","unstructured":"Oubbati, M., Schanz, M., & Levi, P. (2005). Kinematic and dynamic adaptive control of a nonholonomic mobile robot using a RNN. In Proceedings of the 20005 IEEE international symposium on computational intelligence in robotics and automation (CIRA 2005) (pp. 27\u201333). New York: IEEE Press."},{"key":"9120_CR28","volume-title":"Proceedings of the IEEE\/RSJ international conference on intelligent robots and systems (IROS)","author":"J. Peters","year":"2006","unstructured":"Peters, J., & Schaal, S. (2006). Policy gradient methods for robotics. In Proceedings of the IEEE\/RSJ international conference on intelligent robots and systems (IROS), Beijing, China. New York: IEEE Press."},{"issue":"2","key":"9120_CR29","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1177\/0278364907087548","volume":"27","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008a). Learning to control in operational space. The International Journal of Robotics Research, 27(2), 197\u2013212.","journal-title":"The International Journal of Robotics Research"},{"issue":"4","key":"9120_CR30","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J. Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008b). Reinforcement learning of motor skills with policy gradients. Neural Networks, 21(4), 682\u2013697.","journal-title":"Neural Networks"},{"key":"9120_CR31","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"M. Puterman","year":"2005","unstructured":"Puterman, M. (2005). Markov decision processes: discrete stochastic dynamic programming. New York: Wiley-Interscience."},{"key":"9120_CR32","unstructured":"Riedmiller, M. (1997). Generating continuous control signals for reinforcement controllers using dynamic output elements. In Proceedings of the European symposium on artificial neural networks (ESANN 1997), Bruges, Belgium."},{"key":"9120_CR33","volume-title":"Machine learning: ECML 2005, 16th European conference on machine learning","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M. (2005). Neural fitted Q iteration\u2014first experiences with a data efficient neural reinforcement learning method. In Machine learning: ECML 2005, 16th European conference on machine learning, Porto, Portugal. Berlin: Springer."},{"key":"9120_CR34","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., & Braun, H., (1993). A direct adaptive method for faster backpropagation learning: the RPROP algorithm. In H. Ruspini (Ed.), Proceedings of the IEEE international conference on neural networks (ICNN) (pp. 586\u2013591), San Francisco.","DOI":"10.1109\/ICNN.1993.298623"},{"key":"9120_CR35","volume-title":"Adaptivity and learning","author":"M. Riedmiller","year":"2003","unstructured":"Riedmiller, M., & Merke, A. (2003). Using machine learning techniques in complex multi-agent domains. In I. Stamatescu, W. Menzel, M. Richter, & U. Ratsch (Eds.), Adaptivity and learning. Berlin: Springer."},{"key":"9120_CR36","volume-title":"Proceedings of the FBIT 2007 conference","author":"M. Riedmiller","year":"2007","unstructured":"Riedmiller, M., Montemerlo, M., & Dahlkamp, H. (2007). Learning to drive in 20 minutes. In Proceedings of the FBIT 2007 conference, Jeju, Korea. Berlin: Springer."},{"key":"9120_CR37","series-title":"LNCS","first-page":"310","volume-title":"RoboCup 2004: robot soccer world cup VIII","author":"T. R\u00f6fer","year":"2004","unstructured":"R\u00f6fer, T. (2004). Evolutionary gait-optimization using a fitness function based on proprioception. In Nardi, D., Riedmiller, M., Sammut, C., & Santos-Victor, J. (Eds.), LNCS. RoboCup 2004: robot soccer world cup VIII (pp. 310\u2013322), Porto, Portugal. Berlin: Springer."},{"issue":"3","key":"9120_CR38","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1177\/105971230501300301","volume":"13","author":"P. Stone","year":"2005","unstructured":"Stone, P., Sutton, R., & Kuhlmann, G. (2005). Reinforcement learning for RoboCup-soccer keepaway. Adaptive Behavior, 13(3), 165\u2013188.","journal-title":"Adaptive Behavior"},{"key":"9120_CR39","volume-title":"Reinforcement learning. An introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., & Barto, A. (1998). Reinforcement learning. An introduction. Cambridge: MIT Press\/A Bradford Book."},{"key":"9120_CR40","first-page":"1057","volume-title":"Advances in neural information processing systems 12 (NIPS 1999)","author":"R. Sutton","year":"2000","unstructured":"Sutton, R., McAllester, D., Singh, S., & Mansour, Y. (2000). Policy gradient methods for reinforcement learning with function approximation. In Advances in neural information processing systems 12 (NIPS 1999) (pp. 1057\u20131063), Denver, USA. Cambridge: MIT Press."},{"key":"9120_CR41","first-page":"206","volume-title":"Neural information processing systems (NIPS 1996)","author":"G. Tesauro","year":"1995","unstructured":"Tesauro, G., & Galpering, G. (1995). On-line policy improvement using Monte Carlo search. In Neural information processing systems (NIPS 1996) (pp. 206\u2013221), Denver, USA. Berlin: Springer."},{"issue":"3","key":"9120_CR42","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1016\/0004-3702(89)90017-9","volume":"39","author":"G. Tesauro","year":"1989","unstructured":"Tesauro, G., & Sejnowski, T. (1989). A parallel network that learns to play backgammon. Artificial Intelligence, 39(3), 357\u2013390.","journal-title":"Artificial Intelligence"},{"issue":"1","key":"9120_CR43","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1016\/j.robot.2004.05.005","volume":"48","author":"A. Treptow","year":"2004","unstructured":"Treptow, A., & Zell, A. (2004). Real-time object tracking for soccer-robots without color information. Robotics and Autonomous Systems, 48(1), 41\u201348.","journal-title":"Robotics and Autonomous Systems"},{"key":"9120_CR44","first-page":"279","volume":"8","author":"C. Watkins","year":"1992","unstructured":"Watkins, C., & Dayan, P. (1992). Q-learning. Machine Learning, 8, 279\u2013292.","journal-title":"Machine Learning"},{"key":"9120_CR45","unstructured":"Wehenkel, L., Glavic, M., & Ernst, D. (2005). New developments in the application of automatic learning to power system control. In Proceedings of the 15th power systems computation conference (PSCC05), Liege, Belgium."}],"container-title":["Autonomous Robots"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9120-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10514-009-9120-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10514-009-9120-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T19:13:32Z","timestamp":1559157212000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10514-009-9120-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,5,15]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2009,7]]}},"alternative-id":["9120"],"URL":"https:\/\/doi.org\/10.1007\/s10514-009-9120-4","relation":{},"ISSN":["0929-5593","1573-7527"],"issn-type":[{"value":"0929-5593","type":"print"},{"value":"1573-7527","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,5,15]]}}}