{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T12:40:18Z","timestamp":1739364018557,"version":"3.37.0"},"publisher-location":"Berlin, Heidelberg","reference-count":33,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642037368"},{"type":"electronic","value":"9783642037375"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-03737-5_18","type":"book-chapter","created":{"date-parts":[[2009,9,28]],"date-time":"2009-09-28T16:27:02Z","timestamp":1254155222000},"page":"247-262","source":"Crossref","is-referenced-by-count":0,"title":["New Trends in Robotic Reinforcement Learning: Single and Multi-robot Case"],"prefix":"10.1007","author":[{"given":"Du\u0161ko","family":"Kati\u0107","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"18_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press, Cambridge (1998)"},{"key":"18_CR2","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q Learning. Machine Learning, 279\u2013292 (1992)","DOI":"10.1007\/BF00992698"},{"key":"18_CR4","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1016\/S0921-8890(97)00043-2","volume":"22","author":"H. Benbrahim","year":"1997","unstructured":"Benbrahim, H., Franklin, J.A.: Biped Dynamic Walking using Reinforcement Learning. Robotics and Autonomous Systems\u00a022, 283\u2013302 (1997)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Nguyen-Tuong, D., Peters, J.: Local Gaussian Process Regression for Real-time Model-based Robot Control. In: Proc. of IEEE\/RSJ International Conference on Intelligent Robots and Systems, Nice, France (2008)","DOI":"10.1109\/IROS.2008.4650850"},{"key":"18_CR6","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1177\/0278364907087548","volume":"27","author":"J. Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Learning to Control in Operational Space. International Journal of Robotics Research\u00a027, 197\u2013212 (2008)","journal-title":"International Journal of Robotics Research"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Bakker, B., Zhumatiy, V., Gruener, G., Schmidhuber, J.: A Robot that Reinforcement-Learns to Identify and Memorize Important Previous Observations. In: Proc. of the IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 430\u2013435 (2003)","DOI":"10.1109\/IROS.2003.1250667"},{"key":"18_CR8","unstructured":"Bakker, B., Schmidhuber, J.: Hierarchical Reinforcement Learning Based on Automatic Discovery of Subgoals and Specialization of Subpolicies. In: Proc. of the 2003 European Workshop on Reinforcement Learning, Nancy, France (2003)"},{"key":"18_CR9","unstructured":"Mori, T., Nakamura, Y., Sato, M., Ishii, S.: Reinforcement Learning for a CPG-driven Biped Robot. In: Proc. of the Nineteenth National Conference on Artificial Intelligence (AAAI), pp. 623\u2013630 (2004)"},{"key":"18_CR10","unstructured":"Nakamura, Y., Sato, M., Ishii, S.: Reinforcement Learning for Biped Robot. In: Proc. of International Symposium on Adaptive Motion of Animals and Machines (2003)"},{"key":"18_CR11","unstructured":"Peters, J., Vijayakumar, S.M., Schaal, S.: Reinforcement Learning for Humanoid Robotics. In: Proc. of Third IEEE-RAS International Conference on Humanoid Robots, Karlsruhe, Germany (2003)"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Tedrake, R., Zhang, T.W., Seung, H.S.: Stochastic Policy Gradient Reinforcement Learning on a Simple 3d Biped. In: Proc. of the 2004 IEEE\/RSJ International Conference on Intelligent Robots and Systems (2004)","DOI":"10.1109\/IROS.2004.1389841"},{"key":"18_CR13","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1177\/0278364907084980","volume":"27","author":"G. Endo","year":"2008","unstructured":"Endo, G., Morimoto, J., Matsubara, T., Nakanishi, J., Cheng, G.: Learning CPG-based Biped Locomotion with a Policy Gradient Method: Application to a Humanoid Robot. International Journal of Robotics Research\u00a027, 213\u2013228 (2008)","journal-title":"International Journal of Robotics Research"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Lee, J., Oh, J.H.: Walking Pattern Generation for Planar Biped Walking Using Q-learning. In: Proc. of the 17th World Congress The International Federation of Automatic Control, Seoul, Korea, pp. 3027\u20133032 (2008)","DOI":"10.3182\/20080706-5-KR-1001.00512"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Shibata, T., Hitomoi, K., Nakamura, Y., Ishii, S.: Reinforcement Learning of Stable Trajectory for Quasi-Passive Dynamic Walking of an Unstable Biped Robot. In: Hackel, M. (ed.) Humanoid Robots: Human-like Machines, Itech, Vienna, Austria, pp. 211\u2013226 (2007)","DOI":"10.5772\/4804"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Kati\u0107, D., Vukobratovi\u0107, M.: Reinforcement Learning Algorithms in Humanoid Robotics. In: de Pina Filho, A.C. (ed.) Humanoid Robots: New Developments, Advanced Robotic Systems International and I-Tech, Vienna, pp. 367\u2013400 (2007)","DOI":"10.5772\/4878"},{"key":"18_CR17","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10846-007-9174-5","volume":"51","author":"D. Katic","year":"2008","unstructured":"Katic, D., Rodic, A., Vukobratovic, M.: Hybrid Dynamic Control Algorithm For Humanoid Robots Based on Reinforcement Learning. J. of Intelligent and Robotic Systems\u00a051, 3\u201330 (2008)","journal-title":"J. of Intelligent and Robotic Systems"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Katic, D., Rodi\u0107, A.: Dynamic Control Algorithm for Biped Walking Based on Policy Gradient Fuzzy Reinforcement Learning. In: Proc. of the 17th IFAC World Congress, Seoul, Republic of Corea (2008)","DOI":"10.3182\/20080706-5-KR-1001.00294"},{"key":"18_CR19","unstructured":"Nakanishi, J., Morimoto, J., Endo, G., Cheng, G., Schaal, S., Kawato, M.: A Framework for Learning Biped Locomotion with Dynamic Movement Primitives. In: Proc. of IEEE-RAS\/RSJ International Conference on Humanoid Robots, Los Angeles, USA (2004)"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, S.: Policy Gradient Methods for Robotics. In: Proc. of the IEEE International Conference on Intelligent Robotics Systems, Beijing, China (2006)","DOI":"10.1109\/IROS.2006.282564"},{"key":"18_CR21","first-page":"5","volume":"2","author":"L.E. Parker","year":"2008","unstructured":"Parker, L.E.: Distributed Intelligence: Overview of the Field and its Application in Multi-Robot Systems. J. of Physical Agents\u00a02, 5\u201314 (2008)","journal-title":"J. of Physical Agents"},{"key":"18_CR22","unstructured":"Yang, E., Gu, D.: Multiagent Reinforcement Learning for Multi-Robot Systems: A Survey. Technical Report CSM-404, Department of Computer Science, University of Essex (2004)"},{"key":"18_CR23","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/S0921-8890(01)00114-2","volume":"35","author":"K.H. Park","year":"2001","unstructured":"Park, K.H., Kim, Y.J., Kim, J.H.: Modular Q-Learning-based Multi-Agent Cooperation for Robot Soccer. Robotics and Autonomous Systems\u00a035, 109\u2013122 (2001)","journal-title":"Robotics and Autonomous Systems"},{"key":"18_CR24","doi-asserted-by":"crossref","first-page":"5","DOI":"10.5772\/5614","volume":"1","author":"C.F. Touzet","year":"2004","unstructured":"Touzet, C.F.: Distributed Lazy Q-Learning for Cooperativemobile Robots. International Journal of Advanced Robotic Systems\u00a01, 5\u201313 (2004)","journal-title":"International Journal of Advanced Robotic Systems"},{"key":"18_CR25","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1023\/A:1008942012299","volume":"8","author":"P. Stone","year":"2000","unstructured":"Stone, P., Veloso, M.: Multiagent Systems: a Survey from a Machine Learning Perspective. Autonomous Robots\u00a08, 345\u2013383 (2000)","journal-title":"Autonomous Robots"},{"key":"18_CR26","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1023\/A:1008819414322","volume":"4","author":"M.J. Mataric","year":"1997","unstructured":"Mataric, M.J.: Reinforcement Learning in the Multi-Robot Domain. Autonomous Robots\u00a04, 73\u201383 (1997)","journal-title":"Autonomous Robots"},{"key":"18_CR27","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/S1389-0417(01)00017-1","volume":"2","author":"M.J. Mataric","year":"2001","unstructured":"Mataric, M.J.: Learning in Behavior-based Multi-Robot Systems: Policies, Models, and Other Agents. J. of Cognitive Systems Research\u00a02, 81\u201393 (2001)","journal-title":"J. of Cognitive Systems Research"},{"key":"18_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/3-540-36077-8_26","volume-title":"Advances in Information Systems","author":"I. Gultekin","year":"2002","unstructured":"Gultekin, I., Arslan, A.: Modular-Fuzzy Cooperative Algorithm for Multi-Agent Systems. In: Yakhno, T. (ed.) ADVIS 2002. LNCS, vol.\u00a02457, pp. 255\u2013263. Springer, Heidelberg (2002)"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Guo, H., Meng, Y.: Dynamic Correlation Matrix-based Multi-Q Learning for a Multi-Robot System. In: Proc. of IEEE\/RSJ International Conference on Intelligent Robots and Systems, Nice, France, pp. 840\u2013845 (2008)","DOI":"10.1109\/IROS.2008.4651021"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Melo, F.S., Ribeiro, M.I.: Reinforcement Learning with Function Approximation for Cooperative Navigation Tasks. In: Proc. of the 2008 IEEE International Conference on Robotics and Automation, Pasadena, USA, pp. 3321\u20133327 (2008)","DOI":"10.1109\/ROBOT.2008.4543717"},{"key":"18_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1007\/978-3-540-87656-4_77","volume-title":"Hybrid Artificial Intelligence Systems","author":"Y. Sanz","year":"2008","unstructured":"Sanz, Y., de Lope, J., Mart\u00edn, J.A.H.: Applying Reinforcement Learing to Multi-Robot Team Coordination. In: Corchado, E., Abraham, A., Pedrycz, W. (eds.) HAIS 2008. LNCS, vol.\u00a05271, pp. 625\u2013632. Springer, Heidelberg (2008)"},{"key":"18_CR32","unstructured":"Tu, J.: Continuous Reinforcement Learning for Feedback Control Systems. Master\u2019s thesis, Computer Science Department, Colorado State University, Fort Collins, USA (2001)"},{"key":"18_CR33","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1017\/S0263574707003682","volume":"26","author":"A. Rodi\u0107","year":"2008","unstructured":"Rodi\u0107, A., Vukobratovi\u0107, M., Addi, K., Dalleau, G.: Contribution to the Modeling of Non-smooth, Multi-point Contact Dynamics of Biped Locomotion \u2013 Theory and Experiments. Robotica\u00a026, 157\u2013175 (2008)","journal-title":"Robotica"}],"container-title":["Studies in Computational Intelligence","Towards Intelligent Engineering and Information Technology"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-03737-5_18.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T11:35:36Z","timestamp":1739360136000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-03737-5_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642037368","9783642037375"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-03737-5_18","relation":{},"ISSN":["1860-949X","1860-9503"],"issn-type":[{"type":"print","value":"1860-949X"},{"type":"electronic","value":"1860-9503"}],"subject":[],"published":{"date-parts":[[2009]]}}}