{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T19:11:40Z","timestamp":1724958700175},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"3-4","license":[{"start":{"date-parts":[[2015,1,9]],"date-time":"2015-01-09T00:00:00Z","timestamp":1420761600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1007\/s10846-014-0171-1","type":"journal-article","created":{"date-parts":[[2015,1,8]],"date-time":"2015-01-08T21:10:16Z","timestamp":1420751416000},"page":"385-399","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Batch Reinforcement Learning for Robotic Soccer Using the Q-Batch Update-Rule"],"prefix":"10.1007","volume":"80","author":[{"given":"Jo\u00e3o","family":"Cunha","sequence":"first","affiliation":[]},{"given":"Rui","family":"Serra","sequence":"additional","affiliation":[]},{"given":"Nuno","family":"Lau","sequence":"additional","affiliation":[]},{"given":"Lu\u00eds Seabra","family":"Lopes","sequence":"additional","affiliation":[]},{"given":"Ant\u00f3io J. R.","family":"Neves","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,1,9]]},"reference":[{"issue":"1","key":"171_CR1","first-page":"73","volume":"18","author":"H Kitano","year":"1997","unstructured":"Kitano, H., Asada, M., Kuniyoshi, Y., Noda, I., Osawa, E., Robocup, H.M.: A challenge problem for ai. AI mag. 18(1), 73 (1997)","journal-title":"AI mag."},{"issue":"1","key":"171_CR2","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M Riedmiller","year":"2009","unstructured":"Riedmiller, M., Gabel, T., Hafner, R., Lange, S.: Reinforcement learning for robot soccer. Auton. Robot. 27(1), 55\u201373 (2009)","journal-title":"Auton. Robot."},{"key":"171_CR3","doi-asserted-by":"crossref","unstructured":"Bonarini, A., Caccia, C., Lazaric, A., Restelli, M.: Batch reinforcement learning for controlling a mobile wheeled pendulum robot. In: Bramer, M. (ed.) Artificial Intelligence in Theory and Practice II, IFIP 20th World Computer Congress, vol. 276 of IFIP, pp. 151\u2013160 Milano, Italy, Springer. (2008)","DOI":"10.1007\/978-0-387-09695-7_15"},{"key":"171_CR4","doi-asserted-by":"crossref","unstructured":"Lauer, M.: A case study on learning a steering controller from scratch with reinforcement learning. In: Intelligent Vehicles Symposium (IV), 2011 IEEE, pp. 260\u2013265. IEEE (2011)","DOI":"10.1109\/IVS.2011.5940478"},{"key":"171_CR5","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1007\/s10994-011-5235-x","volume":"84","author":"R Hafner","year":"2011","unstructured":"Hafner, R., Riedmiller, M.: Reinforcement learning in feedback control. Mach. Learn. 84, 137\u2013169 (2011)","journal-title":"Mach. Learn."},{"key":"171_CR6","first-page":"19","volume-title":"Robot Soccer, chapter CAMBADA soccer team: from robot architecture to multiagent coordination","author":"AJR Neves","year":"2010","unstructured":"Neves, A.J.R., Azevedo, J.L., Cunha, B., Lau, N., Silva, J., Santos, F., Corrente, G., Martins, D.A., Figueiredo, N., Pereira, A., Almeida, L., Lopes, L.S., Pinho, A.J., Rodrigues, J.M.O.S., Pedreiras, P.: Robot Soccer, chapter CAMBADA soccer team: from robot architecture to multiagent coordination, pp. 19\u201345. I-Tech Education and Publishing, Vienna (2010)"},{"key":"171_CR7","doi-asserted-by":"crossref","unstructured":"Cunha, J., Serra, R., Lau, N., Lopes, L.S., Neves, A.J.R.: Learning robotic soccer controllers with the q-batch update-rule. In: Proceedings of International Conference on Autonomous Robot Systems and Competitions (ICARSC 2014), pp. 134\u2013139. Espinho, Portugal (2014)","DOI":"10.1109\/ICARSC.2014.6849775"},{"key":"171_CR8","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT Press, Cambridge (1998)"},{"key":"171_CR9","doi-asserted-by":"crossref","unstructured":"Wiering, M.A., van Otterlo, M. (eds.).: Reinforcement Learning: State of the Art, volume 12 of Adaptation, Learning, and Optimization. Springer, Berlin (2012)","DOI":"10.1007\/978-3-642-27645-3"},{"key":"171_CR10","doi-asserted-by":"crossref","unstructured":"Szepesv\u00e1ri, C.: Algorithms for Reinforcement Learning. Synthesis Lectures on Artificial Intelligence and Machine Learning. Morgan & Claypool (2010)","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"171_CR11","volume-title":"Learning from Delayed Rewards PhD thesis","author":"CJCH Watkins","year":"1989","unstructured":"Watkins, C.J.C.H.: Learning from Delayed Rewards PhD thesis. University of Cambridge, Cambridge (1989)"},{"key":"171_CR12","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1007\/978-3-642-27645-3_2","volume-title":"Reinforcement Learning: State of the Art, chapter 2","author":"S Lange","year":"2012","unstructured":"Lange, S., Gabel, T., Riedmiller, M.: Batch reinforcement learning. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning: State of the Art, chapter 2, pp. 45\u201374. Springer, Berlin (2012)"},{"key":"171_CR13","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Braun, H.: A direct adaptive method for faster backropagation learning: the RPROP algorithm. In: Ruspini, H. (ed.) Proceedings of the IEEE International Conference on Neural Networks, pp. 586\u2013591. San Francisco, CA (1993)","DOI":"10.1109\/ICNN.1993.298623"},{"key":"171_CR14","doi-asserted-by":"crossref","unstructured":"Riedmiller, M.: Neural fitted Q iterationfirst experiences with a data efficient neural reinforcement learning method. In: Gama, J., Camacho, R., Brazdil, P., Jorge, A., Torgo, L. (eds.) Proceedings of the european conference on machine learning, vol. 3720 of lecture notes in computer science, pp. 317\u2013328, Springer (2005)","DOI":"10.1007\/11564096_32"},{"key":"171_CR15","doi-asserted-by":"crossref","unstructured":"Gordon, G., Prieditis, A., Russel, S.: Stable function approximation in dynamic programming. In: Proceedings of the 12th Internation Conference on Machine Learning (ICML 1995), pp. 261\u2013268, Tahoe City, USA (1995)","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"171_CR16","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. J. Mach. Learn. Res. 6, 503\u2013556 (2005)","journal-title":"J. Mach. Learn. Res."},{"issue":"3-4","key":"171_CR17","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/BF00992699","volume":"8","author":"L-J Lin","year":"1992","unstructured":"Lin, L.-J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8(3-4), 293\u2013321 (1992)","journal-title":"Mach. Learn."},{"key":"171_CR18","unstructured":"Tesauro, G., Galperin, G.R.: On-line policy improvement using Monte Carlo search. In: Neural information processing systems (NIPS), pp. 206\u2013221, Denver (1996)"},{"key":"171_CR19","unstructured":"Cunha, J., Lau, N., Neves, A.J.R.: Q-Batch: initial results with a novel update rule for Batch Reinforcement Learning. In: Advances in Artificial Intelligence - Local Proceedings, XVI Portuguese Conference on Artificial Intelligence, Azores, Portugal, pp. 240\u2013251 (September 2013)"},{"key":"171_CR20","unstructured":"Lauer, M., Langue, S., Riedmiller, M.: Motion estimation of moving objects for autonomous mobile robots. In: Kunstliche Intelligenz, vol. 20, pp. 11\u201317 (2006)"},{"key":"171_CR21","doi-asserted-by":"crossref","unstructured":"Cunha, J., Lau, N., Rodrigues, J.M.O.S., Cunha, B., Azevedo, J.: Predictive control for behavior generation of omni-directional robots. In: Progress in Artificial Intelligence, 14th Portuguese Conference on Artificial Intelligence, vol. 5816 of Lecture Notes in Artificial Intelligence, pp. 275\u2013286, Aveiro, Portugal. Springer-Verlag Berlin \/Heidelberg. (2009)","DOI":"10.1007\/978-3-642-04686-5_23"},{"key":"171_CR22","doi-asserted-by":"crossref","unstructured":"Riedmiller, M.: 10 steps and some tricks to set up neural reinforcement controllers. In: Neural Networks: Tricks of the Trade (2nd ed.), pp. 735\u2013757 (2012)","DOI":"10.1007\/978-3-642-35289-8_39"},{"key":"171_CR23","doi-asserted-by":"crossref","unstructured":"Corrente, G., Cunha, J., Sequeira, R., Lau, N.: Cooperative Robotics: Passes in robotic soccer. In: Proceedings of 13th International Conference on Autonomous Robot Systems and Competitions, pp. 82\u201387. Lisbon, Portugal (2013)","DOI":"10.1109\/Robotica.2013.6623532"},{"key":"171_CR24","doi-asserted-by":"crossref","unstructured":"Ng, A.Y., Coates, A., Diel, M., Ganapathi, V., Schulte, J., Tse, B., Berger, E., Liang, E.: Autonomous inverted helicopter flight via reinforcement learning. In: International Symposium on Experimental Robotics, pp. 363\u2013372. Springer, Singapore (2004)","DOI":"10.1007\/11552246_35"},{"key":"171_CR25","volume-title":"Proceedings of the IEEE\/RSJ international conference on intelligent robots and systems","author":"J Peters","year":"2006","unstructured":"Peters, J., Schaal, S.: Policy gradient methods for robotics. In: Proceedings of the IEEE\/RSJ international conference on intelligent robots and systems. IEEE Press, Beijing, China (2006)"},{"key":"171_CR26","volume-title":"Learning to drive in 20 minutes. In: Proceedings of the FBIT 2007 conference","author":"M Riedmiller","year":"2007","unstructured":"Riedmiller, M., Montemerlo, M., Dahlkamp, H.: Learning to drive in 20 minutes. In: Proceedings of the FBIT 2007 conference. Springer, Jeju, Korea (2007)"},{"key":"171_CR27","doi-asserted-by":"crossref","unstructured":"Hester, T., Quinlan, M., Stone, P.: Generalized model learning for reinforcement learning on a humanoid robot. In: IEEE International Conference on Robotics and Automation (ICRA) (2010)","DOI":"10.1109\/ROBOT.2010.5509181"},{"issue":"11","key":"171_CR28","doi-asserted-by":"crossref","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Jens Kober","year":"2013","unstructured":"Jens Kober, J., Bagnel, A., Peters, J.: Reinforcement learning in robotics: A survey . Int. J. Robot. Res. 32(11), 1238\u20131274 (2013)","journal-title":"Int. J. Robot. Res."}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-014-0171-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10846-014-0171-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-014-0171-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,19]],"date-time":"2019-08-19T12:35:59Z","timestamp":1566218159000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10846-014-0171-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,1,9]]},"references-count":28,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["171"],"URL":"https:\/\/doi.org\/10.1007\/s10846-014-0171-1","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"value":"0921-0296","type":"print"},{"value":"1573-0409","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,1,9]]}}}