{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T08:10:02Z","timestamp":1748765402258,"version":"3.41.0"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319293387"},{"type":"electronic","value":"9783319293394"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-29339-4_30","type":"book-chapter","created":{"date-parts":[[2016,1,28]],"date-time":"2016-01-28T15:59:18Z","timestamp":1453996758000},"page":"356-364","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["RLLib: C++ Library to Predict, Control, and Represent Learnable Knowledge Using On\/Off Policy Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Saminda","family":"Abeyruwan","sequence":"first","affiliation":[]},{"given":"Ubbo","family":"Visser","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,1,29]]},"reference":[{"key":"30_CR1","unstructured":"Abeyruwan, S., Seekircher, A., Visser, U.: Dynamic role assignment using general value functions. In: AAMAS 2013, Adaptive Learning Agents Workshop (2013)"},{"key":"30_CR2","unstructured":"Bishop, C.M.: Pattern Recognition and Machine Learning. Information Science and Statistics, 1 edn. 
Springer, Heidelberg (2007)"},{"key":"30_CR3","unstructured":"Dabney, W., Barto, A.G.: Adaptive step-size for online temporal difference learning. In: AAAI Conference on Artificial Intelligence (2012)"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Degris, T., Pilarski, P.M., Sutton, R.S.: Model-free reinforcement learning with continuous action in practice. In: American Control Conference (ACC), pp. 2177\u20132182. IEEE (2012)","DOI":"10.1109\/ACC.2012.6315022"},{"key":"30_CR5","unstructured":"Degris, T., White, M., Sutton, R.S.: Off-policy actor-critic. In: Proceedings of the 29th International Conference on Machine Learning (ICML), pp. 457\u2013464 (2012)"},{"key":"30_CR6","unstructured":"Delepoulle, F.D.C.S.: PIQLE: a platform for implementation of q-learning experiments. In: Neural Information Processing Systems (NIPS), Workshop on Reinforcement Learning Benchmarks and Bake-off II (2005)"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Diuk, C., Cohen, A., Littman, M.L.: An object-oriented representation for efficient reinforcement learning. In: Proceedings of the 25th International Conference on Machine Learning (ICML), pp. 240\u2013247 (2008)","DOI":"10.1145\/1390156.1390187"},{"issue":"1","key":"30_CR8","first-page":"625","volume":"14","author":"H Frezza-Buet","year":"2013","unstructured":"Frezza-Buet, H., Geist, M.: A C++ template-based reinforcement learning library: fitting the code to the mathematics. J. Mach. Learn. Res. (JMLR) 14(1), 625\u2013628 (2013)","journal-title":"J. Mach. Learn. Res. (JMLR)"},{"key":"30_CR9","unstructured":"Hafner, R., Riedmiller, M.: Case study: control of a real world system in CLSquare. 
In: Proceedings of the NIPS Workshop on Reinforcement Learning Comparisons, Whistler, British Columbia, Canada (2005)"},{"key":"30_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/3-540-64473-3_46","volume-title":"Robot Soccer World Cup I","author":"H Kitano","year":"1998","unstructured":"Kitano, H., Asada, M., Kuniyoshi, Y., Noda, I., Osawai, E., Matsubara, H.: RoboCup: a challenge problem for AI and robotics. In: Kitano, H. (ed.) RoboCup 1997. LNCS, vol. 1395, pp. 1\u201319. Springer, Heidelberg (1998)"},{"key":"30_CR11","series-title":"Adaptation, Learning, and Optimization","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1007\/978-3-642-27645-3_18","volume-title":"Reinforcement Learning","author":"J Kober","year":"2012","unstructured":"Kober, J., Peters, J.: Reinforcement learning in robotics: a survey. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning. ALO, vol. 12, pp. 579\u2013610. Springer, Heidelberg (2012)"},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Konidaris, G., Osentoski, S., Thomas, P.: Value function approximation in reinforcement learning using the Fourier basis. In: Proceedings of the 25th Conference on Artificial Intelligence, pp. 380\u2013385 (2011)","DOI":"10.1609\/aaai.v25i1.7903"},{"issue":"1\u20132","key":"30_CR13","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1007\/s10994-011-5237-8","volume":"84","author":"T Kovacs","year":"2011","unstructured":"Kovacs, T., Egginton, R.: On the analysis and design of software for reinforcement learning, with a survey of existing systems. Mach. Learn. 84(1\u20132), 7\u201349 (2011)","journal-title":"Mach. Learn."},{"key":"30_CR14","unstructured":"Maei, H.R.: Gradient temporal-difference learning algorithms. Ph.D. 
thesis, University of Alberta (2011)"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Maei, H.R., Sutton, R.S.: GQ($$\\lambda $$): a general gradient algorithm for temporal-difference prediction learning with eligibility traces. In: Proceedings of the 3rd Conference on Artificial General Intelligence (AGI), pp. 1\u20136. Atlantis Press (2010)","DOI":"10.2991\/agi.2010.22"},{"key":"30_CR16","unstructured":"Maei, H.R., Szepesv\u00e1ri, C., Bhatnagar, S., Sutton, R.S.: Toward off-policy learning control with function approximation. In: Proceedings of the 27th International Conference on Machine Learning (ICML), pp. 719\u2013726 (2010)"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Mahmood, A.R., Sutton, R.S., Degris, T., Pilarski, P.M.: Tuning-free step-size adaptation. In: Acoustics, Speech and Signal Processing (ICASSP), pp. 2121\u20132124. IEEE (2012)","DOI":"10.1109\/ICASSP.2012.6288330"},{"key":"30_CR18","unstructured":"Papis, B., Wawrzynski, P.: dotRL: a platform for rapid reinforcement learning methods development and validation. In: 2013 Federated Conference on Computer Science and Information Systems (FedCSIS), pp. 129\u2013136 (2013)"},{"key":"30_CR19","first-page":"743","volume":"11","author":"T Schaul","year":"2010","unstructured":"Schaul, T., Bayer, J., Wierstra, D., Sun, Y., Felder, M., Sehnke, F., R\u00fcckstie\u00df, T., Schmidhuber, J.: PyBrain. J. Mach. Learn. Res. 11, 743\u2013746 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR20","unstructured":"Seekircher, A., Abeyruwan, S., Visser, U.: Accurate ball tracking with extended Kalman filters as a prerequisite for a high-level behavior with reinforcement learning. In: The 6th Workshop on Humanoid Soccer Robots at Humanoid Conference, Bled (Slovenia) (2011)"},{"key":"30_CR21","unstructured":"Seijen, H.V., Sutton, R.: True online TD($$\\lambda $$). In: Jebara, T., Xing, E.P. (eds.) Proceedings of the 31st International Conference on Machine Learning (ICML). 
JMLR Workshop and Conference Proceedings, pp. 692\u2013700 (2014)"},{"key":"30_CR22","doi-asserted-by":"publisher","DOI":"10.1002\/9781118557426","volume-title":"Markov Decision Processes in Artificial Intelligence","author":"O Sigaud","year":"2013","unstructured":"Sigaud, O., Buffet, O.: Markov Decision Processes in Artificial Intelligence. Wiley, New York (2013)"},{"key":"30_CR23","unstructured":"Sutton, R.S.: Generalization in reinforcement learning: successful examples using sparse coarse coding. In: Advances in Neural Information Processing Systems 8, pp. 1038\u20131044. MIT Press (1996)"},{"key":"30_CR24","unstructured":"Sutton, R.S.: a standard interface for reinforcement learning software in C++. http:\/\/webdocs.cs.ualberta.ca\/~sutton\/RLinterface\/RLI-Cplusplus.html. Accessed 12 July 2015"},{"key":"30_CR25","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Koop, A., Silver, D.: On the role of tracking in stationary environments. In: Proceedings of the 24th International Conference on Machine Learning, pp. 871\u2013878. ACM (2007)","DOI":"10.1145\/1273496.1273606"},{"key":"30_CR27","unstructured":"Sutton, R.S., Modayil, J., Delp, M., Degris, T., Pilarski, P.M., White, A., Precup, D.: Horde: a scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction. In: Proceedings of the 10th International Conference on Autonomous Agents and Multiagent Systems (AAMAS), pp. 
761\u2013768 (2011)"},{"key":"30_CR28","series-title":"Synthesis Lectures on Artificial Intelligence and Machine Learning","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01551-9","volume-title":"Algorithms for Reinforcement Learning","author":"C Szepesv\u00e1ri","year":"2010","unstructured":"Szepesv\u00e1ri, C.: Algorithms for Reinforcement Learning. Synthesis Lectures on Artificial Intelligence and Machine Learning. Morgan & Claypool Publishers, San Rafael (2010)"},{"key":"30_CR29","first-page":"2133","volume":"10","author":"B Tanner","year":"2009","unstructured":"Tanner, B., White, A.: RL-Glue: language-independent software for reinforcement-learning experiments. J. Mach. Learn. Res. 10, 2133\u20132136 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR30","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Kuhlmann, G., Stone, P.: Autonomous transfer for reinforcement learning. In: Proceedings of the 7th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS), vol. 1, pp. 
283\u2013290 (2008)","DOI":"10.1145\/1329125.1329248"}],"container-title":["Lecture Notes in Computer Science","RoboCup 2015: Robot World Cup XIX"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-29339-4_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T07:42:14Z","timestamp":1748763734000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-29339-4_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319293387","9783319293394"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-29339-4_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"29 January 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}