{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T06:53:22Z","timestamp":1756191202915,"version":"3.40.3"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319458557"},{"type":"electronic","value":"9783319458564"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-45856-4_2","type":"book-chapter","created":{"date-parts":[[2016,8,29]],"date-time":"2016-08-29T15:21:28Z","timestamp":1472484088000},"page":"18-32","source":"Crossref","is-referenced-by-count":16,"title":["A Gentle Introduction to Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ann","family":"Now\u00e9","sequence":"first","affiliation":[]},{"given":"Tim","family":"Brys","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,8,30]]},"reference":[{"key":"2_CR1","volume-title":"Brains, Behavior, and Robotics","author":"JS Albus","year":"1981","unstructured":"Albus, J.S.: Brains, Behavior, and Robotics. Byte Books, Peterborough (1981)"},{"key":"2_CR2","unstructured":"Amazon: Amazon prime air (2016). http:\/\/www.amazon.com\/b?node=8037720011 . Accessed 20 Apr 2016"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Barrett, L., Narayanan, S.: Learning all optimal policies with multiple criteria. In: Proceedings of the 25th International Conference on Machine Learning, pp. 41\u201347. 
ACM (2008)","DOI":"10.1145\/1390156.1390162"},{"key":"2_CR4","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"5","author":"AG Barto","year":"1983","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans. Syst. Man Cybern. 5, 834\u2013846 (1983)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"2_CR5","volume-title":"Dynamic Programming and Optimal Control","author":"DP Bertsekas","year":"1995","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, vol. 1. Athena Scientific, Belmont (1995)"},{"key":"2_CR6","volume-title":"A Basic Course in Probability Theory","author":"R Bhattacharya","year":"2007","unstructured":"Bhattacharya, R., Waymire, E.C.: A Basic Course in Probability Theory. Springer, New York (2007)"},{"key":"2_CR7","doi-asserted-by":"crossref","first-page":"659","DOI":"10.1613\/jair.4818","volume":"53","author":"D Bloembergen","year":"2015","unstructured":"Bloembergen, D., Tuyls, K., Hennes, D., Kaisers, M.: Evolutionary dynamics of multi-agent learning: a survey. J. Artif. Intell. Res. 53, 659\u2013697 (2015)","journal-title":"J. Artif. Intell. Res."},{"issue":"1","key":"2_CR8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"CB Browne","year":"2012","unstructured":"Browne, C.B., Powley, E., Whitehouse, D., Lucas, S.M., Cowling, P.I., Rohlfshagen, P., Tavener, S., Perez, D., Samothrakis, S., Colton, S.: A survey of monte carlo tree search methods. IEEE Trans. Comput. Intell. AI Games 4(1), 1\u201343 (2012)","journal-title":"IEEE Trans. Comput. Intell. AI Games"},{"key":"2_CR9","unstructured":"Brys, T., Harutyunyan, A., Suay, H.B., Chernova, S., Taylor, M.E., Now\u00e9, A.: Reinforcement learning from demonstration through shaping. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 
3352\u20133358 (2015)"},{"issue":"1","key":"2_CR10","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1007\/BF01197559","volume":"14","author":"I Das","year":"1997","unstructured":"Das, I., Dennis, J.E.: A closer look at drawbacks of minimizing weighted sums of objectives for Pareto set generation in multicriteria optimization problems. Struct. Optim. 14(1), 63\u201369 (1997)","journal-title":"Struct. Optim."},{"key":"2_CR11","unstructured":"Devlin, S., Kudenko, D.: Dynamic potential-based reward shaping. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems, vol. 1, pp. 433\u2013440. International Foundation for Autonomous Agents and Multiagent Systems (2012)"},{"issue":"02","key":"2_CR12","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1142\/S0219525911002998","volume":"14","author":"S Devlin","year":"2011","unstructured":"Devlin, S., Kudenko, D., Grze\u015b, M.: An empirical study of potential-based reward shaping and advice in complex, multi-agent systems. Adv. Complex Syst. 14(02), 251\u2013278 (2011)","journal-title":"Adv. Complex Syst."},{"key":"2_CR13","unstructured":"G\u00e1bor, Z., Kalm\u00e1r, Z., Szepesv\u00e1ri, C.: Multi-criteria reinforcement learning. In: ICML, vol. 98, pp. 197\u2013205 (1998)"},{"issue":"1521","key":"2_CR14","first-page":"35","volume":"94","author":"PY Glorennec","year":"1994","unstructured":"Glorennec, P.Y.: Fuzzy q-learning and evolutionary strategy for adaptive fuzzy control. EUFIT 94(1521), 35\u201340 (1994)","journal-title":"EUFIT"},{"key":"2_CR15","unstructured":"Google: Google self-driving car project. Accessed 20 Apr 2016"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Harutyunyan, A., Devlin, S., Vrancx, P., Now\u00e9, A.: Expressing arbitrary reward functions as potential-based advice. 
In: Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence (2015)","DOI":"10.1609\/aaai.v29i1.9628"},{"key":"2_CR17","unstructured":"Klopf, A.H.: Brain function, adaptive systems: a heterostatic theory. Technical report AFCRL-72-0164, Air Force Cambridge Research Laboratories, Bedford, MA (1972)"},{"key":"2_CR18","unstructured":"Knox, W.B., Stone, P.: Combining manual feedback with subsequent MDP reward signals for reinforcement learning. In: Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems, pp. 5\u201312 (2010)"},{"key":"2_CR19","unstructured":"Lizotte, D.J., Bowling, M.H., Murphy, S.A.: Efficient reinforcement learning with multiple reward functions for randomized controlled trial analysis. In: Proceedings of the 27th International Conference on Machine Learning (ICML-2010), pp. 695\u2013702 (2010)"},{"issue":"7540","key":"2_CR20","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"2_CR21","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning, vol. 99, pp. 278\u2013287 (1999)"},{"key":"2_CR22","unstructured":"Now\u00e9, A.: Fuzzy reinforcement learning: an overview. 
In: Advances in Fuzzy Theory and Technology (1995)"},{"key":"2_CR23","series-title":"Adaptation, Learning, and Optimization","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1007\/978-3-642-27645-3_14","volume-title":"Reinforcement Learning","author":"A Now\u00e9","year":"2012","unstructured":"Now\u00e9, A., Vrancx, P., De Hauwere, Y.-M.: Game theory and multi-agent reinforcement learning. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning. ALO, vol. 12, pp. 441\u2013470. Springer, Heidelberg (2012)"},{"key":"2_CR24","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers, D.M., Vamplew, P., Whiteson, S., Dazeley, R.: A survey of multi-objective sequential decision-making. J. Artif. Intell. Res. 48, 67\u2013113 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"2_CR25","unstructured":"Russell, S., Norvig, P.: Artificial Intelligence: A Modern Approach. Artificial Intelligence, vol. 25, p. 27. Prentice-Hall, Englewood Cliffs (1995)"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Sehnke, F., Graves, A., Osendorfer, C., Schmidhuber, J.: Multimodal parameter-exploring policy gradients. In: Ninth International Conference on Machine Learning and Applications (ICMLA), pp. 113\u2013118. IEEE (2010)","DOI":"10.1109\/ICMLA.2010.24"},{"issue":"4","key":"2_CR27","doi-asserted-by":"crossref","first-page":"551","DOI":"10.1016\/j.neunet.2009.12.004","volume":"23","author":"F Sehnke","year":"2010","unstructured":"Sehnke, F., Osendorfer, C., R\u00fcckstie\u00df, T., Graves, A., Peters, J., Schmidhuber, J.: Parameter-exploring policy gradients. Neural Netw. 
23(4), 551\u2013559 (2010)","journal-title":"Neural Netw."},{"issue":"7587","key":"2_CR28","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., van den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"3","key":"2_CR29","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"38","author":"S Singh","year":"2000","unstructured":"Singh, S., Jaakkola, T., Littman, M.L., Szepesv\u00e1ri, C.: Convergence results for single-step on-policy reinforcement-learning algorithms. Mach. Learn. 38(3), 287\u2013308 (2000)","journal-title":"Mach. Learn."},{"issue":"1\u20133","key":"2_CR30","first-page":"123","volume":"22","author":"SP Singh","year":"1996","unstructured":"Singh, S.P., Sutton, R.S.: Reinforcement learning with replacing eligibility traces. Mach. Learn. 22(1\u20133), 123\u2013158 (1996)","journal-title":"Mach. Learn."},{"key":"2_CR31","volume-title":"The Behavior of Organisms: An Experimental Analysis","author":"BF Skinner","year":"1938","unstructured":"Skinner, B.F.: The Behavior of Organisms: An Experimental Analysis. Appleton-Century, New York (1938)"},{"key":"2_CR32","unstructured":"Sutton, R.: The future of AI (2006). https:\/\/www.youtube.com\/watch?v=pD-FWetbvN8 . Accessed 28 June 2016"},{"key":"2_CR33","volume-title":"Reinforcement Learning: An Introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction, vol. 1. Cambridge University Press, Cambridge (1998)"},{"key":"2_CR34","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y., et al.: Policy gradient methods for reinforcement learning with function approximation. 
In: NIPS, vol. 99, pp. 1057\u20131063 (1999)"},{"key":"2_CR35","volume-title":"Autonomous Inter-Task Transfer in Reinforcement Learning Domains","author":"ME Taylor","year":"2008","unstructured":"Taylor, M.E.: Autonomous Inter-Task Transfer in Reinforcement Learning Domains. ProQuest, Ann Arbor (2008)"},{"key":"2_CR36","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10, 1633\u20131685 (2009)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"2_CR37","first-page":"185","volume":"16","author":"JN Tsitsiklis","year":"1994","unstructured":"Tsitsiklis, J.N.: Asynchronous stochastic approximation and Q-learning. Mach. Learn. 16(3), 185\u2013202 (1994)","journal-title":"Mach. Learn."},{"issue":"1\u20132","key":"2_CR38","first-page":"51","volume":"84","author":"P Vamplew","year":"2010","unstructured":"Vamplew, P., Dazeley, R., Berry, A., Issabekov, R., Dekker, E.: Empirical evaluation methods for multiobjective reinforcement learning algorithms. Mach. Learn. 84(1\u20132), 51\u201380 (2010)","journal-title":"Mach. Learn."},{"key":"2_CR39","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"crossref","first-page":"372","DOI":"10.1007\/978-3-540-89378-3_37","volume-title":"AI 2008: Advances in Artificial Intelligence","author":"P Vamplew","year":"2008","unstructured":"Vamplew, P., Yearwood, J., Dazeley, R., Berry, A.: On the limitations of scalarisation for multi-objective reinforcement learning of Pareto fronts. In: Wobcke, W., Zhang, M. (eds.) AI 2008. LNCS (LNAI), vol. 5360, pp. 372\u2013378. Springer, Heidelberg (2008)"},{"key":"2_CR40","unstructured":"Van Moffaert, K.: Multi-criteria reinforcement learning for sequential decision making problems. Ph.D. 
thesis, Vrije Universiteit Brussel (2016)"},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Van Moffaert, K., Drugan, M.M., Now\u00e9, A.: Scalarized multi-objective reinforcement learning: novel design techniques. In: IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning. IEEE (2013)","DOI":"10.1109\/ADPRL.2013.6615007"},{"issue":"1","key":"2_CR42","first-page":"3483","volume":"15","author":"K Moffaert Van","year":"2014","unstructured":"Van Moffaert, K., Now\u00e9, A.: Multi-objective reinforcement learning using sets of pareto dominating policies. J. Mach. Learn. Res. 15(1), 3483\u20133512 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"2_CR43","unstructured":"Wang, W., Sebag, M., et al.: Multi-objective monte-carlo tree search. In: ACML, pp. 507\u2013522 (2012)"},{"key":"2_CR44","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. thesis, University of Cambridge (1989)"},{"key":"2_CR45","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-27645-3","volume-title":"Reinforcement Learning: State-of-the-Art (Adaptation, Learning, and Optimization)","author":"M Wiering","year":"2012","unstructured":"Wiering, M., Otterlo, M.: Reinforcement Learning: State-of-the-Art (Adaptation, Learning, and Optimization). Springer, Berlin (2012)"},{"key":"2_CR46","unstructured":"Wiewiora, E., Cottrell, G., Elkan, C.: Principled methods for advising reinforcement learning agents. In: International Conference on Machine Learning, pp. 
792\u2013799 (2003)"}],"container-title":["Lecture Notes in Computer Science","Scalable Uncertainty Management"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-45856-4_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,7]],"date-time":"2022-07-07T07:13:01Z","timestamp":1657177981000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-45856-4_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319458557","9783319458564"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-45856-4_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}