{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T23:39:08Z","timestamp":1725838748004},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319263496"},{"type":"electronic","value":"9783319263502"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-26350-2_53","type":"book-chapter","created":{"date-parts":[[2015,11,21]],"date-time":"2015-11-21T10:59:35Z","timestamp":1448103575000},"page":"596-608","source":"Crossref","is-referenced-by-count":3,"title":["Reinforcement Learning of Pareto-Optimal Multiobjective Policies Using Steering"],"prefix":"10.1007","author":[{"given":"Peter","family":"Vamplew","sequence":"first","affiliation":[]},{"given":"Rustam","family":"Issabekov","sequence":"additional","affiliation":[]},{"given":"Richard","family":"Dazeley","sequence":"additional","affiliation":[]},{"given":"Cameron","family":"Foale","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,11,22]]},"reference":[{"key":"53_CR1","doi-asserted-by":"crossref","unstructured":"Brys, T., Van Moffaert, K., Van Vaerenbergh, K., Now\u00e9, A.: On the behaviour of scalarization methods for the engagement of a wet clutch. In: The 12th International Conference on Machine Learning and Applications. IEEE (2013)","DOI":"10.1109\/ICMLA.2013.52"},{"key":"53_CR2","unstructured":"Castelletti, A., Corani, G., Rizzolli, A., Soncini-Sessa, R., Weber, E.: Reinforcement learning in the operational management of a water system. In: IFAC Workshop on Modeling and Control in Environmental Issues, pp. 325\u2013330 (2002)"},{"key":"53_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1007\/11672142_26","volume-title":"STACS 2006","author":"K Chatterjee","year":"2006","unstructured":"Chatterjee, K., Majumdar, R., Henzinger, T.A.: Markov decision processes with multiple objectives. In: Durand, B., Thomas, W. (eds.) STACS 2006. LNCS, vol. 3884, pp. 325\u2013336. Springer, Heidelberg (2006)"},{"key":"53_CR4","doi-asserted-by":"crossref","unstructured":"Handa, H.: Solving multi-objective reinforcement learning problems by EDA-RL - acquisition of various strategies. In: Proceedings of the Ninth Internatonal Conference on Intelligent Sysems Design and Applications, pp. 426\u2013431 (2009)","DOI":"10.1109\/ISDA.2009.92"},{"key":"53_CR5","unstructured":"Kalyanakrishnan, S., Stone, P.: An empirical analysis of value function-based and policy search reinforcement learning. In: Proceedings of The 8th International Conference on Autonomous Agents and Multiagent Systems, vol. 2, pp. 749\u2013756. International Foundation for Autonomous Agents and Multiagent Systems (2009)"},{"key":"53_CR6","unstructured":"Lizotte, D.J., Bowling, M., Murphy, S.A.: Efficient reinforcement learning with multiple reward functions for randomized clinical trial analysis. In: 27th International Conference on Machine Learning, pp. 695\u2013702 (2010)"},{"key":"53_CR7","unstructured":"Mannor, S., Shimkin, N.: The steering approach for multi-criteria reinforcement learning. In: Neural Information Processing Systems, pp. 1563\u20131570 (2001)"},{"key":"53_CR8","first-page":"325","volume":"5","author":"S Mannor","year":"2004","unstructured":"Mannor, S., Shimkin, N.: A geometric approach to multi-criterion reinforcement learning. J. Mach. Learn. Res. 5, 325\u2013360 (2004)","journal-title":"J. Mach. Learn. Res."},{"key":"53_CR9","doi-asserted-by":"crossref","unstructured":"Parisi, S., Pirotta, M., Smacchia, N., Bascetta, L., Restelli, M.: Policy gradient approaches for multi-objective sequential decision making. In: 2014 International Joint Conference on Neural Networks (IJCNN), pp. 2323\u20132330. IEEE (2014)","DOI":"10.1109\/IJCNN.2014.6889738"},{"key":"53_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/978-3-642-41575-3_24","volume-title":"Algorithmic Decision Theory","author":"DM Roijers","year":"2013","unstructured":"Roijers, D.M., Whiteson, S., Oliehoek, F.A.: Computing convex coverage sets for multi-objective coordination graphs. In: Perny, P., Pirlot, M., Tsouki\u00e0s, A. (eds.) ADT 2013. LNCS, vol. 8176, pp. 309\u2013323. Springer, Heidelberg (2013)"},{"key":"53_CR11","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"D Roijers","year":"2013","unstructured":"Roijers, D., Vamplew, P., Whiteson, S., Dazeley, R.: A survey of multi-objective sequential decision-making. J. Artif. Intell. Res. 48, 67\u2013113 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"53_CR12","unstructured":"Shelton, C.: Importance sampling for reinforcement learning with multiple objectives. AI Technical report 2001\u2013003, MIT, August 2001"},{"key":"53_CR13","doi-asserted-by":"crossref","unstructured":"Soh, H., Demiris, Y.: Evolving policies for multi-reward partially observable Markov decision processes (MR-POMDPs). In: Proceedings of the 13th Annual Conference on Genetic and Evolutionary Computation, GECCO 2011, pp. 713\u2013720 (2011)","DOI":"10.1145\/2001576.2001674"},{"key":"53_CR14","unstructured":"Taylor, M.E., Whiteson, S., Stone, P.: Temporal difference and policy search methods for reinforcement learning: an empirical comparison. In: Proceedings of the National Conference on Artificial Intelligence, vol. 22, p. 1675 (2007)"},{"key":"53_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1007\/978-3-642-10439-8_35","volume-title":"AI 2009: Advances in Artificial Intelligence","author":"P Vamplew","year":"2009","unstructured":"Vamplew, P., Dazeley, R., Barker, E., Kelarev, A.: Constructing stochastic mixture policies for episodic multiobjective reinforcement learning tasks. In: Nicholson, A., Li, X. (eds.) AI 2009. LNCS, vol. 5866, pp. 340\u2013349. Springer, Heidelberg (2009)"},{"issue":"1\u20132","key":"53_CR16","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/s10994-010-5232-5","volume":"84","author":"P Vamplew","year":"2011","unstructured":"Vamplew, P., Dazeley, R., Berry, A., Dekker, E., Issabekov, R.: Empirical evaluation methods for multiobjective reinforcement learning algorithms. Mach. Learn. 84(1\u20132), 51\u201380 (2011)","journal-title":"Mach. Learn."},{"key":"53_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1007\/978-3-540-89378-3_37","volume-title":"AI 2008: Advances in Artificial Intelligence","author":"P Vamplew","year":"2008","unstructured":"Vamplew, P., Yearwood, J., Dazeley, R., Berry, A.: On the limitations of scalarisation for multi-objective reinforcement learning of Pareto fronts. In: Wobcke, W., Zhang, M. (eds.) AI 2008. LNCS (LNAI), vol. 5360, pp. 372\u2013378. Springer, Heidelberg (2008)"},{"key":"53_CR18","first-page":"3483","volume":"15","author":"K Moffaert Van","year":"2014","unstructured":"Van Moffaert, K., Now\u00e9, A.: Multi-objective reinforcement learning using sets of pareto dominating policies. J. Mach. Learn. Res. 15, 3483\u20133512 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"53_CR19","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards (1989)"},{"issue":"1","key":"53_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10458-009-9100-2","volume":"21","author":"S Whiteson","year":"2010","unstructured":"Whiteson, S., Taylor, M.E., Stone, P.: Critical factors in the empirical performance of temporal difference and evolutionary methods for reinforcement learning. Auton. Agent. Multi-Agent Syst. 21(1), 1\u201335 (2010)","journal-title":"Auton. Agent. Multi-Agent Syst."},{"issue":"2","key":"53_CR21","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TEVC.2003.810758","volume":"7","author":"E Zitzler","year":"2003","unstructured":"Zitzler, E., Thiele, L., Laumanns, M., Fonseca, C.M., da Fonseca, V.G.: Performance assessment of multiobjective optimisers: an analysis and review. IEEE Trans. Evol. Comput. 7(2), 117\u2013132 (2003)","journal-title":"IEEE Trans. Evol. Comput."}],"container-title":["Lecture Notes in Computer Science","AI 2015: Advances in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-26350-2_53","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,31]],"date-time":"2019-05-31T17:01:39Z","timestamp":1559322099000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-26350-2_53"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319263496","9783319263502"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-26350-2_53","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}