{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:36:13Z","timestamp":1760056573502,"version":"build-2065373602"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"30","license":[{"start":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T00:00:00Z","timestamp":1733702400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T00:00:00Z","timestamp":1733702400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00521-024-10504-8","type":"journal-article","created":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T03:32:18Z","timestamp":1733715138000},"page":"24641-24653","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Nonlinear scalarization in stochastic multi-objective MDPs"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0985-4550","authenticated-orcid":false,"given":"Marc","family":"Vincent","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,9]]},"reference":[{"issue":"1","key":"10504_CR1","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers DM, Vamplew P, Whiteson S, Dazeley R (2013) A survey of multi-objective sequential decision-making. J Artif Intell Res 48(1):67\u2013113","journal-title":"J Artif Intell Res"},{"issue":"1","key":"10504_CR2","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1007\/s10458-022-09552-y","volume":"36","author":"CF Hayes","year":"2022","unstructured":"Hayes CF, R\u0103dulescu R, Bargiacchi E, K\u00e4llstr\u00f6m J, Macfarlane M, Reymond M, Verstraeten T, Zintgraf LM, Dazeley R, Heintz F, Howley E, Irissappane AA, Mannion P, Now\u00e9 A, Ramos G, Restelli M, Vamplew P, Roijers DM (2022) A practical guide to multi-objective reinforcement learning and planning. Auton Agents Multi-Agent Syst 36(1):26. https:\/\/doi.org\/10.1007\/s10458-022-09552-y","journal-title":"Auton Agents Multi-Agent Syst"},{"key":"10504_CR3","unstructured":"Juozapaitis Z, Koul A, Fern A, Erwig M, and Doshi-Velez F (2019) Explainable reinforcement learning via reward decomposition"},{"key":"10504_CR4","doi-asserted-by":"crossref","unstructured":"Anderson A, Dodge J, Sadarangani A, Juozapaitis Z, Newman E, Irvine J, Chattopadhyay S, Fern A, and Burnett M (2019) Explaining reinforcement learning to mere mortals: an empirical study. 1328\u20131334 https:\/\/www.ijcai.org\/proceedings\/2019\/184","DOI":"10.24963\/ijcai.2019\/184"},{"key":"10504_CR5","unstructured":"Li C and Czarnecki K (2019) Urban driving with multi-objective deep reinforcement learning, arXiv:1811.08586 [cs]"},{"key":"10504_CR6","unstructured":"Abel A, Roijers D, Lenaerts T, Now\u00e9 A, and Steckelmacher D (2019) Dynamic weights in multi-objective deep reinforcement learning. In international conference on machine learning, pp. 11\u201320, Long Beach, California, USA, May 2019. PMLR. http:\/\/proceedings.mlr.press\/v97\/abels19a.html. ZSCC: 0000015 ISSN: 2640-3498 section: machine learning"},{"key":"10504_CR7","doi-asserted-by":"publisher","unstructured":"Nguyen T (2018) A multi-objective deep reinforcement learning framework. ArXiv. https:\/\/doi.org\/10.1016\/j.engappai.2020.103915","DOI":"10.1016\/j.engappai.2020.103915"},{"key":"10504_CR8","unstructured":"Mossalam H, Assael YM, Roijers DM , and Whiteson S (2016) Multi-objective deep reinforcement learning, arXiv: 1610.02707"},{"key":"10504_CR9","doi-asserted-by":"publisher","unstructured":"Chatterjee K, Majumdar R, and Henzinger TA (2006) Markov decision processes with multiple objectives. In Durand B and Thomas W, editors, STACS 2006, Lecture notes in computer science, pp. 325\u2013336, Berlin, Heidelberg Springer. ISBN 978-3-540-32288-7. https:\/\/doi.org\/10.1007\/11672142_26","DOI":"10.1007\/11672142_26"},{"key":"10504_CR10","unstructured":"Reymond M and Now\u00e9 A (2019) Pareto-DQN: approximating the pareto front in complex multi-objective decision problems. Proceedings of the adaptive and learning agents workshop 2019 (ALA-19) at AAMAS"},{"key":"10504_CR11","doi-asserted-by":"publisher","unstructured":"Uchibe E and Doya K (2007) Constrained reinforcement learning from intrinsic and extrinsic rewards. In 2007 IEEE 6th international conference on development and learning, pp. 163\u2013168, July 2007. https:\/\/doi.org\/10.1109\/DEVLRN.2007.4354030","DOI":"10.1109\/DEVLRN.2007.4354030"},{"key":"10504_CR12","unstructured":"Shelton C (2001) Importance sampling for reinforcement learning with multiple objectives. PhD thesis"},{"issue":"3","key":"10504_CR13","doi-asserted-by":"publisher","first-page":"1783","DOI":"10.1007\/s00521-021-05859-1","volume":"34","author":"P Vamplew","year":"2022","unstructured":"Vamplew P, Foale C, Dazeley R (2022a) A demonstration of issues with value-based multiobjective reinforcement learning under stochastic state transitions. Neural Comput Appl 34(3):1783\u20131799. https:\/\/doi.org\/10.1007\/s00521-021-05859-1","journal-title":"Neural Comput Appl"},{"key":"10504_CR14","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction, 2nd edn. Bradford Books, Cambridge, Massachusetts","edition":"2"},{"issue":"7540","key":"10504_CR15","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"10504_CR16","unstructured":"Sutton RS The reward hypothesis. http:\/\/incompleteideas.net\/rlai.cs.ualberta.ca\/RLAI\/rewardhypothesis.html"},{"key":"10504_CR17","doi-asserted-by":"crossref","unstructured":"Knox WB, Allievi A, Banzhaf H, Schmitt F, and Stone P (2022) Reward (Mis)design for autonomous driving, arXiv:2104.13906 [cs]","DOI":"10.1016\/j.artint.2022.103829"},{"key":"10504_CR18","unstructured":"Skalse J, Howe NHR, Krasheninnikov D, and Krueger D (2022) Defining and characterizing reward hacking, arXiv:2209.13085 [cs, stat]"},{"key":"10504_CR19","unstructured":"Roy J, Girgis R, Romoff J, Bacon PL, and Pal C (2022) Direct behavior specification via constrained reinforcement learning, arXiv:2112.12228 [cs]"},{"issue":"2","key":"10504_CR20","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/s10458-022-09575-5","volume":"36","author":"P Vamplew","year":"2022","unstructured":"Vamplew P, Smith BJ, K\u00e4llstr\u00f6m J, Ramos G, R\u0103dulescu R, Roijers DM, Hayes CF, Heintz F, Mannion P, Libin PJK, Dazeley R, Foale C (2022b) Scalar reward is not enough: a response to Silver, Singh, Precup and Sutton (2021). Auton Agents Multi-Agent Syst 36(2):41. https:\/\/doi.org\/10.1007\/s10458-022-09575-5","journal-title":"Auton Agents Multi-Agent Syst"},{"key":"10504_CR21","unstructured":"Randl\u00f8v J and Alstr\u00f8m P (1998) Learning to drive a bicycle using reinforcement learning and shaping. In international conference on machine learning, July 1998. https:\/\/www.semanticscholar.org\/paper\/Learning-to-Drive-a-Bicycle-Using-Reinforcement-and-Randl%C3%B8v-Alstr%C3%B8m\/9d8f6219fbd2da14d8d55562dcedf43fe671d0e3"},{"key":"10504_CR22","unstructured":"Ng AY , Harada D, and Russell SJ (1999) Policy invariance under reward transformations: theory and application to reward shaping. In proceedings of the 16th international conference on machine learning, ICML \u201999, pages 278\u2013287, San Francisco, CA, USA, June 1999. Morgan Kaufmann Publishers Inc. ISBN 978-1-55860-612-8"},{"key":"10504_CR23","unstructured":"Gupta A, Pacchiano A, Zhai Y, Kakade SM, and Levine S (2022) Unpacking reward shaping: understanding the benefits of reward engineering on sample complexity, arXiv:2210.09579 [cs]"},{"key":"10504_CR24","unstructured":"Eysenbach B, Gupta A, Ibarz J, and Levine S (2018) Diversity is all you need: learning skills without a reward function. arXiv:1802.06070[cs]"},{"key":"10504_CR25","unstructured":"Hausman K, Springenberg JT, Wang Z, Heess N, and Riedmiller M (2018) Learning an embedding space for transferable robot skills. In international conference on learning representations, https:\/\/openreview.net\/forum?id=rk07ZXZRb&noteId=rk07ZXZRb"},{"issue":"4","key":"10504_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530110","volume":"41","author":"XB Peng","year":"2022","unstructured":"Peng XB, Guo Y, Halper L, Levine S, Fidler S (2022) ASE: large-scale reusable adversarial skill embeddings for physically simulated characters. ACM Trans Gr 41(4):1\u201317. https:\/\/doi.org\/10.1145\/3528223.3530110","journal-title":"ACM Trans Gr"},{"key":"10504_CR27","unstructured":"Bowling M, Martin JD , Abel D, and Dabney W (2023) Settling the reward hypothesis http:\/\/arxiv.org\/abs\/2212.10420 [cs, math, stat]"},{"key":"10504_CR28","unstructured":"Miura S (2023) On the expressivity of multidimensional Markov reward, arXiv:2307.12184 [cs]"},{"key":"10504_CR29","unstructured":"Pitis S, Bailey D, and Ba J (2022) Rational multi-objective agents must admit non-markov reward representations. https:\/\/openreview.net\/forum?id=MNwA4sgzR4W"},{"issue":"1","key":"10504_CR30","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/BF01197559","volume":"14","author":"I Das","year":"1997","unstructured":"Das I, Dennis JE (1997) A closer look at drawbacks of minimizing weighted sums of objectives for Pareto set generation in multicriteria optimization problems. Struct Optim 14(1):63\u201369. https:\/\/doi.org\/10.1007\/BF01197559","journal-title":"Struct Optim"},{"key":"10504_CR31","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1007\/978-3-540-89378-3-37","volume-title":"AI 2008: advances in artificial intelligence","author":"P Vamplew","year":"2008","unstructured":"Vamplew P, Yearwood J, Dazeley R, Berry A (2008) On the limitations of scalarisation for multi-objective reinforcement learning of pareto fronts. In: Wobcke W, Zhang M (eds) AI 2008: advances in artificial intelligence. Berlin, Heidelberg Springer, pp 372\u2013378. https:\/\/doi.org\/10.1007\/978-3-540-89378-3-37"},{"key":"10504_CR32","unstructured":"Dornheim J (2022) gTLO: a generalized and non-linear multi-objective deep reinforcement learning approach arXiv:2204.04988 [cs, eess]"},{"key":"10504_CR33","doi-asserted-by":"crossref","unstructured":"Tajmajer T (2018) Modular multi-objective deep reinforcement learning with decision values, arXiv: 1704.06676","DOI":"10.15439\/2018F231"},{"key":"10504_CR34","unstructured":"Yang R, Sun X, and Narasimhan K (2019) A generalized algorithm for multi-objective reinforcement learning and policy adaptation, arXiv:1908.08342 [cs]"},{"key":"10504_CR35","unstructured":"Wang S, Reymond M, Irissappane AA, and Roijers DM (2022) Near on-policy experience sampling in multi-objective reinforcement learning. In proceedings of the 21st international conference on autonomous agents and multiagent systems, AAMAS \u201922, pp. 1756\u20131758, Richland, SC. International foundation for autonomous agents and multiagent systems. ISBN 978-1-4503-9213-6"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10504-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-10504-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10504-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T12:36:25Z","timestamp":1760013385000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-10504-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,9]]},"references-count":35,"journal-issue":{"issue":"30","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["10504"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-10504-8","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2024,12,9]]},"assertion":[{"value":"12 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or nonfinancial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}