{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T17:26:11Z","timestamp":1775323571699,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,4,28]],"date-time":"2023-04-28T00:00:00Z","timestamp":1682640000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,4,28]],"date-time":"2023-04-28T00:00:00Z","timestamp":1682640000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001634","name":"National University of Ireland, Galway","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001634","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003130","name":"Fonds Wetenschappelijk Onderzoek","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003130","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004418","name":"Vrije Universiteit Brussel","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004418","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004418","name":"Vrije Universiteit Brussel","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10458-023-09604-x","type":"journal-article","created":{"date-parts":[[2023,4,28]],"date-time":"2023-04-28T15:01:57Z","timestamp":1682694117000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Actor-critic multi-objective reinforcement learning for non-linear utility functions"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6735-6752","authenticated-orcid":false,"given":"Mathieu","family":"Reymond","sequence":"first","affiliation":[]},{"given":"Conor F.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"Denis","family":"Steckelmacher","sequence":"additional","affiliation":[]},{"given":"Diederik M.","family":"Roijers","sequence":"additional","affiliation":[]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,28]]},"reference":[{"key":"9604_CR1","unstructured":"Roijers, D. M., Steckelmacher, D., & Now\u00e9, A. (2018). Multi-objective reinforcement learning for the expected utility of the return. In Proceedings of the adaptive and learning agents workshop at FAIM."},{"key":"9604_CR2","unstructured":"Reymond, M., Hayes, C., Roijers, D. M., Steckelmacher, D., & Now\u00e9, A. (2021). Actor-critic multi-objective reinforcement learning for non-linear utility functions. In Multi-objective decision making workshop (MODeM 2021)."},{"issue":"6","key":"9604_CR3","doi-asserted-by":"publisher","first-page":"3476","DOI":"10.1002\/wrcr.20295","volume":"49","author":"A Castelletti","year":"2013","unstructured":"Castelletti, A., Pianosi, F., & Restelli, M. (2013). A multiobjective reinforcement learning approach to water resources systems operation: Pareto frontier approximation in a single run. Water Resources Research, 49(6), 3476\u20133486.","journal-title":"Water Resources Research"},{"issue":"5","key":"9604_CR4","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1080\/0952813X.2017.1292319","volume":"29","author":"A Jalalimanesh","year":"2017","unstructured":"Jalalimanesh, A., Haghighi, H. S., Ahmadi, A., Hejazian, H., & Soltani, M. (2017). Multi-objective optimization of radiotherapy: distributed q-learning and agent-based simulation. Journal of Experimental & Theoretical Artificial Intelligence, 29(5), 1071\u20131086.","journal-title":"Journal of Experimental & Theoretical Artificial Intelligence"},{"key":"9604_CR5","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers, D. M., Vamplew, P., Whiteson, S., & Dazeley, R. (2013). A survey of multi-objective sequential decision-making. Journal of Artificial Intelligence Research, 48, 67\u2013113.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"1","key":"9604_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10458-022-09552-y","volume":"36","author":"CF Hayes","year":"2022","unstructured":"Hayes, C. F., R\u0103dulescu, R., Bargiacchi, E., K\u00e4llstr\u00f6m, J., Macfarlane, M., Reymond, M., Verstraeten, T., Zintgraf, L. M., Dazeley, R., & Heintz, F. (2022). A practical guide to multi-objective reinforcement learning and planning. Autonomous Agents and Multi-Agent Systems, 36(1), 1\u201359.","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"9604_CR7","doi-asserted-by":"crossref","unstructured":"Van\u00a0Moffaert, K., Drugan, M. M., & Now\u00e9, A. (2013). Scalarized multi-objective reinforcement learning: Novel design techniques. In 2013 IEEE symposium on adaptive dynamic programming and reinforcement learning (ADPRL) (pp. 191\u2013199). IEEE.","DOI":"10.1109\/ADPRL.2013.6615007"},{"issue":"1","key":"9604_CR8","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1007\/s10458-019-09433-x","volume":"34","author":"R R\u0103dulescu","year":"2020","unstructured":"R\u0103dulescu, R., Mannion, P., Roijers, D. M., & Now\u00e9, A. (2020). Multi-objective multi-agent decision making: A utility-based analysis and survey. Autonomous Agents and Multi-Agent Systems, 34(1), 10.","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"issue":"1","key":"9604_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-031-01576-2","volume":"11","author":"DM Roijers","year":"2017","unstructured":"Roijers, D. M., & Whiteson, S. (2017). Multi-objective decision making. Synthesis Lectures on Artificial Intelligence and Machine Learning, 11(1), 1\u2013129.","journal-title":"Synthesis Lectures on Artificial Intelligence and Machine Learning"},{"issue":"3\u20134","key":"9604_CR10","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R. J. (1992). Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning, 8(3\u20134), 229\u2013256.","journal-title":"Machine Learning"},{"key":"9604_CR11","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., & Kavukcuoglu, K. (2016). Asynchronous methods for deep reinforcement learning. In International conference on machine learning (pp. 1928\u20131937)."},{"key":"9604_CR12","unstructured":"Bellemare, M. G., Dabney, W., & Munos, R. (2017). A distributional perspective on reinforcement learning. arXiv preprint arXiv:1707.06887"},{"issue":"1\u20132","key":"9604_CR13","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/s10994-010-5232-5","volume":"84","author":"P Vamplew","year":"2011","unstructured":"Vamplew, P., Dazeley, R., Berry, A., Issabekov, R., & Dekker, E. (2011). Empirical evaluation methods for multiobjective reinforcement learning algorithms. Machine Learning, 84(1\u20132), 51\u201380.","journal-title":"Machine Learning"},{"key":"9604_CR14","unstructured":"Abels, A., Roijers, D.M., Lenaerts, T., Now\u00e9, A., & Steckelmacher, D. (2019). Dynamic weights in multi-objective deep reinforcement learning. In Proceedings of the 36th international conference on machine learning. Proceedings of machine learning research (Vol. 97, pp. 11\u201320). PMLR."},{"key":"9604_CR15","unstructured":"Burda, Y., Edwards, H., Storkey, A., & Klimov, O. (2019) Exploration by random network distillation. In International conference on learning representations."},{"issue":"7540","key":"9604_CR16","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., Graves, A., Riedmiller, M., Fidjeland, A. K., & Ostrovski, G. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529.","journal-title":"Nature"},{"key":"9604_CR17","doi-asserted-by":"crossref","unstructured":"Perez, E., Strub, F., De\u00a0Vries, H., Dumoulin, V., & Courville, A. (2018). Film: Visual reasoning with a general conditioning layer. In Proceedings of the AAAI conference on artificial intelligence (Vol. 32).","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"9604_CR18","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1613\/jair.4550","volume":"52","author":"DM Roijers","year":"2015","unstructured":"Roijers, D. M., Whiteson, S., & Oliehoek, F. A. (2015). Computing convex coverage sets for faster multi-objective coordination. Journal of Artificial Intelligence Research, 52, 399\u2013443.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9604_CR19","unstructured":"Mossalam, H., Assael, Y. M., Roijers, D. M., & Whiteson, S. (2016). Multi-objective deep reinforcement learning. CoRR. arXiv:1610.02707"},{"key":"9604_CR20","doi-asserted-by":"crossref","unstructured":"Barrett, L., & Narayanan, S. (2008). Learning all optimal policies with multiple criteria. In Proceedings of the 25th international conference on machine learning (pp. 41\u201347). ACM.","DOI":"10.1145\/1390156.1390162"},{"issue":"1","key":"9604_CR21","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/s11571-008-9066-9","volume":"3","author":"K Hiraoka","year":"2009","unstructured":"Hiraoka, K., Yoshida, M., & Mishima, T. (2009). Parallel reinforcement learning for weighted multi-criteria model with adaptive margin. Cognitive Neurodynamics, 3(1), 17\u201324.","journal-title":"Cognitive Neurodynamics"},{"key":"9604_CR22","doi-asserted-by":"crossref","unstructured":"Castelletti, A., Pianosi, F., & Restelli, M. (2012). Tree-based fitted q-iteration for multi-objective Markov decision problems. In The 2012 international joint conference on neural Networks (IJCNN) (pp. 1\u20138). IEEE.","DOI":"10.1109\/IJCNN.2012.6252759"},{"key":"9604_CR23","unstructured":"Yang, R., Sun, X., & Narasimhan, K. (2019). A generalized algorithm for multi-objective reinforcement learning and policy adaptation. In Proceedings of the 33rd international conference on neural information processing systems. Red Hook, NY, USA: Curran Associates Inc."},{"key":"9604_CR24","unstructured":"Abdolmaleki, A., Huang, S., Hasenclever, L., Neunert, M., Song, F., Zambelli, M., Martins, M., Heess, N., Hadsell, R., & Riedmiller, M. (2020). A distributional view on multi-objective policy optimization. In International conference on machine learning (pp. 11\u201322). PMLR."},{"key":"9604_CR25","unstructured":"Abdolmaleki, A., Springenberg, J. T., Tassa, Y., Munos, R., Heess, N., & Riedmiller, M. (2018). Maximum a posteriori policy optimisation. In International conference on learning representations. https:\/\/openreview.net\/forum?id=S1ANxQW0b."},{"key":"9604_CR26","unstructured":"Xu, J., Tian, Y., Ma, P., Rus, D., Sueda, S., & Matusik, W. (2020). Prediction-guided multi-objective reinforcement learning for continuous robot control. In International conference on machine learning (pp. 10607\u201310616). PMLR."},{"key":"9604_CR27","doi-asserted-by":"crossref","unstructured":"Vamplew, P., Dazeley, R., Barker, E., & Kelarev, A. (2009). Constructing stochastic mixture policies for episodic multiobjective reinforcement learning tasks. In Australasian joint conference on artificial intelligence (pp. 340\u2013349). Springer.","DOI":"10.1007\/978-3-642-10439-8_35"},{"key":"9604_CR28","unstructured":"Tesauro, G., Das, R., Chan, H., Kephart, J., Levine, D., Rawson, F., & Lefurgy, C. (2008). Managing power consumption and performance of computing systems using reinforcement learning. In Advances in neural information processing systems (pp. 1497\u20131504)."},{"key":"9604_CR29","unstructured":"Neil, D., Segler, M., Guasch, L., Ahmed, M., Plumbley, D., Sellwood, M., & Brown, N. (2018). Exploring deep recurrent models with reinforcement learning for molecule design. In 6th International conference on learning representations (ICLR), workshop track."},{"key":"9604_CR30","unstructured":"Roijers, D. M., Zintgraf, L. M., Libin, P., & Now\u00e9, A. (2018). Interactive multi-objective reinforcement learning in multi-armed bandits for any utility function. In ALA workshop at FAIM (Vol. 8)."},{"key":"9604_CR31","unstructured":"Hayes, C. F., Reymond, M., Roijers, D. M., Howley, E., & Mannion, P. (2021). Distributional Monte Carlo tree search for risk-aware and multi-objective reinforcement learning. In Proceedings of the 20th international conference on autonomous agents and multiagent systems (pp. 1530\u20131532)."},{"issue":"1","key":"9604_CR32","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert, K., & Now\u00e9, A. (2014). Multi-objective reinforcement learning using sets of pareto dominating policies. The Journal of Machine Learning Research, 15(1), 3483\u20133512.","journal-title":"The Journal of Machine Learning Research"},{"key":"9604_CR33","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1613\/jair.4961","volume":"57","author":"S Parisi","year":"2016","unstructured":"Parisi, S., Pirotta, M., & Restelli, M. (2016). Multi-objective reinforcement learning through continuous pareto manifold approximation. Journal of Artificial Intelligence Research, 57, 187\u2013227.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9604_CR34","unstructured":"Reymond, M., & Now\u00e9, A. (2019). Pareto-dqn: Approximating the pareto front in complex multi-objective decision problems. In Proceedings of the adaptive and learning agents workshop (ALA-19) at AAMAS."},{"key":"9604_CR35","unstructured":"Reymond, M., Bargiacchi, E., & Now\u00e9, A. (2022) Pareto conditioned networks. In Proceedings of the 21st international conference on autonomous agents and multiagent systems (pp. 1110\u20131118)."},{"key":"9604_CR36","doi-asserted-by":"publisher","first-page":"114228","DOI":"10.1016\/j.eswa.2020.114228","volume":"168","author":"THF de Oliveira","year":"2021","unstructured":"de Oliveira, T. H. F., de Souza Medeiros, L. P., Neto, A. D. D., & Melo, J. D. (2021). Q-managed: A new algorithm for a multiobjective reinforcement learning. Expert Systems with Applications, 168, 114228.","journal-title":"Expert Systems with Applications"},{"issue":"11","key":"9604_CR37","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., & Bengio, Y. (2020). Generative adversarial networks. Communications of the ACM, 63(11), 139\u2013144.","journal-title":"Communications of the ACM"},{"key":"9604_CR38","unstructured":"Kingma, D. P., & Welling, M. (2013). Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114"},{"key":"9604_CR39","unstructured":"Dinh, L., Sohl-Dickstein, J., & Bengio, S. (2016). Density estimation using real NVP. arXiv preprint arXiv:1605.08803"},{"key":"9604_CR40","unstructured":"Zintgraf, L. M., Roijers, D. M., Linders, S., Jonker, C. M., & Now\u00e9, A. (2018). Ordered preference elicitation strategies for supporting multi-objective decision making. In Proceedings of the 17th international conference on autonomous agents and multiagent systems (pp. 1477\u20131485). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9604_CR41","doi-asserted-by":"crossref","unstructured":"Roijers, D. M., Zintgraf, L. M., Libin, P., Reymond, M., Bargiacchi, E., & Now\u00e9, A. (2020). Interactive multi-objective reinforcement learning in multi-armed bandits with gaussian process utility models. In Joint European conference on machine learning and knowledge discovery in databases (pp. 463\u2013478). Springer.","DOI":"10.1007\/978-3-030-67664-3_28"},{"key":"9604_CR42","doi-asserted-by":"crossref","unstructured":"Hayes, C. F., Verstraeten, T., Roijers, D. M., Howley, E., & Mannion, P. (2022). Expected scalarised returns dominance: A new solution concept for multi-objective decision making. Neural Computing and Applications, 1\u201321.","DOI":"10.1007\/s00521-022-07334-x"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-023-09604-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-023-09604-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-023-09604-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,18]],"date-time":"2023-12-18T04:47:57Z","timestamp":1702874877000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-023-09604-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,28]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["9604"],"URL":"https:\/\/doi.org\/10.1007\/s10458-023-09604-x","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,4,28]]},"assertion":[{"value":"14 March 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"23"}}