{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T15:11:23Z","timestamp":1772637083964,"version":"3.50.1"},"reference-count":86,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T00:00:00Z","timestamp":1772582400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T00:00:00Z","timestamp":1772582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100004909","name":"Universidade Federal Do Rio Grande Do Sul","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004909","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s10458-026-09736-w","type":"journal-article","created":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T08:53:00Z","timestamp":1772614380000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Generalized policy improvement for efficient and robust multi-objective reinforcement learning"],"prefix":"10.1007","volume":"40","author":[{"given":"Lucas N.","family":"Alegre","sequence":"first","affiliation":[]},{"given":"Ana L. C.","family":"Bazzan","sequence":"additional","affiliation":[]},{"given":"Diederik M.","family":"Roijers","sequence":"additional","affiliation":[]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[]},{"given":"Bruno C.","family":"da Silva","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,3,4]]},"reference":[{"key":"9736_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement Learning: An Introduction (2nd ed.). Cambridge, MA, USA: The MIT Press.","edition":"2"},{"issue":"7676","key":"9736_CR2","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., Antonoglou, I., Huang, A., Guez, A., Hubert, T., Baker, L., Lai, M., Bolton, A., Chen, Y., Lillicrap, T., Hui, F., Sifre, L., Driessche, G., Graepel, T., & Hassabis, D. (2017). Mastering the game of go without human knowledge. Nature., 550(7676), 354\u2013359. https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature."},{"issue":"7836","key":"9736_CR3","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1038\/s41586-020-2939-8","volume":"588","author":"MG Bellemare","year":"2020","unstructured":"Bellemare, M. G., Candido, S., Castro, P. S., Gong, J., Machado, M. C., Moitra, S., Ponda, S. S., & Wang, Z. (2020). Autonomous navigation of stratospheric balloons using reinforcement learning. Nature., 588(7836), 77\u201382. https:\/\/doi.org\/10.1038\/s41586-020-2939-8","journal-title":"Nature."},{"issue":"7896","key":"9736_CR4","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1038\/s41586-021-04357-7","volume":"602","author":"PR Wurman","year":"2022","unstructured":"Wurman, P. R., Barrett, S., Kawamoto, K., MacGlashan, J., Subramanian, K., Walsh, T. 
J., Capobianco, R., Devlic, A., Eckert, F., Fuchs, F., Gilpin, L., Khandelwal, P., Kompella, V., Lin, H., MacAlpine, P., Oller, D., Seno, T., Sherstan, C., Thomure, M. D., \u2026 Kitano, H. (2022). Outracing champion gran turismo drivers with deep reinforcement learning. Nature., 602(7896), 223\u2013228. https:\/\/doi.org\/10.1038\/s41586-021-04357-7","journal-title":"Nature."},{"issue":"1","key":"9736_CR5","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1007\/s10458-022-09552-y","volume":"36","author":"CF Hayes","year":"2022","unstructured":"Hayes, C. F., R\u0103dulescu, R., Bargiacchi, E., K\u00e4llstr\u00f6m, J., Macfarlane, M., Reymond, M., Verstraeten, T., Zintgraf, L. M., Dazeley, R., Heintz, F., Howley, E., Irissappane, A. A., Mannion, P., Now\u00e9, A., Ramos, G., Restelli, M., Vamplew, P., & Roijers, D. M. (2022). A practical guide to multi-objective reinforcement learning and planning. Autonomous Agents and Multi-Agent Systems., 36(1), 26. https:\/\/doi.org\/10.1007\/s10458-022-09552-y","journal-title":"Autonomous Agents and Multi-Agent Systems."},{"key":"9736_CR6","unstructured":"Yang, R., Sun, X., & Narasimhan, K.: A generalized algorithm for multi-objective reinforcement learning and policy adaptation. In: Wallach, H., Larochelle, H., Beygelzimer, A., Alch\u00e9-Buc, F., Fox, E., & Garnett, R. (Eds.) Advances in Neural Information Processing Systems 32, pp. 14610\u201314621 (2019)."},{"issue":"48","key":"9736_CR7","doi-asserted-by":"publisher","first-page":"30079","DOI":"10.1073\/pnas.1907370117","volume":"117","author":"A Barreto","year":"2020","unstructured":"Barreto, A., Hou, S., Borsa, D., Silver, D., & Precup, D. (2020). Fast reinforcement learning with generalized policy updates. Proceedings of the National Academy of Sciences., 117(48), 30079\u201330087. https:\/\/doi.org\/10.1073\/pnas.1907370117","journal-title":"Proceedings of the National Academy of Sciences."},{"issue":"1","key":"9736_CR8","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers, D. M., Vamplew, P., Whiteson, S., & Dazeley, R. (2013). A survey of multi-objective sequential decision-making. J. Artificial Intelligence Research., 48(1), 67\u2013113.","journal-title":"J. Artificial Intelligence Research."},{"key":"9736_CR9","unstructured":"Alegre, L.N., Bazzan, A.L.C., & Silva, B.C.: Optimistic linear support and successor features as a basis for optimal policy transfer. In: Chaudhuri, K., Jegelka, S., Song, L., Szepesvari, C., Niu, G., & Sabato, S. (Eds.) Proceedings of the 39th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 162, pp. 394\u2013413. PMLR, Baltimore, Maryland, USA (2022). proceedings.mlr.press\/v162\/alegre22a.html."},{"key":"9736_CR10","unstructured":"Mossalam, H., Assael, Y.M., Roijers, D.M.,& Whiteson, S.: Multi-objective deep reinforcement learning (2016). CoRR. arXiv:1610.02707"},{"key":"9736_CR11","unstructured":"Filos, A., V\u00e9rtes, E., Marinho, Z., Farquhar, G., Borsa, D., Friesen, A., Behbahani, F., Schaul, T., Barreto, A., & Osindero, S.: Model-value inconsistency as a signal for epistemic uncertainty. In: Chaudhuri, K., Jegelka, S., Song, L., Szepesvari, C., Niu, G., & Sabato, S. (eds.) Proceedings of the 39th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 162, pp. 6474\u20136498. PMLR, Baltimore, Maryland, USA (2022). 
https:\/\/proceedings.mlr.press\/v162\/filos22a.html."},{"key":"9736_CR12","doi-asserted-by":"crossref","unstructured":"Alegre, L.N., Bazzan, A.L.C., Roijers, D.M., Now\u00e9, A., & Silva, B.C.: Sample-efficient multi-objective learning via generalized policy improvement prioritization. In: Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems. AAMAS \u201923, pp. 2003\u20132012. International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC (2023).","DOI":"10.65109\/TBSX2832"},{"key":"9736_CR13","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"2005","unstructured":"Puterman, M. L. (2005). Markov Decision Processes: Discrete Stochastic Dynamic Programming. New York, NY, USA: Wiley Series in Probability and Statistics. Wiley-Interscience."},{"key":"9736_CR14","volume-title":"Advances in Neural Information Processing Systems","author":"A Barreto","year":"2017","unstructured":"Barreto, A., Dabney, W., Munos, R., Hunt, J. J., Schaul, T., Hasselt, H. P., & Silver, D. (2017). Successor features for transfer in reinforcement learning. In I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, & R. Garnett (Eds.), Advances in Neural Information Processing Systems (Vol. 30). Long Beach, CA, USA: Curran Associates Inc."},{"key":"9736_CR15","unstructured":"Barreto, A., Borsa, D., Quan, J., Schaul, T., Silver, D., Hessel, M., Mankowitz, D., Zidek, A., & Munos, R.: Transfer in deep reinforcement learning using successor features and generalised policy improvement. In: Dy, J., & Krause, A. (Eds.) Proceedings of the 35th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 80, pp. 501\u2013510. Stockholm, Sweden (2018)."},{"key":"9736_CR16","unstructured":"Moerland, T.M., Broekens, J., & Jonker, C.M.: Model-based reinforcement learning: A survey (2020). CoRR. arXiv:2006.16712."},{"key":"9736_CR17","unstructured":"Van\u00a0Seijen, H., & Sutton, R.S.: Efficient planning in MDPs by small backups. In: Proceedings of the 30th International Conference on Machine Learning. ICML\u201913, pp. 361\u2013369. JMLR.org, Atlanta, GA, USA (2013)."},{"key":"9736_CR18","unstructured":"Janner, M., Fu, J., Zhang, M., & Levine, S.: When to trust your model: Model-based policy optimization. In: Wallach, H., Larochelle, H., Beygelzimer, A., Alch\u00e9-Buc, F., Fox, E., & Garnett, R. (Eds.) Advances in Neural Information Processing Systems (NIPS) 32, pp. 12519\u201312530. Curran Associates, Inc., Vancouver, BC, Canada (2019). http:\/\/papers.nips.cc\/paper\/9416-when-to-trust-your-model-model-based-policy-optimization.pdf."},{"key":"9736_CR19","doi-asserted-by":"publisher","unstructured":"Pan, Y., Yao, H., Farahmand, A., & White, M.: Hill climbing on value estimates for search-control in dyna. In: Proceedings of the 28th International Joint Conference on Artificial Intelligence, pp. 3209\u20133215. ijcai.org, Macao, China (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/445.","DOI":"10.24963\/ijcai.2019\/445"},{"key":"9736_CR20","unstructured":"Feinberg, V., Wan, A., Stoica, I., Jordan, M.I., Gonzalez, J.E., & Levine, S.: Model-based value estimation for efficient model-free reinforcement learning (2018). CoRR. 
arXiv:1803.00101."},{"key":"9736_CR21","volume-title":"Advances in Neural Information Processing Systems","author":"J Buckman","year":"2018","unstructured":"Buckman, J., Hafner, D., Tucker, G., Brevdo, E., & Lee, H. (2018). Sample-efficient reinforcement learning with stochastic ensemble value expansion. In S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, & R. Garnett (Eds.), Advances in Neural Information Processing Systems (Vol. 31). Montr\u00e9al, Canada: Curran Associates Inc."},{"key":"9736_CR22","unstructured":"Abbas, Z., Sokota, S., Talvitie, E., & White, M.: Selective dyna-style planning under limited model capacity. In: III, H.D., & Singh, A. (Eds.) Proceedings of the 37th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 119, pp. 1\u201310. PMLR, Virtual (2020)."},{"key":"9736_CR23","unstructured":"Chua, K., Calandra, R., McAllister, R., & Levine, S.: Deep reinforcement learning in a handful of trials using probabilistic dynamics models. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems. NIPS\u201918, pp. 4759\u20134770. Curran Associates Inc., Red Hook, NY, USA (2018)."},{"key":"9736_CR24","unstructured":"Deisenroth, M.P., & Rasmussen, C.E.: PILCO: A model-based and data-efficient approach to policy search. In: Proceedings of the 28th International Conference on Machine Learning (ICML). ICML\u201911, pp. 465\u2013472. Omnipress, Madison, WI, USA (2011)."},{"key":"9736_CR25","unstructured":"D\u2019Oro, P., & Jaskowski, W.: How to learn a useful critic? Model-based action-gradient-estimator policy optimization. In: Advances in Neural Information Processing Systems 33, Virtual (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/03255088ed63354a54e0e5ed957e9008-Abstract.html"},{"key":"9736_CR26","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Proceedings of the 7th International Conference on Machine Learning, pp. 216\u2013224 (1990).","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"9736_CR27","doi-asserted-by":"crossref","unstructured":"Pan, Y., Zaheer, M., White, A., Patterson, A., & White, M.: Organizing experience: A deeper look at replay mechanisms for sample-based planning in continuous state domains. In: Proceedings of the 27th International Joint Conference on Artificial Intelligence. IJCAI\u201918, pp. 4794\u20134800. AAAI Press, Stockholm, Sweden (2018).","DOI":"10.24963\/ijcai.2018\/666"},{"key":"9736_CR28","unstructured":"Kaiser, L., Babaeizadeh, M., Mi\u0142os, P., Osi\u0144ski, B., Campbell, R.H., Czechowski, K., Erhan, D., Finn, C., Kozakowski, P., Levine, S., Mohiuddin, A., Sepassi, R., Tucker, G., & Michalewski, H.: Model based reinforcement learning for Atari. In: Proceedings of the 8th International Conference on Learning Representations (2020)."},{"key":"9736_CR29","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1023\/A:1022635613229","volume":"13","author":"AW Moore","year":"1993","unstructured":"Moore, A. W., & Atkeson, C. G. (1993). Prioritized sweeping: Reinforcement learning with less data and less time. Machine Learning., 13, 103\u2013130.","journal-title":"Machine Learning."},{"issue":"3","key":"9736_CR30","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/BF00992699","volume":"8","author":"L-J Lin","year":"1992","unstructured":"Lin, L.-J. (1992). 
Self-improving reactive agents based on reinforcement learning, planning and teaching. Machine Learning., 8(3), 293\u2013321. https:\/\/doi.org\/10.1007\/BF00992699","journal-title":"Machine Learning."},{"key":"9736_CR31","unstructured":"Schaul, T., Quan, J., Antonoglou, I., & Silver, D.: Prioritized experience replay. In: Proceedings of the 4th International Conference on Learning Representations, Puerto Rico (2016)."},{"key":"9736_CR32","first-page":"14219","volume-title":"Advances in Neural Information Processing Systems","author":"S Fujimoto","year":"2020","unstructured":"Fujimoto, S., Meger, D., & Precup, D. (2020). An equivalence between loss functions and non-uniform sampling in experience replay. In H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, & H. Lin (Eds.), Advances in Neural Information Processing Systems (Vol. 33, pp. 14219\u201314230). Virtual: Curran Associates Inc."},{"key":"9736_CR33","unstructured":"Van\u00a0Seijen, H., & Sutton, R.: A deeper look at planning as learning from replay. In: Bach, F., & Blei, D. (Eds.) Proceedings of the 32nd International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 37, pp. 2314\u20132322. PMLR, Lille, France (2015)."},{"key":"9736_CR34","doi-asserted-by":"crossref","unstructured":"Roijers, D.: Multi-objective decision-theoretic planning. PhD thesis, University of Amsterdam (2016).","DOI":"10.1145\/3008665.3008670"},{"key":"9736_CR35","volume-title":"Advances in Neural Information Processing Systems","author":"I Osband","year":"2016","unstructured":"Osband, I., Blundell, C., Pritzel, A., & Van Roy, B. (2016). Deep exploration via bootstrapped DQN. In D. Lee, M. Sugiyama, U. Luxburg, I. Guyon, & R. Garnett (Eds.), Advances in Neural Information Processing Systems (Vol. 29). Barcelona, Spain: Curran Associates Inc."},{"key":"9736_CR36","unstructured":"Liang, L., Xu, Y., McAleer, S., Hu, D., Ihler, A., Abbeel, P., & Fox, R.: Reducing variance in temporal-difference value estimation via ensemble of deep networks. In: Proceedings of the 39th International Conference on Machine Learning (ICML) (2022)."},{"key":"9736_CR37","doi-asserted-by":"crossref","unstructured":"Ghasemipour, S.K.S., Gu, S.S., & Nachum, O.: Why so pessimistic? Estimating uncertainties for offline RL through ensembles, and why their independence matters. In: Proceedings of the 36th Annual Conference on Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=z64kN1h1-rR.","DOI":"10.52202\/068431-1328"},{"key":"9736_CR38","unstructured":"Hasselt, H.v., Guez, A., & Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence. AAAI\u201916, pp. 2094\u20132100. AAAI Press (2016)."},{"key":"9736_CR39","unstructured":"Abels, A., Roijers, D.M., Lenaerts, T., Now\u00e9, A., & Steckelmacher, D.: Dynamic weights in multi-objective deep reinforcement learning. In: Proceedings of the 36th International Conference on Machine Learning, vol. 97, pp. 11\u201320. International Machine Learning Society (IMLS), Long Beach, California, USA (2019)."},{"key":"9736_CR40","unstructured":"Borsa, D., Barreto, A., Quan, J., Mankowitz, D.J., Munos, R., Hasselt, H.V., Silver, D., & Schaul, T.: Universal successor features approximators. 
In: Proceedings of the 7th International Conference on Learning Representations (ICLR) (2019)."},{"key":"9736_CR41","unstructured":"Chen, X., Wang, C., Zhou, Z., & Ross, K.W.: Randomized ensembled double q-learning: Learning fast without a model. In: Proceedings of the Ninth International Conference on Learning Representations (ICLR), Virtual (2021)."},{"key":"9736_CR42","unstructured":"Fujimoto, S., Hoof, H., & Meger, D.: Addressing function approximation error in actor-critic methods. In: Proceedings of the 35th International Conference on Machine Learning, pp. 1582\u20131591 (2018)."},{"key":"9736_CR43","unstructured":"Lai, H., Shen, J., Zhang, W., Huang, Y., Zhang, X., Tang, R., Yu, Y., & Li, Z.: On effective scheduling of model-based reinforcement learning. In: Beygelzimer, A., Dauphin, Y., Liang, P., & Vaughan, J.W. (Eds.) Proceedings of the 35th Conference on Neural Information Processing Systems (2021). https:\/\/openreview.net\/forum?id=z36cUrI0jKJ."},{"key":"9736_CR44","unstructured":"Pan, Y., Mei, J., Farahmand, A.-m., White, M., Yao, H., Rohani, M., & Luo, J.: Understanding and mitigating the limitations of prioritized experience replay. In: Proceedings of the 38th Conference on Uncertainty in Artificial Intelligence (2022). https:\/\/openreview.net\/forum?id=HBlNGvIicg9"},{"key":"9736_CR45","unstructured":"Alegre, L.N., Felten, F., Talbi, E.-G., Danoy, G., Now\u00e9, A., Bazzan, A.L.C., & Silva, B.C.: MO-Gym: A library of multi-objective reinforcement learning environments. In: Proceedings of the 34th Benelux Conference on Artificial Intelligence BNAIC\/Benelearn 2022 (2022)."},{"key":"9736_CR46","unstructured":"Zintgraf, L.M., Kanters, T.V., Roijers, D.M., Oliehoek, F.A., & Beau, P.: Quality assessment of MORL algorithms: A utility-based approach. In: Benelearn 2015: Proceedings of the 24th Annual Machine Learning Conference of Belgium and the Netherlands (2015)."},{"key":"9736_CR47","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., & Levine, S.: Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy, J., & Krause, A. (Eds.) Proceedings of the 35th International Conference on Machine Learning (ICML). Proceedings of Machine Learning Research, vol. 80, pp. 1861\u20131870. PMLR, Stockholmsm\u00e4ssan, Stockholm, Sweden (2018). http:\/\/proceedings.mlr.press\/v80\/haarnoja18b.html."},{"key":"9736_CR48","unstructured":"Xu, J., Tian, Y., Ma, P., Rus, D., Sueda, S., & Matusik, W.: Prediction-guided multi-objective reinforcement learning for continuous robot control. In: Proceedings of the 37th International Conference on Machine Learning (ICML) (2020)."},{"key":"9736_CR49","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O.: Proximal policy optimization algorithms (2017). CoRR. arXiv:1707.06347."},{"key":"9736_CR50","unstructured":"Bradbury, J., Frostig, R., Hawkins, P., Johnson, M.J., Leary, C., Maclaurin, D., Necula, G., Paszke, A., VanderPlas, J., Wanderman-Milne, S., & Zhang, Q.: JAX: composable transformations of Python+NumPy programs (2018). http:\/\/github.com\/google\/jax."},{"key":"9736_CR51","unstructured":"Agarwal, R., Schwarzer, M., Castro, P.S., Courville, A., & Bellemare, M.G.: Deep reinforcement learning at the edge of the statistical precipice. In: Beygelzimer, A., Dauphin, Y., Liang, P., & Vaughan, J.W. (Eds.) Advances in Neural Information Processing Systems, 34 (NeurIPS 2021) (2021). 
https:\/\/openreview.net\/forum?id=uqv8-U4lKBe."},{"key":"9736_CR52","doi-asserted-by":"crossref","unstructured":"Bellemare, M.G., Dabney, W., & Rowland, M.: Distributional Reinforcement Learning. MIT Press, Cambridge, MA (2023). http:\/\/www.distributional-rl.org.","DOI":"10.7551\/mitpress\/14207.001.0001"},{"key":"9736_CR53","doi-asserted-by":"crossref","unstructured":"Wiltzer, H., Farebrother, J., Gretton, A., & Rowland, M.: Foundations of multivariate distributional reinforcement learning. In: Proceedings of the 38th International Conference on Neural Information Processing Systems, vol. 37, pp. 101297\u2013101336. Curran Associates Inc., Red Hook, NY, USA (2025).","DOI":"10.52202\/079017-3212"},{"issue":"1","key":"9736_CR54","first-page":"3483","volume":"15","author":"K Van Moffaert","year":"2014","unstructured":"Van Moffaert, K., & Now\u00e9, A. (2014). Multi-objective reinforcement learning using sets of Pareto dominating policies. J. Mach. Learn. Res., 15(1), 3483\u20133512.","journal-title":"J. Mach. Learn. Res."},{"key":"9736_CR55","doi-asserted-by":"publisher","unstructured":"Parisi, S., Pirotta, M., & Peters, J. (2017). Manifold-based multi-objective policy search with sample reuse. Neurocomputing, 263, 3\u201314. https:\/\/doi.org\/10.1016\/j.neucom.2016.11.094. Special Issue on Multi-Objective Reinforcement Learning","DOI":"10.1016\/j.neucom.2016.11.094"},{"key":"9736_CR56","unstructured":"Abdolmaleki, A., Huang, S., Hasenclever, L., Neunert, M., Song, F., Zambelli, M., Martins, M., Heess, N., Hadsell, R., & Riedmiller, M.: A distributional view on multi-objective policy optimization. In: Proceedings of the 37th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 119, pp. 11\u201322. PMLR (2020)."},{"key":"9736_CR57","doi-asserted-by":"crossref","unstructured":"R\u00f6pke, W., Reymond, M., Mannion, P., Roijers, D.M., Now\u00e9, A., & R\u0103dulescu, R.: Divide and Conquer: Provably Unveiling the Pareto Front with Multi-Objective Reinforcement Learning. In: Proceedings of the 24th International Conference on Autonomous Agents and Multiagent Systems. AAMAS \u201925, pp. 1774\u20131783. International Foundation for Autonomous Agents and Multiagent Systems (2025).","DOI":"10.65109\/XQBL5396"},{"key":"9736_CR58","doi-asserted-by":"publisher","unstructured":"Wiering, M.A., Withagen, M., & Drugan, M.M.: Model-based multi-objective reinforcement learning. In: 2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL), pp. 1\u20136 (2014). https:\/\/doi.org\/10.1109\/ADPRL.2014.7010622.","DOI":"10.1109\/ADPRL.2014.7010622"},{"key":"9736_CR59","doi-asserted-by":"crossref","unstructured":"Yamaguchi, T., Nagahama, S., Ichikawa, Y., & Takadama, K.: Model-based multi-objective reinforcement learning with unknown weights. In: Yamamoto, S., & Mori, H. (Eds.) Human Interface and the Management of Information. Information in Intelligent Systems, pp. 311\u2013321. Springer, Cham (2019).","DOI":"10.1007\/978-3-030-22649-7_25"},{"key":"9736_CR60","doi-asserted-by":"crossref","unstructured":"Agarwal, M., Aggarwal, V., & Lan, T.: Multi-objective reinforcement learning with non-linear scalarization. In: Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems. AAMAS \u201922, pp. 9\u201317. International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC (2022).","DOI":"10.65109\/LNGI4831"},{"key":"9736_CR61","unstructured":"Wang, W., & Sebag, M.: Multi-objective Monte-Carlo tree search. 
In: Hoi, S.C.H., & Buntine, W. (Eds.) Proceedings of the Asian Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 25, pp. 507\u2013522. PMLR, Singapore Management University, Singapore (2012). https:\/\/proceedings.mlr.press\/v25\/wang12b.html."},{"issue":"4","key":"9736_CR62","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1109\/TCIAIG.2014.2345842","volume":"7","author":"D Perez","year":"2015","unstructured":"Perez, D., Mostaghim, S., Samothrakis, S., & Lucas, S. M. (2015). Multiobjective Monte Carlo tree search for real-time games. IEEE Transactions on Computational Intelligence and AI in Games., 7(4), 347\u2013360. https:\/\/doi.org\/10.1109\/TCIAIG.2014.2345842","journal-title":"IEEE Transactions on Computational Intelligence and AI in Games."},{"key":"9736_CR63","doi-asserted-by":"crossref","unstructured":"Painter, M., Lacerda, B., & Hawes, N.: Convex hull Monte-Carlo tree-search. In: Proceedings of the 30th International Conference on Automated Planning and Scheduling (ICAPS), pp. 217\u2013225. AAAI Press, Nancy, France (2020).","DOI":"10.1609\/icaps.v30i1.6664"},{"key":"9736_CR64","unstructured":"Hayes, C.F., Reymond, M., Roijers, D.M., Howley, E., & Mannion, P.: Risk aware and multi-objective decision making with distributional Monte Carlo tree search. In: Proceedings of the 20th International Conference on Autonomous Agents and Multiagent Systems (2021)."},{"key":"9736_CR65","doi-asserted-by":"publisher","unstructured":"Hayes, C.F., Reymond, M., Roijers, D.M., Howley, E., & Mannion, P.: Monte Carlo tree search algorithms for risk-aware and multi-objective reinforcement learning. Autonomous Agents and Multi-Agent Systems, 37(2) (2023). https:\/\/doi.org\/10.1007\/s10458-022-09596-0.","DOI":"10.1007\/s10458-022-09596-0"},{"key":"9736_CR66","unstructured":"Sinha, S., Song, J., Garg, A., & Ermon, S.: Experience replay with likelihood-free importance weights. In: Firoozi, R., Mehr, N., Yel, E., Antonova, R., Bohg, J., Schwager, M., & Kochenderfer, M. (Eds.) Proceedings of The 4th Annual Learning for Dynamics and Control Conference. Proceedings of Machine Learning Research, vol. 168, pp. 110\u2013123. PMLR (2022). https:\/\/proceedings.mlr.press\/v168\/sinha22a.html."},{"key":"9736_CR67","unstructured":"Hafner, D., Lillicrap, T.P., Norouzi, M., & Ba, J.: Mastering Atari with discrete world models. In: Proceedings of The 9th International Conference on Learning Representations. OpenReview.net, Virtual Event, Austria (2021)."},{"key":"9736_CR68","unstructured":"Pickett, M., & Barto, A.G.: PolicyBlocks: An algorithm for creating useful macro-actions in reinforcement learning. In: Proceedings of the Nineteenth International Conference on Machine Learning, pp. 506\u2013513. Morgan Kaufmann, Sydney, NSW, Australia (2002)."},{"issue":"56","key":"9736_CR69","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor, M. E., & Stone, P. (2009). Transfer learning for reinforcement learning domains: A survey. Journal of Machine Learning Research., 10(56), 1633\u20131685.","journal-title":"Journal of Machine Learning Research."},{"key":"9736_CR70","doi-asserted-by":"publisher","unstructured":"Li, S., Li, H., Zhang, J., Wang, Z., Liu, P., & Zhang, C.: IOB: Integrating optimization transfer and behavior transfer for multi-policy reuse. Autonomous Agents and Multi-Agent Systems, 38(1) (2023). 
https:\/\/doi.org\/10.1007\/s10458-023-09630-9.","DOI":"10.1007\/s10458-023-09630-9"},{"key":"9736_CR71","unstructured":"Gimelfarb, M., Barreto, A., Sanner, S., & Lee, C.-G.: Risk-aware transfer in reinforcement learning using successor features. In: Proceedings of the 35th Annual Conference on Advances in Neural Information Processing Systems, Online (2021)."},{"key":"9736_CR72","unstructured":"Janz, D., Hron, J., Mazur, P., Hofmann, K., Hern\u00e1ndez-Lobato, J.M., & Tschiatschek, S.: Successor uncertainties: Exploration and uncertainty in temporal difference learning. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems. Curran Associates Inc., Red Hook, NY, USA (2019)."},{"key":"9736_CR73","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1016\/j.neucom.2023.01.076","volume":"530","author":"P Malekzadeh","year":"2023","unstructured":"Malekzadeh, P., Hou, M., & Plataniotis, K. N. (2023). Uncertainty-aware transfer across tasks using hybrid model-based successor feature reinforcement learning. Neurocomputing., 530, 165\u2013187. https:\/\/doi.org\/10.1016\/j.neucom.2023.01.076","journal-title":"Neurocomputing."},{"key":"9736_CR74","doi-asserted-by":"crossref","unstructured":"Machado, M.C., Bellemare, M.G., & Bowling, M.: Count-based exploration with the successor representation. In: Proceedings of the Thirty-Fourth AAAI Conference on Artificial Intelligence (AAAI) (2020).","DOI":"10.1609\/aaai.v34i04.5955"},{"key":"9736_CR75","unstructured":"Chelu, V., Precup, D., & Hasselt, H.P.: Forethought and hindsight in credit assignment. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M.F., & Lin, H. (Eds.) Advances in Neural Information Processing Systems, vol. 33, pp. 2270\u20132281. Curran Associates, Inc. (2020)."},{"key":"9736_CR76","doi-asserted-by":"crossref","unstructured":"Grimm, C., Barreto, A., Farquhar, G., Silver, D., & Singh, S.: Proper value equivalence. In: Proceedings of the 35th Conference on Neural Information Processing Systems (2021).","DOI":"10.52202\/068431-2393"},{"key":"9736_CR77","doi-asserted-by":"publisher","unstructured":"Reymond, M., Hayes, C.F., Steckelmacher, D., Roijers, D.M., & Now\u00e9, A.: Actor-critic multi-objective reinforcement learning for non-linear utility functions. Autonomous Agents and Multi-Agent Systems, 37(2) (2023). https:\/\/doi.org\/10.1007\/s10458-023-09604-x.","DOI":"10.1007\/s10458-023-09604-x"},{"key":"9736_CR78","volume-title":"Abstract Dynamic Programming","author":"D Bertsekas","year":"2022","unstructured":"Bertsekas, D. (2022). Abstract Dynamic Programming. Belmont, MA: Athena Scientific."},{"key":"9736_CR79","doi-asserted-by":"crossref","unstructured":"Felten, F., Alegre, L.N., Now\u00e9, A., Bazzan, A.L.C., Talbi, E.-G., Danoy, G., & Silva, B.C.: A toolkit for reliable benchmarking and research in multi-objective reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 36 (2023).","DOI":"10.52202\/075280-1028"},{"issue":"1\u20132","key":"9736_CR80","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/s10994-010-5232-5","volume":"84","author":"P Vamplew","year":"2011","unstructured":"Vamplew, P., Dazeley, R., Berry, A., Issabekov, R., & Dekker, E. (2011). Empirical evaluation methods for multiobjective reinforcement learning algorithms. Mach. Learn., 84(1\u20132), 51\u201380. https:\/\/doi.org\/10.1007\/s10994-010-5232-5","journal-title":"Mach. 
Learn."},{"key":"9736_CR81","doi-asserted-by":"publisher","unstructured":"Towers, M., Terry, J.K., Kwiatkowski, A., Balis, J.U., Cola, G., Deleu, T., Goul\u00e3o, M., Kallinteris, A., KG, A., Krimmel, M., Perez-Vicente, R., Pierr\u00e9, A., Schulhoff, S., Tai, J.J., Tan, A.J.S., & Younis, O.G.: Gymnasium. Zenodo (2023). https:\/\/doi.org\/10.5281\/zenodo.8127025.","DOI":"10.5281\/zenodo.8127025"},{"issue":"1","key":"9736_CR82","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/TEVC.2020.2992387","volume":"25","author":"J Blank","year":"2021","unstructured":"Blank, J., Deb, K., Dhebar, Y., Bandaru, S., & Seada, H. (2021). Generating well-spaced points on a unit simplex for evolutionary many-objective optimization. IEEE Transactions on Evolutionary Computation., 25(1), 48\u201360. https:\/\/doi.org\/10.1109\/TEVC.2020.2992387","journal-title":"IEEE Transactions on Evolutionary Computation."},{"key":"9736_CR83","doi-asserted-by":"publisher","first-page":"89497","DOI":"10.1109\/ACCESS.2020.2990567","volume":"8","author":"J Blank","year":"2020","unstructured":"Blank, J., & Deb, K. (2020). Pymoo: Multi-objective optimization in Python. IEEE Access., 8, 89497\u201389509. https:\/\/doi.org\/10.1109\/ACCESS.2020.2990567","journal-title":"IEEE Access."},{"key":"9736_CR84","unstructured":"Hiraoka, T., Imagawa, T., Hashimoto, T., Onishi, T., & Tsuruoka, Y.: Dropout Q-functions for doubly efficient reinforcement learning. In: Proceedings of the Tenth International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=xCVJMsPv3RT."},{"issue":"7540","key":"9736_CR85","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., Graves, A., Riedmiller, M., Fidjeland, A. K., Ostrovski, G., et al. (2015). Human-level control through deep reinforcement learning. Nature., 518(7540), 529\u2013533.","journal-title":"Nature."},{"key":"9736_CR86","unstructured":"Kingma, D.P., & Ba, J.: Adam: A method for stochastic optimization. In: Bengio, Y., & LeCun, Y. (Eds.) Proceedings of the 3rd International Conference on Learning Representations (ICLR), San Diego, CA. 
(2015)."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09736-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-026-09736-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09736-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T08:53:27Z","timestamp":1772614407000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-026-09736-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,4]]},"references-count":86,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["9736"],"URL":"https:\/\/doi.org\/10.1007\/s10458-026-09736-w","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,4]]},"assertion":[{"value":"24 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest\/competing interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"All authors approved the paper to be published.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare no competing interests.","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"12"}}