{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T06:56:51Z","timestamp":1781765811362,"version":"3.54.5"},"reference-count":84,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100007511","name":"Universidad Rey Juan Carlos","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100007511","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1007\/s10458-026-09732-0","type":"journal-article","created":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T05:39:44Z","timestamp":1770097184000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning the value systems of agents with preference-based and inverse reinforcement learning"],"prefix":"10.1007","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8853-1022","authenticated-orcid":false,"given":"Andr\u00e9s","family":"Holgado-S\u00e1nchez","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8298-4178","authenticated-orcid":false,"given":"Holger","family":"Billhardt","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8962-6856","authenticated-orcid":false,"given":"Alberto","family":"Fern\u00e1ndez","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2483-9508","authenticated-orcid":false,"given":"Sascha","family":"Ossowski","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,2,3]]},"reference":[{"key":"9732_CR1","doi-asserted-by":"publisher","unstructured":"Abbeel, P., Ng, A.Y. (2004). Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the 21st international conference on machine learning. Association for Computing Machinery, New York, NY, USA, ICML \u201904, p\u00a01. https:\/\/doi.org\/10.1145\/1015330.1015430","DOI":"10.1145\/1015330.1015430"},{"key":"9732_CR2","doi-asserted-by":"publisher","unstructured":"Aguilera, A., Albert\u00ed, M., Osman, N., Curto, G. (2025) Population synthesis with motivational attributes: A path towards cultural variation in agent-based models. In: Proceedings of the 28th European Conference on Artificial Intelligence (ECAI 2025), pp 3751\u20133758, https:\/\/doi.org\/10.3233\/FAIA251255","DOI":"10.3233\/FAIA251255"},{"key":"9732_CR3","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1515\/PJBR-2018-0024","volume":"9","author":"M Anderson","year":"2018","unstructured":"Anderson, M., & Anderson, S. L. (2018). Geneth: A general ethical dilemma analyzer. Paladyn, 9, 337\u2013357. https:\/\/doi.org\/10.1515\/PJBR-2018-0024","journal-title":"Paladyn"},{"key":"9732_CR4","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1109\/MIS.2006.64","volume":"21","author":"M Anderson","year":"2006","unstructured":"Anderson, M., Anderson, S. L., & Armen, C. (2006). An approach to computing ethics. IEEE Intelligent Systems, 21, 56\u201363. https:\/\/doi.org\/10.1109\/MIS.2006.64","journal-title":"IEEE Intelligent Systems"},{"key":"9732_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103500","volume":"297","author":"S Arora","year":"2021","unstructured":"Arora, S., & Doshi, P. (2021). A survey of inverse reinforcement learning: Challenges, methods and progress. Artificial Intelligence, 297, Article 103500. https:\/\/doi.org\/10.1016\/j.artint.2021.103500","journal-title":"Artificial Intelligence"},{"issue":"01","key":"9732_CR6","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1609\/aaai.v33i01.33013","volume":"33","author":"A Balakrishnan","year":"2019","unstructured":"Balakrishnan, A., Bouneffouf, D., Mattei, N., & Rossi, F. (2019). Incorporating behavioral constraints in online ai systems. Proceedings of the AAAI Conference on Artificial Intelligence, 33(01), 3\u201311. https:\/\/doi.org\/10.1609\/aaai.v33i01.33013","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"9732_CR7","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/s10458-011-9173-6","volume":"25","author":"T Bench-Capon","year":"2012","unstructured":"Bench-Capon, T., Atkinson, K., & McBurney, P. (2012). Using argumentation to model agent decision making in economic experiments. Autonomous Agents and Multi-Agent Systems, 25, 183\u2013208. https:\/\/doi.org\/10.1007\/s10458-011-9173-6","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"9732_CR8","doi-asserted-by":"publisher","unstructured":"Bradley, R.A., Terry, M.E. (1952). Rank analysis of incomplete block designs: I. the method of paired comparisons. Biometrika 39(3\/4):324\u2013345. https:\/\/doi.org\/10.2307\/2334029","DOI":"10.2307\/2334029"},{"key":"9732_CR9","unstructured":"Brown, D., Goo, W., Nagarajan, P., Niekum, S. (2019). Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations. In: Chaudhuri, K., Salakhutdinov, R. (eds) Proceedings of the 36th international conference on machine learning, proceedings of machine learning research (vol\u00a097, pp 783\u2013792). PMLR. https:\/\/proceedings.mlr.press\/v97\/brown19a.html"},{"issue":"9","key":"9732_CR10","doi-asserted-by":"publisher","first-page":"9573","DOI":"10.1609\/aaai.v38i9.28813","volume":"38","author":"N Chandak","year":"2024","unstructured":"Chandak, N., Goel, S., & Peters, D. (2024). Proportional aggregation of preferences for sequential decision making. Proceedings of the AAAI Conference on Artificial Intelligence, 38(9), 9573\u20139581. https:\/\/doi.org\/10.1609\/aaai.v38i9.28813","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"9732_CR11","unstructured":"Christiano, P.F., Leike, J., Brown, T.B., Martic, M., Legg, S., Amodei, D. (2017) Deep reinforcement learning from human preferences. In: Proceedings of the 31st international conference on neural information processing systems. Curran Associates Inc., Red Hook, NY, USA, NIPS\u201917, pp 4302\u20134310"},{"key":"9732_CR12","doi-asserted-by":"publisher","unstructured":"Curry, O. S., Alfano, M., Brandt, M. J., & Pelican, C. (2022). Moral molecules: Morality as a combinatorial system. Review of Philosophy and Psychology, 13(4), 1039\u20131058. https:\/\/doi.org\/10.1007\/s13164-021-00540-x","DOI":"10.1007\/s13164-021-00540-x"},{"key":"9732_CR13","doi-asserted-by":"publisher","unstructured":"Dignum, V. (2017). Responsible autonomy. In: Proceedings of the 26th international joint conference on artificial intelligence. AAAI Press, IJCAI\u201917, p 4698\u20134704, https:\/\/doi.org\/10.1007\/978-3-030-30371-6","DOI":"10.1007\/978-3-030-30371-6"},{"key":"9732_CR14","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/J.IJAR.2016.06.005","volume":"78","author":"MS Fagundes","year":"2016","unstructured":"Fagundes, M. S., Ossowski, S., Cerquides, J., & Noriega, P. (2016). Design and evaluation of norm-aware agents based on normative markov decision processes. Int J Approx Reason, 78, 33\u201361. https:\/\/doi.org\/10.1016\/J.IJAR.2016.06.005","journal-title":"Int J Approx Reason"},{"key":"9732_CR15","unstructured":"Finn, C., Levine, S., Abbeel, P. (2016). Guided cost learning: Deep inverse optimal control via policy optimization. https:\/\/proceedings.mlr.press\/v48\/finn16.html"},{"key":"9732_CR16","doi-asserted-by":"publisher","unstructured":"Friedman, B., Kahn, P.H., Borning, A., Huldtgren, A. (2013). Value sensitive design and information systems, Springer Netherlands, Dordrecht, pp 55\u201395. https:\/\/doi.org\/10.1007\/978-94-007-7844-3_4","DOI":"10.1007\/978-94-007-7844-3_4"},{"key":"9732_CR17","unstructured":"Fu, J., Luo, K., Levine, S. (2018). Learning robust rewards with adverserial inverse reinforcement learning. In: International conference on learning representations. https:\/\/openreview.net\/forum?id=rkHywl-A-"},{"key":"9732_CR18","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/s10994-012-5313-8","volume":"89","author":"J F\u00fcrnkranz","year":"2012","unstructured":"F\u00fcrnkranz, J., H\u00fcllermeier, E., Cheng, W., & Park, S. H. (2012). Preference-based reinforcement learning: a formal framework and a policy iteration algorithm. Machine learning, 89, 123\u2013156. https:\/\/doi.org\/10.1007\/s10994-012-5313-8","journal-title":"Machine learning"},{"key":"9732_CR19","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1007\/S11023-020-09539-2","volume":"30","author":"I Gabriel","year":"2020","unstructured":"Gabriel, I. (2020). Artificial intelligence, values, and alignment. Minds and Machines, 30, 411\u2013437. https:\/\/doi.org\/10.1007\/S11023-020-09539-2","journal-title":"Minds and Machines"},{"key":"9732_CR20","doi-asserted-by":"publisher","unstructured":"Graham, J., Haidt, J., Koleva, S., Motyl, M., Iyer, R., Wojcik, S.P., Ditto, P.H. (2013). Chapter two - moral foundations theory: The pragmatic validity of moral pluralism. In: Devine, P., Plant, A. (eds) Advances in experimental social psychology (vol\u00a047, pp 55\u2013130). Academic Press. https:\/\/doi.org\/10.1016\/B978-0-12-407236-7.00002-4","DOI":"10.1016\/B978-0-12-407236-7.00002-4"},{"issue":"12","key":"9732_CR21","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1016\/S1364-6613(02)02011-9","volume":"6","author":"J Greene","year":"2002","unstructured":"Greene, J., & Haidt, J. (2002). How (and where) does moral judgment work? Trends in Cognitive Sciences, 6(12), 517\u2013523. https:\/\/doi.org\/10.1016\/S1364-6613(02)02011-9","journal-title":"Trends in Cognitive Sciences"},{"key":"9732_CR22","volume-title":"Advances in Neural Information Processing Systems","author":"D Hadfield-Menell","year":"2016","unstructured":"Hadfield-Menell, D., Russell, S. J., Abbeel, P., & Dragan, A. (2016). Cooperative inverse reinforcement learning. In D. Lee, M. Sugiyama, U. Luxburg, I. Guyon, & R. Garnett (Eds.), Advances in Neural Information Processing Systems.  (Vol. 29). Curran Associates Inc."},{"key":"9732_CR23","doi-asserted-by":"publisher","unstructured":"Haidt, J. (2001). The emotional dog and its rational tail: a social intuitionist approach to moral judgment. Psychological review, 108(4), 814. https:\/\/doi.org\/10.1037\/0033-295X.108.4.814","DOI":"10.1037\/0033-295X.108.4.814"},{"key":"9732_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/S10458-022-09552-Y","volume":"36","author":"CF Hayes","year":"2022","unstructured":"Hayes, C. F., R\u0103dulescu, R., Bargiacchi, E., K\u00e4llstr\u00f6m, J., Macfarlane, M., Reymond, M., Verstraeten, T., Zintgraf, L. M., Dazeley, R., Heintz, F., Howley, E., Irissappane, A. A., Mannion, P., Now\u00e9, A., Ramos, G., Restelli, M., et al. (2022). A practical guide to multi-objective reinforcement learning and planning. Autonomous Agents and Multi-Agent Systems, 36, 1\u201359. https:\/\/doi.org\/10.1007\/S10458-022-09552-Y","journal-title":"Autonomous Agents and Multi-Agent Systems"},{"key":"9732_CR25","doi-asserted-by":"publisher","unstructured":"Holgado-S\u00e1nchez, A., Arias, J., Moreno-Rebato, M., Ossowski, S. (2023). On admissible behaviours for goal-oriented decision-making of value-aware agents. In: Multi-agent systems. Springer Nature Switzerland, Cham, pp. 415\u2013424, https:\/\/doi.org\/10.1007\/978-3-031-43264-4_27","DOI":"10.1007\/978-3-031-43264-4_27"},{"key":"9732_CR26","doi-asserted-by":"publisher","unstructured":"Holgado-S\u00e1nchez, A., Arias, J., Billhardt, H., Ossowski, S. (2024) Algorithms for learning value-aligned policies considering admissibility relaxation. In: Osman, N., Steels, L. (eds) Value engineering in artificial intelligence. Springer Nature Switzerland, Cham, pp. 145\u2013164, https:\/\/doi.org\/10.1007\/978-3-031-58202-8_9","DOI":"10.1007\/978-3-031-58202-8_9"},{"key":"9732_CR27","doi-asserted-by":"publisher","unstructured":"Holgado-S\u00e1nchez, A., Bajo, J., Billhardt, H., Ossowski, S., Arias, J. (2025a) Value learning for value-aligned route choice modeling via inverse reinforcement learning. In: Osman, N., Steels, L. (eds) Value engineering in artificial intelligence. Springer Nature Switzerland, Cham, pp. 40\u201360, https:\/\/doi.org\/10.1007\/978-3-031-85463-7_3","DOI":"10.1007\/978-3-031-85463-7_3"},{"key":"9732_CR28","doi-asserted-by":"publisher","unstructured":"Holgado-S\u00e1nchez, A., Billhardt, H., Ossowski, S., Degli-Esposti, S. (2025b) Learning the value systems of societies from preferences. In: Proceedings of the 28th european conference on artificial intelligence (ECAI 2025), frontiers in artificial intelligence and applications (vol 413, pp. 1123\u20131130). IOS Press. https:\/\/doi.org\/10.3233\/FAIA250923, https:\/\/ebooks.iospress.nl\/volumearticle\/75859","DOI":"10.3233\/FAIA250923"},{"key":"9732_CR29","doi-asserted-by":"publisher","unstructured":"Holgado-S\u00e1nchez, A., Billhardt, H., Ossowski, S., Fern\u00e1ndez, A. (2024). An ontology for value awareness engineering. In: Proceedings of the 16th international conference on agents and artificial intelligence - volume 3: AWAI, INSTICC. SciTePress, pp. 1421\u20131428, https:\/\/doi.org\/10.5220\/0012595500003636","DOI":"10.5220\/0012595500003636"},{"key":"9732_CR30","doi-asserted-by":"publisher","unstructured":"Holm, S. (2002). Principles of biomedical ethics, 5th edn. journal of medical ethics, 28(5), 332\u2013332. https:\/\/doi.org\/10.1136\/jme.28.5.332-a, https:\/\/jme.bmj.com\/content\/28\/5\/332.2.full.pdf","DOI":"10.1136\/jme.28.5.332-a"},{"key":"9732_CR31","doi-asserted-by":"publisher","unstructured":"Kalweit, G., Huegle, M., Werling, M., Boedecker, J. (2020). Deep inverse q-learning with constraints. Advances in Neural Information Processing Systems, 2020-December. https:\/\/doi.org\/10.48550\/arXiv.2008.01712","DOI":"10.48550\/arXiv.2008.01712"},{"key":"9732_CR32","doi-asserted-by":"publisher","unstructured":"Karanik, M., Billhardt, H., Fern\u00e1ndez, A., Ossowski, S. (2024). On the relevance of value system structure for automated value-aligned decision-making. In: Proceedings of the 39th ACM\/SIGAPP symposium on applied computing. Association for computing machinery, pp .679\u2013686, https:\/\/doi.org\/10.1145\/3605098.3636057","DOI":"10.1145\/3605098.3636057"},{"key":"9732_CR33","doi-asserted-by":"publisher","unstructured":"Kaufmann, T., Weng, P., Bengs, V., H\u00fcllermeier, E. (2024). A survey of reinforcement learning from human feedback. https:\/\/doi.org\/10.48550\/arXiv.2312.14925","DOI":"10.48550\/arXiv.2312.14925"},{"key":"9732_CR34","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1109\/IIAI-AAI53430.2021.00078","volume":"2021","author":"D Kishikawa","year":"2021","unstructured":"Kishikawa, D., & Arai, S. (2021). Multi-objective inverse reinforcement learning via non-negative matrix factorization. Proceedings - 2021 10th International Congress on Advanced Applied Informatics. IIAI-AAI, 2021, 452\u2013457. https:\/\/doi.org\/10.1109\/IIAI-AAI53430.2021.00078","journal-title":"IIAI-AAI"},{"key":"9732_CR35","doi-asserted-by":"publisher","unstructured":"Lazzari, N., Giorgis, S.D., Gangemi, A., Presutti, V. (2024). Explainable moral values: a neuro-symbolic approach to value classification. https:\/\/doi.org\/10.48550\/arXiv.2410.12631","DOI":"10.48550\/arXiv.2410.12631"},{"key":"9732_CR36","unstructured":"Lee, K., Smith, L.M., Abbeel, P. (2021). Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. In: Meila, M., Zhang, T. (eds) Proceedings of the 38th international conference on machine learning, proceedings of machine learning research (vol 139, pp. 6152\u20136163). PMLR. https:\/\/proceedings.mlr.press\/v139\/lee21i.html"},{"key":"9732_CR37","doi-asserted-by":"publisher","unstructured":"Leike, J., Krueger, D., Everitt, T., Martic, M., Maini, V., Legg, S. (2018). Scalable agent alignment via reward modeling: a research direction. ArXiv abs\/1811.07871. https:\/\/doi.org\/10.48550\/arXiv.1811.07871","DOI":"10.48550\/arXiv.1811.07871"},{"key":"9732_CR39","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111453","volume":"287","author":"RX Lera-Leri","year":"2024","unstructured":"Lera-Leri, R. X., Liscio, E., Bistaffa, F., Jonker, C. M., L\u00f3pez-S\u00e1nchez, M., Murukannaiah, P. K., Rodr\u00edguez-Aguilar, J. A., & Salas-Molina, F. (2024). Aggregating value systems for decision support. Knowledge-Based Systems, 287, Article 111453. https:\/\/doi.org\/10.1016\/j.knosys.2024.111453","journal-title":"Knowledge-Based Systems"},{"key":"9732_CR40","unstructured":"Levine, S., Popovic, Z., Koltun, V. (2011). Nonlinear inverse reinforcement learning with gaussian processes. In: Shawe-Taylor, J., Zemel, R., Bartlett, P., Pereira, F., Weinberger, K. (eds) Advances in neural information processing systems (vol\u00a024, pp .19\u201327). Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2011\/file\/c51ce410c124a10e0db5e4b97fc2af39-Paper.pdf"},{"key":"9732_CR41","doi-asserted-by":"crossref","unstructured":"Liscio, E., van der Meer, M., Cavalcante Siebert, L., Mouter, N., Jonker, C., Murukannaiah, P. (2021). Axies: Identifying and evaluating context-specific values. In: Proceedings of the 20th international conference on autonomous agents and multiagent systems. International foundation for autonomous agents and multiagent systems, AAMAS \u201921, pp. 799\u2013808, http:\/\/www.ifaamas.org\/Proceedings\/aamas2021\/pdfs\/p799.pdf","DOI":"10.65109\/OGNT4421"},{"key":"9732_CR42","doi-asserted-by":"publisher","unstructured":"Liscio, E., Dondera, A., Geadau, A., Jonker, C., Murukannaiah, P. (2022). Cross-domain classification of moral values. In: Carpuat, M., de\u00a0Marneffe, M.C., Meza\u00a0Ruiz, I.V. (Eds.) Findings of the association for computational linguistics: NAACL 2022. Association for computational linguistics, seattle, United States, pp. 2727\u20132745. https:\/\/doi.org\/10.18653\/v1\/2022.findings-naacl.209","DOI":"10.18653\/v1\/2022.findings-naacl.209"},{"key":"9732_CR43","doi-asserted-by":"publisher","unstructured":"Liscio, E., Lera-Leri, R., Bistaffa, F., Dobbe, R.I., Jonker, C.M., L\u00f3pez-S\u00e1nchez, M., Rodr\u00edguez-Aguilar, J.A., Murukannaiah, P.K. (2023a). Value inference in sociotechnical systems. In: Proceedings of the 2023 international conference on autonomous agents and multiagent systems. International foundation for autonomous agents and multiagent systems, Richland, SC, AAMAS \u201923, pp. 1774\u20131780. https:\/\/doi.org\/10.5555\/3545946.3598838","DOI":"10.5555\/3545946.3598838"},{"key":"9732_CR44","doi-asserted-by":"publisher","unstructured":"Liscio, E., Lera-Leri, R., Bistaffa, F., Dobbe, R.I.J., Jonker, C.M., L\u00f3pez-S\u00e1nchez, M., Rodr\u00edguez-Aguilar, J.A., Murukannaiah, P.K. (2023b). Inferring values via hybrid intelligence. In: HHAI 2023: Augmenting human intellect: Proceedings of the second international conference on hybrid human-artificial intelligence, Frontiers in artificial intelligence and applications (vol. 368, pp. 373\u2013378). IOS Press BV. https:\/\/doi.org\/10.3233\/FAIA230102","DOI":"10.3233\/FAIA230102"},{"key":"9732_CR45","doi-asserted-by":"publisher","unstructured":"Mercuur, R., Dignum, V., Jonker, C. (2019). The value of values and norms in social simulation. Journal of Artificial Societies and Social Simulation, 22(1), 9. https:\/\/doi.org\/10.18564\/jasss.3929, http:\/\/jasss.soc.surrey.ac.uk\/22\/1\/9.html","DOI":"10.18564\/jasss.3929"},{"key":"9732_CR46","doi-asserted-by":"publisher","first-page":"1739","DOI":"10.1613\/jair.1.13487","volume":"74","author":"N Montes","year":"2022","unstructured":"Montes, N., & Sierra, C. (2022). Synthesis and properties of optimally value-aligned normative systems. Journal of Artificial Intelligence Research, 74, 1739\u20131774. https:\/\/doi.org\/10.1613\/jair.1.13487","journal-title":"Journal of Artificial Intelligence Research"},{"key":"9732_CR47","doi-asserted-by":"publisher","unstructured":"Montes, N., Osman, N., Sierra, C., Slavkovik, M. (2023). Value engineering for autonomous agents. CoRR abs\/2302.08759. https:\/\/doi.org\/10.48550\/arXiv.2302.08759","DOI":"10.48550\/arXiv.2302.08759"},{"key":"9732_CR48","unstructured":"Ng, A.Y., Russell, S.J. (2000). Algorithms for inverse reinforcement learning. In: Proceedings of the 17th international conference on machine learning (pp. 663\u2013670). Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, ICML \u201900"},{"key":"9732_CR49","doi-asserted-by":"crossref","unstructured":"Osman, N., d\u2019Inverno, M. (2024). A computational framework of human values. In: Proceedings of the 23rd international conference on autonomous agents and multiagent systems (pp. 1531\u20131539). International foundation for autonomous agents and multiagent systems, Richland, SC, AAMAS \u201924","DOI":"10.65109\/MEOZ8791"},{"key":"9732_CR50","doi-asserted-by":"crossref","unstructured":"Osman, N., Rodr\u00edguez-Soto, M., Sabater-Mir, J. (2025). Instilling organisational values in firefighters through simulation-based training. https:\/\/arxiv.org\/abs\/2512.13737","DOI":"10.1007\/978-3-032-13063-1_14"},{"key":"9732_CR51","doi-asserted-by":"publisher","unstructured":"Ossowski, S. (ed) (2013). Agreement Technologies. No.\u00a08 in Law, Governance and Technology series (LGTS), Springer, https:\/\/doi.org\/10.1007\/978-94-007-5583-3","DOI":"10.1007\/978-94-007-5583-3"},{"key":"9732_CR52","doi-asserted-by":"publisher","unstructured":"van de Poel, I. (2021). Design for value change. Ethics and Information Technology, 23(1), 27\u201331. https:\/\/doi.org\/10.1007\/s10676-018-9461-9","DOI":"10.1007\/s10676-018-9461-9"},{"issue":"1","key":"9732_CR53","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/S1755-5345(13)70005-8","volume":"2","author":"CG Prato","year":"2009","unstructured":"Prato, C. G. (2009). Route choice modeling: past, present and future research directions. Journal of Choice Modelling, 2(1), 65\u2013100. https:\/\/doi.org\/10.1016\/S1755-5345(13)70005-8","journal-title":"Journal of Choice Modelling"},{"issue":"10","key":"9732_CR54","doi-asserted-by":"publisher","first-page":"11183","DOI":"10.1609\/aaai.v36i10.21368","volume":"36","author":"L Qiu","year":"2022","unstructured":"Qiu, L., Zhao, Y., Li, J., Lu, P., Peng, B., Gao, J., & Zhu, S. C. (2022). Valuenet: A new dataset for human value driven dialogue system. Proceedings of the AAAI Conference on Artificial Intelligence, 36(10), 11183\u201311191. https:\/\/doi.org\/10.1609\/aaai.v36i10.21368","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"9732_CR55","unstructured":"Rabinowitz, N., Perbet, F., Song, F., Zhang, C., Eslami, S.M.A., Botvinick, M. (2018). Machine theory of mind. In: Dy, J., Krause, A. (eds) Proceedings of the 35th international conference on machine learning, proceedings of machine learning research (vol\u00a080, pp. 4218\u20134227). PMLR. https:\/\/proceedings.mlr.press\/v80\/rabinowitz18a.html"},{"key":"9732_CR56","doi-asserted-by":"crossref","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., Manning, C.D., Ermon, S., Finn, C. (2023). Direct preference optimization: Your language model is secretly a reward model. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds) Advances in neural information processing systems (vol\u00a036. pp. 53728\u201353741). Curran Associates, Inc.. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/a85b405ed65c6477a4fe8302b5e06ce7-Paper-Conference.pdf","DOI":"10.52202\/075280-2338"},{"key":"9732_CR57","unstructured":"Reddy, S., Dragan, A., Levine, S., Legg, S., Leike, J. (2020). Learning human objectives by evaluating hypothetical behavior. In: III, H.D., Singh, A. (eds) Proceedings of the 37th international conference on machine learning, proceedings of machine learning research (vol 119, pp. 8020\u20138029). PMLR. https:\/\/proceedings.mlr.press\/v119\/reddy20a.html"},{"key":"9732_CR58","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/s10676-022-09635-0","volume":"24","author":"M Rodr\u00edguez-Soto","year":"2022","unstructured":"Rodr\u00edguez-Soto, M., Serramia, M., L\u00f3pez-S\u00e1nchez, M., & Rodr\u00edguez-Aguilar, J. A. (2022). Instilling moral value alignment by means of multi-objective reinforcement learning. Ethics and Information Technology, 24, 9. https:\/\/doi.org\/10.1007\/s10676-022-09635-0","journal-title":"Ethics and Information Technology"},{"key":"9732_CR59","doi-asserted-by":"publisher","unstructured":"Rodr\u00edguez-Soto, M., Osman, N., Sierra, C., Veja, P.S., Garc\u00eda, R.C., Danes, C.F., Retortillo, M.G., Mas\u00f3, S.M. (2024). Towards value awareness in the medical field. In: Proceedings of the 16th international conference on agents and artificial intelligence - volume 3: AWAI (pp. 1391\u20131398). INSTICC. SciTePress. https:\/\/doi.org\/10.5220\/0012588600003636","DOI":"10.5220\/0012588600003636"},{"key":"9732_CR60","doi-asserted-by":"publisher","unstructured":"Rodr\u00edguez-Soto, M., Osman, N., Sierra, C., Montes, N., Martinez\u00a0Roldan, J., Cintas\u00a0Garc\u00eda, R., Farriols\u00a0Danes, C., Garc\u00eda\u00a0Retortillo, M., M\u00ednguez\u00a0Mas\u00f3, S. (2025). User study design for identifying the semantics of bioethical principles. In: Osman, N., Steels, L. (eds) Value engineering in artificial intelligence (pp. 22\u201339). Springer Nature. https:\/\/doi.org\/10.1007\/978-3-031-85463-7_2","DOI":"10.1007\/978-3-031-85463-7_2"},{"key":"9732_CR61","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2025.104460","volume":"351","author":"M Rodr\u00edguez-Soto","year":"2026","unstructured":"Rodr\u00edguez-Soto, M., R\u0103dulescu, R., Bistaffa, F., Ricart, O., Mayoral-Macau, A., L\u00f3pez-S\u00e1nchez, M., Rodr\u00edguez-Aguilar, J. A., & Now\u00e9, A. (2026). Multi-objective reinforcement learning for provably incentivising alignment with value systems. Artificial Intelligence, 351, Article 104460. https:\/\/doi.org\/10.1016\/j.artint.2025.104460","journal-title":"Artificial Intelligence"},{"key":"9732_CR62","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1207\/S15327957PSPR0403_4","volume":"4","author":"MJ Rohan","year":"2000","unstructured":"Rohan, M. J. (2000). A rose by any name? the values construct. Personality and Social Psychology Review, 4, 255\u2013277. https:\/\/doi.org\/10.1207\/S15327957PSPR0403_4","journal-title":"Personality and Social Psychology Review"},{"key":"9732_CR63","doi-asserted-by":"publisher","unstructured":"Rothkopf, C.A., Dimitrakakis, C. (2011). Preference elicitation and inverse reinforcement learning. Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), 6913, LNAI, 34\u201348. https:\/\/doi.org\/10.1007\/978-3-642-23808-6_3","DOI":"10.1007\/978-3-642-23808-6_3"},{"key":"9732_CR64","doi-asserted-by":"publisher","unstructured":"Russell, S. (2022). Artificial intelligence and the problem of control (pp. 19\u201324). Springer International Publishing, Cham. https:\/\/doi.org\/10.1007\/978-3-030-86144-5_3","DOI":"10.1007\/978-3-030-86144-5_3"},{"key":"9732_CR65","doi-asserted-by":"crossref","unstructured":"Schwartz, S.H. (1992). Universals in the content and structure of values: Theoretical advances and empirical tests in 20 countries. In: Advances in experimental social psychology (vol\u00a025, pp. 1\u201365). Elsevier","DOI":"10.1016\/S0065-2601(08)60281-6"},{"key":"9732_CR66","doi-asserted-by":"crossref","unstructured":"Serramia, M., L\u00f3pez-S\u00e1nchez, M., Rodr\u00edguez-Aguilar, J.A., Rodr\u00edguez, M., Wooldridge, M., Morales, J., Ans\u00f3tegui, C. (2018). Moral values in norm decision making. In: Proceedings of the 17th international conference on autonomous agents and multiagent systems (pp. 1294\u20131302). IFAAMAS, Richland, SC, AAMAS \u201918","DOI":"10.65109\/ZJZK1415"},{"key":"9732_CR67","doi-asserted-by":"publisher","unstructured":"Serramia, M., Rodr\u00edguez-Soto, M., L\u00f3pez-S\u00e1nchez, M., Rodr\u00edguez-Aguilar, J., Bistaffa, F., Boddington, P., Wooldridge, M., Ans\u00f3tegui, C. (2023). Encoding ethics to compute value-aligned norms. Minds and Machines, 1\u201330. https:\/\/doi.org\/10.1007\/s11023-023-09649-7","DOI":"10.1007\/s11023-023-09649-7"},{"key":"9732_CR68","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1007\/978-3-319-21365-1_16","volume":"9205","author":"CE Sezener","year":"2015","unstructured":"Sezener, C. E. (2015). Inferring human values for safe agi design. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 9205, 152\u2013155. https:\/\/doi.org\/10.1007\/978-3-319-21365-1_16","journal-title":"Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)"},{"key":"9732_CR69","doi-asserted-by":"publisher","first-page":"75269","DOI":"10.1109\/ACCESS.2022.3190975","volume":"10","author":"M Shahin","year":"2022","unstructured":"Shahin, M., Hussain, W., Nurwidyantoro, A., Perera, H., Shams, R., Grundy, J., & Whittle, J. (2022). Operationalizing human values in software engineering: A survey. IEEE Access, 10, 75269\u201375295. https:\/\/doi.org\/10.1109\/ACCESS.2022.3190975","journal-title":"IEEE Access"},{"key":"9732_CR70","unstructured":"Sierra, C., Osman, N., Noriega, P., Sabater-Mir, J., Perell\u00f3, A. (2021). Value alignment: A formal approach. arxiv:2110.09240"},{"key":"9732_CR71","unstructured":"Sinnott-Armstrong, W. (2019). Consequentialism. Stanford Encyclopedia of Philosophy"},{"key":"9732_CR72","doi-asserted-by":"crossref","unstructured":"Soares, N. (2018) The value learning problem. Artificial Intelligence Safety and Security, https:\/\/api.semanticscholar.org\/CorpusID:13096553","DOI":"10.1201\/9781351251389-7"},{"key":"9732_CR73","doi-asserted-by":"crossref","unstructured":"Sumers, T., Hawkins, R., Ho, M.K., Griffiths, T., Hadfield-Menell, D. (2022). How to talk so ai will learn: Instructions, descriptions, and autonomy. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds) Advances in neural information processing systems (vol\u00a035, pp. 34762\u201334775). Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/e0cfde0ff720fa9674bb976e7f1b99d4-Paper-Conference.pdf","DOI":"10.52202\/068431-2519"},{"key":"9732_CR74","doi-asserted-by":"publisher","unstructured":"Vamplew, P., Dazeley, R., Foale, C., Firmin, S., & Mummery, J. (2018). Human-aligned artificial intelligence is a multiobjective problem. Ethics and Information Technology, 20(1), 27\u201340. https:\/\/doi.org\/10.1007\/S10676-017-9440-6\/FIGURES\/1","DOI":"10.1007\/S10676-017-9440-6\/FIGURES\/1"},{"key":"9732_CR75","doi-asserted-by":"crossref","unstructured":"Wallach, W., Allen, C. (2008). Moral machines: Teaching robots right from wrong. Oxford University Press","DOI":"10.1093\/acprof:oso\/9780195374049.001.0001"},{"key":"9732_CR76","doi-asserted-by":"publisher","unstructured":"Wang, H., Xiong, W., Xie, T., Zhao, H., Zhang, T. (2024). Interpretable preferences via multi-objective reward modeling and mixture-of-experts. In: Al-Onaizan, Y., Bansal, M., Chen, Y.N. (eds) Findings of the association for computational linguistics: EMNLP 2024 (pp. 10582\u201310592). Association for Computational Linguistics, Miami, Florida, USA. https:\/\/doi.org\/10.18653\/v1\/2024.findings-emnlp.620, https:\/\/aclanthology.org\/2024.findings-emnlp.620\/","DOI":"10.18653\/v1\/2024.findings-emnlp.620"},{"key":"9732_CR77","doi-asserted-by":"publisher","unstructured":"Weidinger, L., McKee, K. R., Everett, R., Huang, S., Zhu, T. O., Chadwick, M. J., Summerfield, C., & Gabriel, I. (2023). Using the veil of ignorance to align ai systems with principles of justice. Proceedings of the National Academy of Sciences, 120(18), Article e2213709120. https:\/\/doi.org\/10.1073\/pnas.2213709120","DOI":"10.1073\/pnas.2213709120"},{"issue":"9","key":"9732_CR78","doi-asserted-by":"publisher","first-page":"10207","DOI":"10.1609\/aaai.v38i9.28886","volume":"38","author":"D White","year":"2024","unstructured":"White, D., Wu, M., Novoseller, E., Lawhern, V. J., Waytowich, N., & Cao, Y. (2024). Rating-based reinforcement learning. Proceedings of the AAAI Conference on Artificial Intelligence, 38(9), 10207\u201310215. https:\/\/doi.org\/10.1609\/aaai.v38i9.28886","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"9732_CR79","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1007\/978-3-030-01129-1_28","volume-title":"Social Informatics","author":"SR Wilson","year":"2018","unstructured":"Wilson, S. R., Shen, Y., & Mihalcea, R. (2018). Building and validating hierarchical lexicons with a case study on personal values. In S. Staab, O. Koltsova, & D. I. Ignatov (Eds.), Social Informatics (pp. 455\u2013470). Cham: Springer International Publishing."},{"key":"9732_CR80","unstructured":"Wirth, C., Akrour, R., Neumann, G., F\u00fcrnkranz, J. (2017). A survey of preference-based reinforcement learning methods. Journal of Machine Learning Research, 18(136), 1\u201346. http:\/\/jmlr.org\/papers\/v18\/16-634.html"},{"key":"9732_CR81","unstructured":"Wulfmeier, M., Ondr\u00fa\u0161ka, P., Ondr\u00fa\u0161ka, O., Posner, I. (2015). Maximum entropy deep inverse reinforcement learning. arXiv preprint arXiv:1507.04888"},{"key":"9732_CR82","unstructured":"Yang, R., Pan, X., Luo, F., Qiu, S., Zhong, H., Yu, D., Chen, J. (2024). Rewards-in-context: multi-objective alignment of foundation models with dynamic preference adjustment. In: Proceedings of the 41st international conference on machine learning (pp. 56276 \u2013 56297). JMLR.org, ICML\u201924"},{"key":"9732_CR83","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2023.104079","volume":"149","author":"Z Zhao","year":"2023","unstructured":"Zhao, Z., & Liang, Y. (2023). A deep inverse reinforcement learning approach to route choice modeling with context-dependent rewards. Transportation Research Part C: Emerging Technologies, 149, Article 104079. https:\/\/doi.org\/10.1016\/j.trc.2023.104079","journal-title":"Transportation Research Part C: Emerging Technologies"},{"key":"9732_CR84","doi-asserted-by":"publisher","unstructured":"Ziebart, B.D. (2010). Modeling purposeful adaptive behavior with the principle of maximum causal entropy. PhD thesis, CMU School of Computer Science, USA, https:\/\/doi.org\/10.1184\/R1\/6720692.v1","DOI":"10.1184\/R1\/6720692.v1"},{"key":"9732_CR85","unstructured":"Ziebart, B.D., Maas, A., Bagnell, J.A., Dey, A.K. (2008). Maximum entropy inverse reinforcement learning. In: Proceedings of the 23rd national conference on artificial intelligence - volume 3 (pp. 1433\u20131438). AAAI Press"}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09732-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10458-026-09732-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-026-09732-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T06:32:05Z","timestamp":1781764325000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10458-026-09732-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,3]]},"references-count":84,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,6]]}},"alternative-id":["9732"],"URL":"https:\/\/doi.org\/10.1007\/s10458-026-09732-0","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"value":"1387-2532","type":"print"},{"value":"1573-7454","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,3]]},"assertion":[{"value":"18 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"4"}}