{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T15:29:22Z","timestamp":1767972562085,"version":"3.49.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,6,19]],"date-time":"2021-06-19T00:00:00Z","timestamp":1624060800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,19]],"date-time":"2021-06-19T00:00:00Z","timestamp":1624060800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100002913","name":"Vlaamse Overheid","doi-asserted-by":"publisher","award":["Onderzoeksprogramma Artifici\u00eble Intelligentie (AI) Vlaanderen"],"award-info":[{"award-number":["Onderzoeksprogramma Artifici\u00eble Intelligentie (AI) Vlaanderen"]}],"id":[{"id":"10.13039\/501100002913","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003130","name":"Fonds Wetenschappelijk Onderzoek","doi-asserted-by":"publisher","award":["1S47617N"],"award-info":[{"award-number":["1S47617N"]}],"id":[{"id":"10.13039\/501100003130","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s00521-021-06184-3","type":"journal-article","created":{"date-parts":[[2021,6,19]],"date-time":"2021-06-19T04:03:11Z","timestamp":1624075391000},"page":"1759-1781","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Opponent learning awareness and modelling in multi-objective normal form games"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1446-5514","authenticated-orcid":false,"given":"Roxana","family":"R\u0103dulescu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Timothy","family":"Verstraeten","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yijie","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Patrick","family":"Mannion","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Diederik M.","family":"Roijers","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ann","family":"Now\u00e9","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,6,19]]},"reference":[{"key":"6184_CR1","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.artint.2018.01.002","volume":"258","author":"SV Albrecht","year":"2018","unstructured":"Albrecht SV, Stone P (2018) Autonomous agents modelling other agents: A comprehensive survey and open problems. Artif Intell 258:66\u201395","journal-title":"Artif Intell"},{"key":"6184_CR2","volume-title":"Probability and measure","author":"P Billingsley","year":"2008","unstructured":"Billingsley P (2008) Probability and measure. Wiley, New York"},{"issue":"1","key":"6184_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2140\/pjm.1956.6.1","volume":"6","author":"D Blackwell","year":"1956","unstructured":"Blackwell D et al (1956) An analog of the minimax theorem for vector payoffs. Pacific J Math 6(1):1\u20138","journal-title":"Pacific J Math"},{"key":"6184_CR4","unstructured":"Bonilla E.V, Chai K.M, Williams C (2008) Multi-task gaussian process prediction. Advances in neural information processing systems, 153\u2013160"},{"key":"6184_CR5","first-page":"302","volume":"60","author":"P Borm","year":"1988","unstructured":"Borm P, Tijs S, Van Den Aarssen J (1988) Pareto equilibria in multiobjective games. Methods Op Res 60:302\u2013312","journal-title":"Methods Op Res"},{"issue":"3","key":"6184_CR6","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1016\/S0377-2217(02)00406-X","volume":"148","author":"P Borm","year":"2003","unstructured":"Borm P, Vermeulen D, Voorneveld M (2003) The structure of the set of equilibria for two person multicriteria games. Eur J Op Res 148(3):480\u2013493","journal-title":"Eur J Op Res"},{"key":"6184_CR7","unstructured":"Chajewska U, Koller D (2000) Utilities as random variables: Density estimation and structure discovery. In Proceedings of the Sixteenth conference on Uncertainty in artificial intelligence, pp 63\u201371. Morgan Kaufmann Publishers Inc"},{"key":"6184_CR8","unstructured":"Chajewska U, Koller D, Ormoneit D (2001) Learning an agent\u2019s utility function by observing behavior. In Proceedings of the Eighteenth International Conference on Machine Learning, pp 35\u201342"},{"key":"6184_CR9","unstructured":"Chajewska U, Koller D, Parr R (2000) Making rational decisions using adaptive utility elicitation. In: AAAI\/IAAI, pp. 363\u2013369"},{"key":"6184_CR10","doi-asserted-by":"crossref","unstructured":"Chu W, Ghahramani Z (2005) Preference learning with gaussian processes. In Proceedings of the 22nd international conference on Machine learning, pp 137\u2013144. ACM","DOI":"10.1145\/1102351.1102369"},{"key":"6184_CR11","first-page":"2","volume":"746\u2013752","author":"C Claus","year":"1998","unstructured":"Claus C (1998) Boutilier C (1998) The dynamics of reinforcement learning in cooperative multiagent systems. AAAI\/IAAI 746\u2013752:2","journal-title":"AAAI\/IAAI"},{"key":"6184_CR12","unstructured":"Foerster J, Chen R.Y, Al-Shedivat M, Whiteson S, Abbeel P, Mordatch I (2018) Learning with opponent-learning awareness. In Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, pp 122\u2013130"},{"key":"6184_CR13","unstructured":"Foerster J, Farquhar G, Al-Shedivat M, Rockt\u00e4schel T, Xing E, Whiteson S (2018) Dice: The infinitely differentiable monte carlo estimator. In International Conference on Machine Learning, pp 1529\u20131538"},{"key":"6184_CR14","volume-title":"The theory of learning in games","author":"D Fudenberg","year":"1998","unstructured":"Fudenberg D, Drew F, Levine DK, Levine DK (1998) The theory of learning in games, vol 2. MIT press, Cambridge"},{"key":"6184_CR15","first-page":"262","volume":"23","author":"S Guo","year":"2010","unstructured":"Guo S, Sanner S, Bonilla EV (2010) Gaussian process preference elicitation. Adv Neural Inf Process Syst 23:262\u2013270","journal-title":"Adv Neural Inf Process Syst"},{"key":"6184_CR16","unstructured":"Hayes C.F, R\u0103dulescu R, Bargiacchi E, K\u00e4llstr\u00f6m J, Macfarlane M, Reymond M, Verstraeten T, Zintgraf L.M, Dazeley R, Heintz F, et\u00a0al (2021) A practical guide to multi-objective reinforcement learning and planning. arXiv preprint arXiv:2103.09568"},{"key":"6184_CR17","unstructured":"He H, Boyd-Graber J, Kwok K, Daum\u00e9\u00a0III H (2016) Opponent modeling in deep reinforcement learning. In International Conference on Machine Learning, pp 1804\u20131813"},{"key":"6184_CR18","first-page":"29","volume":"2","author":"SJ Knegt","year":"2018","unstructured":"Knegt SJ, Drugan MM, Wiering MA (2018) Opponent modelling in the game of tron using reinforcement learning. ICAART 2:29\u201340","journal-title":"ICAART"},{"key":"6184_CR19","first-page":"6379","volume":"30","author":"R Lowe","year":"2017","unstructured":"Lowe R, Wu YI, Tamar A, Harb J, Pieter Abbeel O, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Adv Neural Inf Process Syst 30:6379\u20136390","journal-title":"Adv Neural Inf Process Syst"},{"key":"6184_CR20","first-page":"115","volume":"3","author":"D Lozovanu","year":"2005","unstructured":"Lozovanu D, Solomon D, Zelikovsky A (2005) Multiobjective games and determining pareto-nash equilibria. Buletinul Academiei de \u015etiin\u0163e a Republicii Moldova. Matematica 3:115\u2013122","journal-title":"Matematica"},{"key":"6184_CR21","doi-asserted-by":"publisher","first-page":"e23","DOI":"10.1017\/S0269888918000292","volume":"33","author":"P Mannion","year":"2018","unstructured":"Mannion P, Devlin S, Duggan J, Howley E (2018) Reward shaping for knowledge-based multi-objective multi-agent reinforcement learning. Knowl Eng Rev 33:e23","journal-title":"Knowl Eng Rev"},{"issue":"2","key":"6184_CR22","doi-asserted-by":"publisher","first-page":"286","DOI":"10.2307\/1969529","volume":"54","author":"J Nash","year":"1951","unstructured":"Nash J (1951) Non-cooperative games. Annals Math 54(2):286\u2013295","journal-title":"Annals Math"},{"key":"6184_CR23","unstructured":"Ng A.Y, Russell S.J, et\u00a0al (2000) Algorithms for inverse reinforcement learning. In Proceedings of the Seventeenth International Conference on Machine Learning, vol.\u00a01, p\u00a02"},{"key":"6184_CR24","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511800481","volume-title":"Algorithmic game theory","author":"N Nisan","year":"2007","unstructured":"Nisan N, Roughgarden T, Tardos E, Vazirani VV (2007) Algorithmic game theory. Cambridge University Press, Cambridge"},{"key":"6184_CR25","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, Desmaison A, Kopf A, Yang E, DeVito Z, Raison M, Tejani A, Chilamkurthy S, Steiner B, Fang L, Bai J, Chintala S (2019) PyTorch: An Imperative Style, High-Performance Deep Learning Library. In: H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d\u2019Alch\u00e9 Buc, E.\u00a0Fox, R.\u00a0Garnett (eds.) Advances in Neural Information Processing Systems, vol.\u00a032, pp. 8026\u20138037. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"6184_CR26","unstructured":"Perolat J, Leibo J.Z, Zambaldi V, Beattie C, Tuyls K, Graepel T (2017) A multi-agent reinforcement learning model of common-pool resource appropriation. In Proceedings of the 31st International Conference on Neural Information Processing Systems, pp 3646\u20133655"},{"key":"6184_CR27","unstructured":"Raileanu R, Denton E, Szlam A, Fergus R Modeling others using oneself in multi-agent reinforcement learning. In International Conference on Machine Learning (ICML)"},{"key":"6184_CR28","first-page":"751","volume":"16","author":"CE Rasmussen","year":"2003","unstructured":"Rasmussen CE, Kuss M (2003) Gaussian processes in reinforcement learning. Adv Neural Inf Process Syst 16:751\u2013758","journal-title":"Adv Neural Inf Process Syst"},{"key":"6184_CR29","unstructured":"Roijers D.M, Steckelmacher D, Now\u00e9 A (2018) Multi-objective reinforcement learning for the expected utility of the return. In Proceedings of the Adaptive and Learning Agents workshop at FAIM"},{"key":"6184_CR30","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers DM, Vamplew P, Whiteson S, Dazeley R (2013) A survey of multi-objective sequential decision-making. J Artif Intell Res 48:67\u2013113","journal-title":"J Artif Intell Res"},{"issue":"1","key":"6184_CR31","first-page":"1","volume":"11","author":"DM Roijers","year":"2017","unstructured":"Roijers DM, Whiteson S (2017) Multi-objective decision making. Synth Lect Artif Intell Mach Learn 11(1):1\u2013129","journal-title":"Synth Lect Artif Intell Mach Learn"},{"issue":"1","key":"6184_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10458-019-09433-x","volume":"34","author":"R R\u0103dulescu","year":"2020","unstructured":"R\u0103dulescu R, Mannion P, Roijers DM, Now\u00e9 A (2020) Multi-objective multi-agent decision making: a utility-based analysis and survey. Auton Agents Multi-Agent Syst 34(1):1\u201352. https:\/\/doi.org\/10.1007\/s10458-019-09433-x","journal-title":"Auton Agents Multi-Agent Syst"},{"key":"6184_CR33","doi-asserted-by":"publisher","first-page":"e32","DOI":"10.1017\/S0269888920000351","volume":"35","author":"R R\u0103dulescu","year":"2020","unstructured":"R\u0103dulescu R, Mannion P, Zhang Y, Roijers DM, Now\u00e9 A (2020) A utility-based analysis of equilibria in multi-objective normal-form games. Knowl Eng Rev 35:e32. https:\/\/doi.org\/10.1017\/S0269888920000351","journal-title":"Knowl Eng Rev"},{"issue":"1","key":"6184_CR34","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1002\/nav.3800060107","volume":"6","author":"LS Shapley","year":"1959","unstructured":"Shapley LS, Rigby FD (1959) Equilibrium points in games with vector payoffs. Naval Res Logist Quart 6(1):57\u201361","journal-title":"Naval Res Logist Quart"},{"key":"6184_CR35","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1016\/j.envpol.2017.02.047","volume":"224","author":"Y Shen","year":"2017","unstructured":"Shen Y, Wu Y, Chen G, Van Grinsven HJ, Wang X, Gu B, Lou X (2017) Non-linear increase of respiratory diseases and their costs under severe air pollution. Environ Pollut 224:631\u2013637","journal-title":"Environ Pollut"},{"key":"6184_CR36","unstructured":"Sutton R.S, Barto A.G (2018) Reinforcement Learning: An Introduction, second edn. The MIT Press. http:\/\/incompleteideas.net\/book\/the-book-2nd.html"},{"key":"6184_CR37","volume-title":"Adversarial reinforcement learning","author":"W Uther","year":"1997","unstructured":"Uther W, Veloso M (1997) Adversarial reinforcement learning. Technical report, Carnegie Mellon University, USA (Unpublished)"},{"issue":"1","key":"6184_CR38","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1006\/game.1998.0680","volume":"28","author":"M Voorneveld","year":"1999","unstructured":"Voorneveld M, Vermeulen D, Borm P (1999) Axiomatizations of pareto equilibria in multicriteria games. Games Econ Behav 28(1):146\u2013154","journal-title":"Games Econ Behav"},{"key":"6184_CR39","unstructured":"Wang KA, Pleiss G, Gardner JR, Tyree S, Weinberger KQ, Wilson AG (2019) Exact gaussian processes on a million data points. Advances in Neural Information Processing Systems 32"},{"issue":"3\u20134","key":"6184_CR40","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3\u20134):229\u2013256. https:\/\/doi.org\/10.1007\/BF00992696","journal-title":"Mach Learn"},{"key":"6184_CR41","unstructured":"Wilson A, Nickisch H (2015) Kernel interpolation for scalable structured gaussian processes (kiss-gp). In International Conference on Machine Learning, pp 1775\u20131784. PMLR"},{"key":"6184_CR42","doi-asserted-by":"crossref","unstructured":"Zhang Y, R\u0103dulescu R, Mannion P, Roijers D.M, Now\u00e9 A (2020) Opponent modelling for reinforcement learning in multi-objective normal form games. In Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems, pp 2080\u20132082","DOI":"10.1007\/s00521-021-06184-3"},{"key":"6184_CR43","unstructured":"Zhang Y, R\u0103dulescu R, Mannion P, Roijers D.M, Now\u00e9 A (2020) Opponent modelling using policy reconstruction for multi-objective normal form games. In: Proceedings of the Adaptive and Learning Agents Workshop (ALA-20) at AAMAS"},{"key":"6184_CR44","unstructured":"Zintgraf L.M, Roijers D.M, Linders S, Jonker C.M, Now\u00e9 A (2018) Ordered preference elicitation strategies for supporting multi-objective decision making. In Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, pp 1477\u20131485. International Foundation for Autonomous Agents and Multiagent Systems"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-06184-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-021-06184-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-06184-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,4]],"date-time":"2022-02-04T18:11:51Z","timestamp":1643998311000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-021-06184-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,19]]},"references-count":44,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["6184"],"URL":"https:\/\/doi.org\/10.1007\/s00521-021-06184-3","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,19]]},"assertion":[{"value":"16 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 May 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 June 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}