{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T22:36:27Z","timestamp":1777070187923,"version":"3.51.4"},"reference-count":63,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T00:00:00Z","timestamp":1774828800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.artint.2026.104525","type":"journal-article","created":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T07:48:18Z","timestamp":1774424898000},"page":"104525","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Partial identifiability and misspecification in inverse reinforcement learning"],"prefix":"10.1016","volume":"356","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0848-7188","authenticated-orcid":false,"given":"Joar","family":"Skalse","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5627-9093","authenticated-orcid":false,"given":"Alessandro","family":"Abate","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.artint.2026.104525_bib0001","series-title":"Proceedings of the Seventeenth International Conference on Machine Learning","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"Ng","year":"2000"},{"key":"10.1016\/j.artint.2026.104525_bib0002","series-title":"Machine Learning and Knowledge Discovery in Databases: ECML PKDD 2011, Proceedings, Part III","first-page":"34","article-title":"Preference elicitation and inverse reinforcement learning","volume":"6913","author":"Rothkopf","year":"2011"},{"issue":"5","key":"10.1016\/j.artint.2026.104525_bib0003","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1006122","article-title":"Identification of animal behavioral strategies by inverse reinforcement learning","volume":"14","author":"Yamaguchi","year":"2018","journal-title":"PLoS Comput. Biol."},{"key":"10.1016\/j.artint.2026.104525_bib0004","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.artint.2026.104525_bib0005","unstructured":"J. Clark, D. Amodei, Faulty Reward Functions in the Wild, 2016, (OpenAI Codex https:\/\/openai.com\/blog\/faulty-reward-functions\/)."},{"key":"10.1016\/j.artint.2026.104525_bib0006","series-title":"International Conference on Learning Representations","article-title":"A deep reinforced model for abstractive summarization","author":"Paulus","year":"2018"},{"key":"10.1016\/j.artint.2026.104525_bib0007","series-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","first-page":"8022","article-title":"Reward learning from human preferences and demonstrations in atari","author":"Ibarz","year":"2018"},{"key":"10.1016\/j.artint.2026.104525_bib0008","unstructured":"D. Manheim, S. Garrabrant, Categorizing Variants of Goodhart\u2019s Law, 2019, 1803.0458510.48550\/arXiv.1803.04585."},{"key":"10.1016\/j.artint.2026.104525_bib0009","unstructured":"V. Krakovna, J. Uesato, V. Mikulik, M. Rahtz, T. Everitt, R. Kumar, Z. Kenton, J. Leike, S. Legg, Specification gaming: the flip side of AI ingenuity, 2020, (https:\/\/deepmind.google\/discover\/blog\/specification-gaming-the-flip-side-of-ai-ingenuity\/)."},{"key":"10.1016\/j.artint.2026.104525_bib0010","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2022.103829","article-title":"Reward (Mis) design for autonomous driving","volume":"316","author":"Knox","year":"2023","journal-title":"Artif. Intell."},{"key":"10.1016\/j.artint.2026.104525_bib0011","series-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","article-title":"Defining and characterizing reward hacking","author":"Skalse","year":"2022"},{"key":"10.1016\/j.artint.2026.104525_bib0012","doi-asserted-by":"crossref","unstructured":"R.Y. Pang, V. Padmakumar, T. Sellam, A.P. Parikh, H. He, et al., Reward Gaming in Conditional Text Generation, arXiv, 2022. 10.48550\/ARXIV.2211.08714.","DOI":"10.18653\/v1\/2023.acl-long.262"},{"key":"10.1016\/j.artint.2026.104525_sbref0013","series-title":"The Twelfth International Conference on Learning Representations","article-title":"Goodhart\u2019s law in reinforcement learning","author":"Karwowski","year":"2024"},{"issue":"13","key":"10.1016\/j.artint.2026.104525_bib0014","doi-asserted-by":"crossref","first-page":"1608","DOI":"10.1177\/0278364910371999","article-title":"Autonomous helicopter aerobatics through apprenticeship learning","volume":"29","author":"Abbeel","year":"2010","journal-title":"Int. J. Rob. Res."},{"key":"10.1016\/j.artint.2026.104525_bib0015","series-title":"Proceedings of Robotics: Science and Systems","article-title":"End-to-end robotic reinforcement learning without reward engineering","author":"Singh","year":"2019"},{"issue":"2","key":"10.1016\/j.artint.2026.104525_bib0016","doi-asserted-by":"crossref","DOI":"10.1145\/3054912","article-title":"Imitation learning: a survey of learning methods","volume":"50","author":"Hussein","year":"2017","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.artint.2026.104525_sbref0017","series-title":"Advances in Neural Information Processing Systems","article-title":"Cooperative inverse reinforcement learning","volume":"29","author":"Hadfield-Menell","year":"2016"},{"key":"10.1016\/j.artint.2026.104525_bib0018","unstructured":"M. Orsini, A. Raichuk, L. Hussenot, D. Vincent, R. Dadashi, S. Girgin, M. Geist, O. Bachem, O. Pietquin, M. Andrychowicz, What matters for adversarial imitation learning?, arXiv preprint arXiv: 2106.00672[cs.LG](2021). To appear in Proceedings of the 35th International Conference on Neural Information Processing Systems, 2021."},{"key":"10.1016\/j.artint.2026.104525_bib0019","series-title":"Proceedings of the 36th International Conference on Machine Learning","first-page":"5670","article-title":"On the feasibility of learning, rather than assuming, human biases for reward inference","volume":"97","author":"Shah","year":"2019"},{"key":"10.1016\/j.artint.2026.104525_bib0020","series-title":"Proceedings of the 40th International Conference on Machine Learning","article-title":"Invariance in policy optimisation and partial identifiability in reward learning","author":"Skalse","year":"2023"},{"issue":"12","key":"10.1016\/j.artint.2026.104525_bib0021","doi-asserted-by":"crossref","first-page":"15136","DOI":"10.1609\/aaai.v37i12.26766","article-title":"Misspecification in inverse reinforcement learning","volume":"37","author":"Skalse","year":"2023","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.artint.2026.104525_sbref0022","series-title":"The Twelfth International Conference on Learning Representations","article-title":"STARC: a general framework for quantifying differences between reward functions","author":"Skalse","year":"2024"},{"key":"10.1016\/j.artint.2026.104525_sbref0023","series-title":"The Twelfth International Conference on Learning Representations","article-title":"Quantifying the sensitivity of inverse reinforcement learning to misspecification","author":"Skalse","year":"2024"},{"key":"10.1016\/j.artint.2026.104525_bib0024","series-title":"Proceedings of the 27th International Conference on Machine Learning","first-page":"335","article-title":"Inverse optimal control with linearly-Solvable MDPs","author":"Dvijotham","year":"2010"},{"key":"10.1016\/j.artint.2026.104525_sbref0025","series-title":"Advances in Neural Information Processing Systems","first-page":"12362","article-title":"Identifiability in inverse reinforcement learning","volume":"34","author":"Cao","year":"2021"},{"key":"10.1016\/j.artint.2026.104525_bib0026","series-title":"Proceedings of the 40th International Conference on Machine Learning","first-page":"37","article-title":"Towards Theoretical Understanding of Inverse Reinforcement Learning","author":"Metelli","year":"2023"},{"issue":"3","key":"10.1016\/j.artint.2026.104525_bib0027","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1016\/0165-1765(81)90067-7","article-title":"Some empirical evidence on dynamic inconsistency","volume":"8","author":"Thaler","year":"1981","journal-title":"Econ Lett"},{"key":"10.1016\/j.artint.2026.104525_bib0028","first-page":"55","article-title":"An adjusting procedure for studying delayed reinforcement","volume":"5","author":"Mazur","year":"1987","journal-title":"Quant. Anal. Behav."},{"issue":"4","key":"10.1016\/j.artint.2026.104525_bib0029","doi-asserted-by":"crossref","first-page":"496","DOI":"10.1093\/icb\/36.4.496","article-title":"Exponential versus hyperbolic discounting of delayed outcomes: risk and waiting time","volume":"36","author":"Green","year":"1996","journal-title":"Am. Zool."},{"key":"10.1016\/j.artint.2026.104525_bib0030","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1037\/0096-3445.126.1.54","article-title":"Bidding on the future: evidence against normative discounting of delayed rewards","volume":"126","author":"Kirby","year":"1997","journal-title":"J. Experim. Psychol. General"},{"issue":"2","key":"10.1016\/j.artint.2026.104525_sbref0031","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1257\/jel.40.2.351","article-title":"Time discounting and time preference: a critical review","volume":"40","author":"Frederick","year":"2002","journal-title":"J. Econ. Lit."},{"key":"10.1016\/j.artint.2026.104525_bib0032","doi-asserted-by":"crossref","unstructured":"O. Evans, A. Stuhlmueller, N. Goodman, Learning the Preferences of Ignorant, Inconsistent Agents, 2016, 10.1609\/aaai.v30i1.10010.","DOI":"10.1609\/aaai.v30i1.10010"},{"issue":"4","key":"10.1016\/j.artint.2026.104525_sbref0033","doi-asserted-by":"crossref","first-page":"503","DOI":"10.2307\/1907921","article-title":"Le Comportement de l\u2019Homme Rationnel devant le Risque: Critique des Postulats et Axiomes de l\u2019Ecole Americaine","volume":"21","author":"Allais","year":"1953","journal-title":"Econometrica"},{"issue":"4","key":"10.1016\/j.artint.2026.104525_bib0034","doi-asserted-by":"crossref","first-page":"643","DOI":"10.2307\/1884324","article-title":"Risk, ambiguity, and the savage axioms*","volume":"75","author":"Ellsberg","year":"1961","journal-title":"Q. J. Econ."},{"issue":"2","key":"10.1016\/j.artint.2026.104525_sbref0035","doi-asserted-by":"crossref","first-page":"263","DOI":"10.2307\/1914185","article-title":"Prospect theory: an analysis of decision under risk","volume":"47","author":"Kahneman","year":"1979","journal-title":"Econometrica"},{"key":"10.1016\/j.artint.2026.104525_bib0036","doi-asserted-by":"crossref","unstructured":"S. Singh, J. Lacotte, A. Majumdar, M. Pavone, Risk-sensitive inverse reinforcement learning via semi- and non-parametric methods, 2018, 10.1177\/0278364918772017.","DOI":"10.1177\/0278364918772017"},{"key":"10.1016\/j.artint.2026.104525_bib0037","unstructured":"L. Chan, A. Critch, A. Dragan, Human irrationality: both bad and good for reward inference, arXiv preprint arXiv: 2111.06956(2021)."},{"key":"10.1016\/j.artint.2026.104525_bib0038","series-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","first-page":"5603","article-title":"Occam\u2019s razor is insufficient to infer the preferences of irrational agents","volume":"31","author":"Armstrong","year":"2018"},{"key":"10.1016\/j.artint.2026.104525_bib0039","series-title":"IJCAI-PRICAI-20 Workshop on Artificial Intelligence Safety","article-title":"Choice set misspecification in reward inference","author":"Freedman","year":"2020"},{"key":"10.1016\/j.artint.2026.104525_sbref0040","series-title":"Advances in Neural Information Processing Systems","first-page":"4415","article-title":"Reward-rational (implicit) choice: a unifying formalism for reward learning","volume":"33","author":"Jeon","year":"2020"},{"key":"10.1016\/j.artint.2026.104525_sbref0041","series-title":"Advances in Neural Information Processing Systems","article-title":"Robust inverse reinforcement learning under transition dynamics mismatch","author":"Viano","year":"2021"},{"key":"10.1016\/j.artint.2026.104525_bib0042","unstructured":"J. Hong, K. Bhatia, A. Dragan, On the Sensitivity of Reward Inference to Misspecified Human Models, 2023, https:\/\/openreview.net\/forum?id=hJqGbUpDGV."},{"key":"10.1016\/j.artint.2026.104525_bib0043","series-title":"Proceedings of the 38th International Conference on Machine Learning","first-page":"5496","article-title":"Reward identification in inverse reinforcement learning","volume":"139","author":"Kim","year":"2021"},{"key":"10.1016\/j.artint.2026.104525_sbref0044","series-title":"Proceedings of the 40th International Conference on Machine Learning","first-page":"30224","article-title":"Identifiability and generalizability in constrained inverse reinforcement learning","volume":"202","author":"Schlaginhaufen","year":"2023"},{"key":"10.1016\/j.artint.2026.104525_sbref0045","series-title":"International Conference on Learning Representations","article-title":"Quantifying differences in reward functions","author":"Gleave","year":"2021"},{"key":"10.1016\/j.artint.2026.104525_bib0046","unstructured":"B. Wulfe, A. Balakrishna, L. Ellis, J. Mercat, R. McAllister, A. Gaidon, Dynamics-Aware Comparison of Learned Reward Functions, 2022, arXiv: 2201.10081."},{"key":"10.1016\/j.artint.2026.104525_sbref0047","series-title":"Advances in Neural Information Processing Systems","first-page":"15763","article-title":"Consequences of misaligned AI","volume":"33","author":"Zhuang","year":"2020"},{"key":"10.1016\/j.artint.2026.104525_sbref0048","series-title":"International Conference on Learning Representations","article-title":"The effects of reward misspecification: mapping and mitigating misaligned models","author":"Pan","year":"2022"},{"key":"10.1016\/j.artint.2026.104525_sbref0049","series-title":"Proceedings of the Thirty-Ninth Conference on Uncertainty in Artificial Intelligence","first-page":"1974","article-title":"On the limitations of Markovian rewards to express multi-objective, risk-sensitive, and modal tasks","volume":"216","author":"Skalse","year":"2023"},{"key":"10.1016\/j.artint.2026.104525_bib0050","doi-asserted-by":"crossref","unstructured":"S. Arora, P. Doshi, A survey of inverse reinforcement learning: Challenges, methods and progress, 2021, 10.1016\/j.artint.2021.103500.","DOI":"10.1016\/j.artint.2021.103500"},{"issue":"6","key":"10.1016\/j.artint.2026.104525_bib0051","doi-asserted-by":"crossref","first-page":"4307","DOI":"10.1007\/s10462-021-10108-x","article-title":"A survey of inverse reinforcement learning","volume":"55","author":"Adams","year":"2022","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.artint.2026.104525_bib0052","series-title":"Proceedings of the 20th International Joint Conference on Artifical Intelligence","first-page":"2586","article-title":"Bayesian inverse reinforcement learning","author":"Ramachandran","year":"2007"},{"key":"10.1016\/j.artint.2026.104525_bib0053","series-title":"Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy","author":"Ziebart","year":"2010"},{"key":"10.1016\/j.artint.2026.104525_bib0054","series-title":"Proceedings of the 34th International Conference on Machine Learning","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume":"70","author":"Haarnoja","year":"2017"},{"key":"10.1016\/j.artint.2026.104525_bib0055","series-title":"Proceedings of the Sixteenth International Conference on Machine Learning","first-page":"278","article-title":"Policy invariance under reward transformations: theory and application to reward shaping","author":"Ng","year":"1999"},{"key":"10.1016\/j.artint.2026.104525_bib0056","unstructured":"E.J. Michaud, A. Gleave, S. Russell, Understanding Learned Reward Functions, 2020, arxiv: 2012.05862."},{"key":"10.1016\/j.artint.2026.104525_bib0057","unstructured":"E. Jenner, A. Gleave, Preprocessing Reward Functions for Interpretability, 2022, 2203.13553."},{"key":"10.1016\/j.artint.2026.104525_bib0058","series-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","first-page":"8022","article-title":"Reward learning from human preferences and demonstrations in Atari","volume":"31","author":"Ibarz","year":"2018"},{"key":"10.1016\/j.artint.2026.104525_bib0059","unstructured":"V. Krakovna, L. Orseau, R. Kumar, M. Martic, S. Legg, Penalizing side effects using stepwise relative reachability, 2018, 10.48550\/ARXIV.1806.01186."},{"key":"10.1016\/j.artint.2026.104525_bib0060","unstructured":"V. Krakovna, L. Orseau, R. Ngo, M. Martic, S. Legg, Avoiding Side Effects By Considering Future Tasks, 2020, 10.48550\/ARXIV.2010.07877."},{"key":"10.1016\/j.artint.2026.104525_sbref0061","series-title":"Advances in Neural Information Processing Systems","first-page":"21406","article-title":"Avoiding side effects in complex environments","volume":"33","author":"Turner","year":"2020"},{"key":"10.1016\/j.artint.2026.104525_sbref0062","series-title":"NeurIPS ML Safety Workshop","article-title":"All\u2019s well that ends well: avoiding side effects with distance-impact penalties","author":"Griffin","year":"2022"},{"key":"10.1016\/j.artint.2026.104525_sbref0063","series-title":"Advances in Neural Information Processing Systems","first-page":"22146","article-title":"Reinforcement learning in newcomblike environments","volume":"34","author":"Bell","year":"2021"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370226000512?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370226000512?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T21:53:27Z","timestamp":1777067607000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370226000512"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":63,"alternative-id":["S0004370226000512"],"URL":"https:\/\/doi.org\/10.1016\/j.artint.2026.104525","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Partial identifiability and misspecification in inverse reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.artint.2026.104525","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"104525"}}