{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T20:18:22Z","timestamp":1774642702532,"version":"3.50.1"},"update-to":[{"DOI":"10.1371\/journal.pcbi.1007944","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,7,2]],"date-time":"2020-07-02T00:00:00Z","timestamp":1593648000000}}],"reference-count":57,"publisher":"Public Library of Science (PLoS)","issue":"6","license":[{"start":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T00:00:00Z","timestamp":1592784000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100005856","name":"Faculdade de Ci\u00eancias e Tecnologia, Universidade Nova de Lisboa","doi-asserted-by":"publisher","award":["SFRH\/BD\/51711\/2011"],"award-info":[{"award-number":["SFRH\/BD\/51711\/2011"]}],"id":[{"id":"10.13039\/501100005856","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100013954","name":"Astor Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100013954","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004440","name":"Wellcome Trust","doi-asserted-by":"publisher","award":["Senior Research Fellowship (WT104765MA)"],"award-info":[{"award-number":["Senior Research Fellowship (WT104765MA)"]}],"id":[{"id":"10.13039\/100004440","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000913","name":"James S. McDonnell Foundation","doi-asserted-by":"publisher","award":["JSMF220020372"],"award-info":[{"award-number":["JSMF220020372"]}],"id":[{"id":"10.13039\/100000913","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000324","name":"Gatsby Charitable Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000324","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004440","name":"Wellcome Trust","doi-asserted-by":"publisher","award":["New Investigator Award (096689\/Z\/11\/Z)"],"award-info":[{"award-number":["New Investigator Award (096689\/Z\/11\/Z)"]}],"id":[{"id":"10.13039\/100004440","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Santa Casa da Miseric\u00f3rida de Lisboa","award":["Pr\u00e9mio Jo\u00e3o Lobo Antunes 2017"],"award-info":[{"award-number":["Pr\u00e9mio Jo\u00e3o Lobo Antunes 2017"]}]},{"DOI":"10.13039\/501100004189","name":"Max-Planck-Gesellschaft","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004189","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100005156","name":"Alexander von Humboldt-Stiftung","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100005156","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1007944","type":"journal-article","created":{"date-parts":[[2020,6,22]],"date-time":"2020-06-22T13:41:33Z","timestamp":1592833293000},"page":"e1007944","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":24,"title":["Combined model-free and model-sensitive reinforcement learning in non-human primates"],"prefix":"10.1371","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4660-6051","authenticated-orcid":true,"given":"Bruno","family":"Miranda","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2083-2261","authenticated-orcid":true,"given":"W. M. Nishantha","family":"Malalasekera","sequence":"additional","affiliation":[]},{"given":"Timothy E.","family":"Behrens","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3476-1839","authenticated-orcid":true,"given":"Peter","family":"Dayan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5696-7507","authenticated-orcid":true,"given":"Steven W.","family":"Kennerley","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2020,6,22]]},"reference":[{"key":"pcbi.1007944.ref001","doi-asserted-by":"crossref","DOI":"10.1109\/TNN.1998.712192","volume-title":"Introduction to Reinforcement Learning","author":"RS Sutton","year":"1998"},{"issue":"4","key":"pcbi.1007944.ref002","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1037\/h0061626","article-title":"Cognitive maps in rats and men","volume":"55","author":"EC Tolman","year":"1948","journal-title":"Psychological review"},{"issue":"1135","key":"pcbi.1007944.ref003","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1098\/rstb.1985.0010","article-title":"Actions and Habits: The Development of Behavioural Autonomy","volume":"308","author":"A Dickinson","year":"1985","journal-title":"Philosophical Transactions of the Royal Society of London B, Biological Sciences"},{"issue":"1","key":"pcbi.1007944.ref004","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3758\/BF03199951","article-title":"Motivational control of goal-directed action","volume":"22","author":"A Dickinson","year":"1994","journal-title":"Animal Learning & Behavior"},{"key":"pcbi.1007944.ref005","volume-title":"Animal intelligence","author":"EL Thorndike","year":"1911"},{"issue":"12","key":"pcbi.1007944.ref006","doi-asserted-by":"crossref","first-page":"1704","DOI":"10.1038\/nn1560","article-title":"Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control","volume":"8","author":"ND Daw","year":"2005","journal-title":"Nature Neuroscience"},{"key":"pcbi.1007944.ref007","doi-asserted-by":"crossref","first-page":"92","DOI":"10.3389\/fpsyg.2013.00092","article-title":"The Mixed Instrumental Controller: Using Value of Information to Combine Habitual Choice and Mental Simulation","volume":"4","author":"G Pezzulo","year":"2013","journal-title":"Frontiers in Psychology"},{"issue":"6","key":"pcbi.1007944.ref008","doi-asserted-by":"crossref","first-page":"1204","DOI":"10.1016\/j.neuron.2011.02.027","article-title":"Model-Based Influences on Humans\u2019 Choices and Striatal Prediction Errors","volume":"69","author":"ND Daw","year":"2011","journal-title":"Neuron"},{"issue":"4","key":"pcbi.1007944.ref009","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1145\/122344.122377","article-title":"Dyna, an Integrated Architecture for Learning, Planning, and Reacting","volume":"2","author":"RS Sutton","year":"1991","journal-title":"SIGART Bull"},{"issue":"1","key":"pcbi.1007944.ref010","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1037\/a0030844","article-title":"Retrospective revaluation in sequential decision making: a tale of two systems","volume":"143","author":"SJ Gershman","year":"2014","journal-title":"Journal of Experimental Psychology: General"},{"issue":"6","key":"pcbi.1007944.ref011","doi-asserted-by":"crossref","first-page":"1068","DOI":"10.1016\/j.conb.2012.05.011","article-title":"How to set the switches on this thing","volume":"22","author":"P Dayan","year":"2012","journal-title":"Current Opinion in Neurobiology"},{"issue":"4","key":"pcbi.1007944.ref012","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1006043","article-title":"Rational metareasoning and the plasticity of cognitive control","volume":"14","author":"F Lieder","year":"2018","journal-title":"PLOS Computational Biology"},{"issue":"45","key":"pcbi.1007944.ref013","doi-asserted-by":"crossref","first-page":"13817","DOI":"10.1073\/pnas.1506367112","article-title":"Habitual control of goal selection in humans","volume":"112","author":"F Cushman","year":"2015","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"45","key":"pcbi.1007944.ref014","doi-asserted-by":"crossref","first-page":"12868","DOI":"10.1073\/pnas.1609094113","article-title":"Adaptive integration of habits into depth-limited planning defines a habitual-goal\u2013directed spectrum","volume":"113","author":"M Keramati","year":"2016","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"12","key":"pcbi.1007944.ref015","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1004648","article-title":"Simple Plans or Sophisticated Habits? State, Transition and Learning Interactions in the Two-Step Task","volume":"11","author":"T Akam","year":"2015","journal-title":"PLOS Computational Biology"},{"issue":"9","key":"pcbi.1007944.ref016","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1004463","article-title":"Model-Based Reasoning in Humans Becomes Automatic with Training","volume":"11","author":"M Economides","year":"2015","journal-title":"PLOS Computational Biology"},{"key":"pcbi.1007944.ref017","first-page":"497","volume-title":"Stevens\u2019 Handbook of Experimental Psychology. vol. 3: Learning, Motivation and Emotion","author":"A Dickinson","year":"2002"},{"issue":"8","key":"pcbi.1007944.ref018","doi-asserted-by":"crossref","first-page":"1057","DOI":"10.1038\/nn1743","article-title":"Midbrain dopamine neurons encode decisions for future action","volume":"9","author":"G Morris","year":"2006","journal-title":"Nature Neuroscience"},{"issue":"12","key":"pcbi.1007944.ref019","doi-asserted-by":"crossref","first-page":"1615","DOI":"10.1038\/nn2013","article-title":"Dopamine neurons encode the better option in rats deciding between differently delayed or sized rewards","volume":"10","author":"MR Roesch","year":"2007","journal-title":"Nature Neuroscience"},{"issue":"1","key":"pcbi.1007944.ref020","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1038\/npp.2009.131","article-title":"Human and Rodent Homologies in Action Control: Corticostriatal Determinants of Goal-Directed and Habitual Action","volume":"35","author":"BW Balleine","year":"2009","journal-title":"Neuropsychopharmacology"},{"issue":"1655","key":"pcbi.1007944.ref021","article-title":"The algorithmic anatomy of model-based evaluation","volume":"369","author":"ND Daw","year":"2014","journal-title":"Philosophical Transactions of the Royal Society of London B: Biological Sciences"},{"issue":"2","key":"pcbi.1007944.ref022","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1016\/j.neuron.2013.09.007","article-title":"Goals and Habits in the Brain","volume":"80","author":"RJ Dolan","year":"2013","journal-title":"Neuron"},{"issue":"3","key":"pcbi.1007944.ref023","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1901\/jeab.2005.110-04","article-title":"DYNAMIC RESPONSE-BY-RESPONSE MODELS OF MATCHING BEHAVIOR IN RHESUS MONKEYS","volume":"84","author":"B Lau","year":"2005","journal-title":"Journal of the Experimental Analysis of Behavior"},{"issue":"4","key":"pcbi.1007944.ref024","doi-asserted-by":"crossref","first-page":"585","DOI":"10.1016\/j.neuron.2010.04.016","article-title":"States versus Rewards: Dissociable Neural Prediction Error Signals Underlying Model-Based and Model-Free Reinforcement Learning","volume":"66","author":"J Gl\u00e4scher","year":"2010","journal-title":"Neuron"},{"issue":"3","key":"pcbi.1007944.ref025","doi-asserted-by":"crossref","first-page":"687","DOI":"10.1016\/j.neuron.2013.11.028","article-title":"Neural Computations Underlying Arbitration between Model-Based and Model-free Learning","volume":"81","author":"SW Lee","year":"2014","journal-title":"Neuron"},{"issue":"12","key":"pcbi.1007944.ref026","doi-asserted-by":"crossref","first-page":"e1003364","DOI":"10.1371\/journal.pcbi.1003364","article-title":"Actions, Action Sequences and Habits: Evidence That Goal-Directed and Habitual Action Control Are Hierarchically Organized","volume":"9","author":"A Dezfouli","year":"2013","journal-title":"PLoS Comput Biol"},{"issue":"9","key":"pcbi.1007944.ref027","doi-asserted-by":"crossref","first-page":"1321","DOI":"10.1177\/0956797617708288","article-title":"Cost-Benefit Arbitration Between Multiple Reinforcement-Learning Systems","volume":"28","author":"W Kool","year":"2017","journal-title":"Psychological Science"},{"issue":"8","key":"pcbi.1007944.ref028","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1005090","article-title":"When Does Model-Based Control Pay Off?","volume":"12","author":"W Kool","year":"2016","journal-title":"PLOS Computational Biology"},{"key":"pcbi.1007944.ref029","doi-asserted-by":"crossref","first-page":"1269","DOI":"10.1038\/nn.4613","article-title":"Dorsal hippocampus contributes to model-based planning","volume":"20","author":"KJ Miller","year":"2017","journal-title":"Nature Neuroscience"},{"key":"pcbi.1007944.ref030","doi-asserted-by":"crossref","first-page":"e11305","DOI":"10.7554\/eLife.11305","article-title":">Characterizing a psychiatric symptom dimension related to deficits in goal-directed control","volume":"5","author":"CM Gillan","year":"2016","journal-title":"eLife"},{"issue":"6","key":"pcbi.1007944.ref031","doi-asserted-by":"crossref","first-page":"777","DOI":"10.1037\/abn0000164","article-title":"Reduced model-based decision-making in schizophrenia","volume":"125","author":"AJ Culbreth","year":"2016","journal-title":"J Abnorm Psychol"},{"issue":"2","key":"pcbi.1007944.ref032","doi-asserted-by":"crossref","first-page":"122","DOI":"10.1159\/000362840","article-title":"Model-based and model-free decisions in alcohol dependence","volume":"70","author":"M Sebold","year":"2014","journal-title":"Neuropsychobiology"},{"issue":"10","key":"pcbi.1007944.ref033","doi-asserted-by":"crossref","first-page":"e659","DOI":"10.1038\/tp.2015.139","article-title":"Lateral prefrontal model-based signatures are reduced in healthy individuals with high trait impulsivity","volume":"5","author":"L Deserno","year":"2015","journal-title":"Translational psychiatry"},{"issue":"2","key":"pcbi.1007944.ref034","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1093\/brain\/awv347","article-title":"Dopamine selectively remediates \u2018model-based\u2019 reward learning: a computational approach","volume":"139","author":"ME Sharp","year":"2015","journal-title":"Brain"},{"key":"pcbi.1007944.ref035","doi-asserted-by":"crossref","first-page":"624","DOI":"10.1038\/mp.2015.46","article-title":"Valence-dependent influence of serotonin depletion on model-based choice strategy","volume":"21","author":"Y Worbe","year":"2015","journal-title":"Molecular Psychiatry"},{"issue":"3","key":"pcbi.1007944.ref036","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1016\/j.neuron.2012.03.042","article-title":"Dopamine Enhances Model-Based over Model-Free Choice Behavior","volume":"75","author":"K Wunderlich","year":"2012","journal-title":"Neuron"},{"issue":"1","key":"pcbi.1007944.ref037","doi-asserted-by":"crossref","first-page":"e86850","DOI":"10.1371\/journal.pone.0086850","article-title":"Transcranial direct current stimulation of right dorsolateral prefrontal cortex does not affect model-based or model-free reinforcement learning in humans","volume":"9","author":"P Smittenaar","year":"2014","journal-title":"PLoS One"},{"issue":"4","key":"pcbi.1007944.ref038","doi-asserted-by":"crossref","first-page":"1211","DOI":"10.1523\/JNEUROSCI.1901-15.2016","article-title":"Variability in Dopamine Genes Dissociates Model-Based and Model-Free Reinforcement Learning","volume":"36","author":"BB Doll","year":"2016","journal-title":"The Journal of neuroscience: the official journal of the Society for Neuroscience"},{"issue":"0","key":"pcbi.1007944.ref039","first-page":"9","article-title":"The relation between reinforcement learning parameters and the influence of reinforcement history on choice behavior","volume":"66","author":"K Katahira","year":"2015","journal-title":"Journal of Mathematical Psychology"},{"issue":"22","key":"pcbi.1007944.ref040","doi-asserted-by":"crossref","first-page":"9493","DOI":"10.1073\/pnas.0608842104","article-title":"Neural signature of fictive learning signals in a sequential investment task","volume":"104","author":"T Lohrenz","year":"2007","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"6","key":"pcbi.1007944.ref041","first-page":"081","article-title":"The ubiquity of model-based reinforcement learning","volume":"22","author":"BB Doll","year":"2012","journal-title":"Current Opinion in Neurobiology"},{"key":"pcbi.1007944.ref042","article-title":"Model-free or muddled models in the two-stage task?","author":"CF da Silva","year":"2019","journal-title":"bioRxiv"},{"issue":"5","key":"pcbi.1007944.ref043","doi-asserted-by":"crossref","first-page":"e1002055","DOI":"10.1371\/journal.pcbi.1002055","article-title":"Speed\/Accuracy Trade-Off between the Habitual and the Goal-Directed Processes","volume":"7","author":"M Keramati","year":"2011","journal-title":"PLoS Comput Biol"},{"issue":"92","key":"pcbi.1007944.ref044","article-title":"The Mixed Instrumental Controller: Using Value of Information to Combine Habitual Choice and Mental Simulation","volume":"4","author":"G Pezzulo","year":"2013","journal-title":"Frontiers in Psychology"},{"issue":"1","key":"pcbi.1007944.ref045","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1007\/BF00993104","article-title":"Prioritized sweeping: Reinforcement learning with less data and less time","volume":"13","author":"A Moore","year":"1993","journal-title":"Machine Learning"},{"issue":"45","key":"pcbi.1007944.ref046","doi-asserted-by":"crossref","first-page":"12176","DOI":"10.1523\/JNEUROSCI.3761-07.2007","article-title":"Neural Ensembles in CA3 Transiently Encode Paths Forward of the Animal at a Decision Point","volume":"27","author":"A Johnson","year":"2007","journal-title":"The Journal of Neuroscience"},{"issue":"5","key":"pcbi.1007944.ref047","doi-asserted-by":"crossref","first-page":"767","DOI":"10.1038\/nn.3981","article-title":"Model-based choices involve prospective neural activity","volume":"18","author":"BB Doll","year":"2015","journal-title":"Nat Neurosci"},{"issue":"3","key":"pcbi.1007944.ref048","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1007\/s00213-006-0502-4","article-title":"Tonic dopamine: opportunity costs and the control of response vigor","volume":"191","author":"Y Niv","year":"2007","journal-title":"Psychopharmacology"},{"issue":"12","key":"pcbi.1007944.ref049","doi-asserted-by":"crossref","first-page":"3933","DOI":"10.1162\/jocn_a_00090","article-title":"Vigor in the Face of Fluctuating Rates of Reward: An Experimental Examination","volume":"23","author":"M Guitart-Masip","year":"2011","journal-title":"Journal of Cognitive Neuroscience"},{"issue":"8","key":"pcbi.1007944.ref050","doi-asserted-by":"crossref","first-page":"1495","DOI":"10.1038\/npp.2013.48","article-title":"Dopamine Modulates Reward-Related Vigor","volume":"38","author":"U Beierholm","year":"2013","journal-title":"Neuropsychopharmacology"},{"issue":"15","key":"pcbi.1007944.ref051","doi-asserted-by":"crossref","first-page":"2865","DOI":"10.1002\/sim.3107","article-title":"Scaling regression inputs by dividing by two standard deviations","volume":"27","author":"A Gelman","year":"2008","journal-title":"Statistics in Medicine"},{"issue":"4","key":"pcbi.1007944.ref052","doi-asserted-by":"crossref","first-page":"e1002028","DOI":"10.1371\/journal.pcbi.1002028","article-title":"Disentangling the Roles of Approach, Activation and Valence in Instrumental and Pavlovian Responding","volume":"7","author":"QJM Huys","year":"2011","journal-title":"PLoS Comput Biol"},{"key":"pcbi.1007944.ref053","volume-title":"On-Line Q-Learning Using Connectionist Systems","author":"GA Rummery","year":"1994"},{"issue":"1","key":"pcbi.1007944.ref054","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"Maximum likelihood from incomplete data via the EM algorithm","volume":"39","author":"AP Dempster","year":"1977","journal-title":"JOURNAL OF THE ROYAL STATISTICAL SOCIETY, SERIES B"},{"issue":"2","key":"pcbi.1007944.ref055","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1214\/aos\/1176344136","article-title":"Estimating the Dimension of a Model","volume":"6","author":"G Schwarz","year":"1978","journal-title":"The Annals of Statistics"},{"issue":"430","key":"pcbi.1007944.ref056","doi-asserted-by":"crossref","first-page":"773","DOI":"10.1080\/01621459.1995.10476572","article-title":"Bayes Factors","volume":"90","author":"RE Kass","year":"1995","journal-title":"Journal of the American Statistical Association"},{"key":"pcbi.1007944.ref057","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1016\/j.neuroimage.2013.08.065","article-title":"Bayesian model selection for group studies\u2014Revisited","volume":"84","author":"L Rigoux","year":"2014","journal-title":"NeuroImage"}],"updated-by":[{"DOI":"10.1371\/journal.pcbi.1007944","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,7,2]],"date-time":"2020-07-02T00:00:00Z","timestamp":1593648000000}}],"container-title":["PLOS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007944","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T00:09:26Z","timestamp":1723075766000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007944"}},"subtitle":[],"editor":[{"given":"Samuel J.","family":"Gershman","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,6,22]]},"references-count":57,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2020,6,22]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1007944","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/836007","asserted-by":"object"}]},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,6,22]]}}}