{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T14:52:07Z","timestamp":1772808727773,"version":"3.50.1"},"update-to":[{"DOI":"10.1371\/journal.pcbi.1007720","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,4,23]],"date-time":"2020-04-23T00:00:00Z","timestamp":1587600000000}}],"reference-count":64,"publisher":"Public Library of Science (PLoS)","issue":"4","license":[{"start":{"date-parts":[[2020,4,13]],"date-time":"2020-04-13T00:00:00Z","timestamp":1586736000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1007720","type":"journal-article","created":{"date-parts":[[2020,4,13]],"date-time":"2020-04-13T17:31:27Z","timestamp":1586799087000},"page":"e1007720","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":26,"title":["Generalizing to generalize: Humans flexibly switch between compositional and conjunctive structures during reinforcement learning"],"prefix":"10.1371","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4657-9690","authenticated-orcid":true,"given":"Nicholas T.","family":"Franklin","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8451-0523","authenticated-orcid":true,"given":"Michael J.","family":"Frank","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2020,4,13]]},"reference":[{"key":"pcbi.1007720.ref001","volume-title":"The Principles of Psychology","author":"W James","year":"1890"},{"issue":"1","key":"pcbi.1007720.ref002","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1037\/a0017808","article-title":"Context, learning, and 
extinction","volume":"117","author":"SJ Gershman","year":"2010","journal-title":"Psychological Review"},{"issue":"1","key":"pcbi.1007720.ref003","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1037\/a0030852","article-title":"Cognitive control over learning: Creating, clustering, and generalizing task-set structure","volume":"120","author":"AGE Collins","year":"2013","journal-title":"Psychological Review"},{"key":"pcbi.1007720.ref004","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1016\/j.cognition.2016.04.002","article-title":"Neural signature of hierarchically structured expectations predicts clustering and transfer of rule sets in reinforcement learning","volume":"152","author":"AGE Collins","year":"2016","journal-title":"Cognition"},{"issue":"13","key":"pcbi.1007720.ref005","doi-asserted-by":"crossref","first-page":"4677","DOI":"10.1523\/JNEUROSCI.3900-13.2014","article-title":"Human EEG Uncovers Latent Generalizable Rule Structure during Learning","volume":"34","author":"AGE Collins","year":"2014","journal-title":"Journal of Neuroscience"},{"issue":"10","key":"pcbi.1007720.ref006","doi-asserted-by":"crossref","first-page":"1646","DOI":"10.1162\/jocn_a_01128","article-title":"The cost of structure learning","volume":"29","author":"AGE Collins","year":"2017","journal-title":"Journal of Cognitive Neuroscience"},{"issue":"3","key":"pcbi.1007720.ref007","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1093\/cercor\/bhr117","article-title":"Mechanisms of hierarchical reinforcement learning in cortico-striatal circuits 2: Evidence from fMRI","volume":"22","author":"D Badre","year":"2012","journal-title":"Cerebral Cortex"},{"key":"pcbi.1007720.ref008","doi-asserted-by":"crossref","first-page":"101261","DOI":"10.1016\/j.cogpsych.2019.101261","article-title":"Finding structure in multi-armed bandits","volume":"119","author":"E Schulz","year":"2020","journal-title":"Cognitive 
Psychology"},{"issue":"32","key":"pcbi.1007720.ref009","doi-asserted-by":"crossref","first-page":"7143","DOI":"10.1523\/JNEUROSCI.3336-17.2018","article-title":"Neural computations underlying causal structure learning","volume":"38","author":"MS Tomov","year":"2018","journal-title":"Journal of Neuroscience"},{"key":"pcbi.1007720.ref010","volume-title":"Advantages and Limitations of using Successor Features for Transfer in Reinforcement Learning","author":"L Lehnert","year":"2017"},{"key":"pcbi.1007720.ref011","volume-title":"Schema Networks: Zero-shot Transfer with a Generative Causal Model of Intuitive Physics","author":"K Kansky","year":"2017"},{"key":"pcbi.1007720.ref012","first-page":"385","volume-title":"Advances in neural information processing systems","author":"S Thrun","year":"1995"},{"key":"pcbi.1007720.ref013","unstructured":"Da Silva B, Konidaris G, Barto A. Learning parameterized skills. arXiv preprint arXiv:12066398. 2012;."},{"key":"pcbi.1007720.ref014","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","article-title":"Hierarchical reinforcement learning with the MAXQ value function decomposition","volume":"13","author":"TG Dietterich","year":"2000","journal-title":"Journal of artificial intelligence research"},{"issue":"1-2","key":"pcbi.1007720.ref015","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning","volume":"112","author":"RS Sutton","year":"1999","journal-title":"Artificial intelligence"},{"key":"pcbi.1007720.ref016","unstructured":"Saxe AM, Earle AC, Rosman B. Hierarchy through composition with multitask LMDPs. In: Proceedings of the 34th International Conference on Machine Learning-Volume 70. JMLR. org; 2017. p. 
3017\u20133026."},{"key":"pcbi.1007720.ref017","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998"},{"key":"pcbi.1007720.ref018","article-title":"Compositional clustering in task structure learning","author":"NT Franklin","year":"2018","journal-title":"PLOS Computational Biology"},{"issue":"1","key":"pcbi.1007720.ref019","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1023\/A:1009778005914","article-title":"On bias, variance, 0\/1\u2014loss, and the curse-of-dimensionality","volume":"1","author":"JH Friedman","year":"1997","journal-title":"Data mining and knowledge discovery"},{"issue":"6","key":"pcbi.1007720.ref020","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1080\/00222895.2010.526467","article-title":"Evidence for model-based action planning in a sequential finger movement task","volume":"42","author":"A Fermin","year":"2010","journal-title":"Journal of Motor Behavior"},{"issue":"July","key":"pcbi.1007720.ref021","first-page":"1","article-title":"Model-based action planning involves cortico-cerebellar and basal ganglia networks","volume":"6","author":"ASR Fermin","year":"2016","journal-title":"Scientific Reports"},{"issue":"14","key":"pcbi.1007720.ref022","doi-asserted-by":"crossref","first-page":"5526","DOI":"10.1523\/JNEUROSCI.4647-10.2011","article-title":"Neural correlates of forward planning in a spatial decision task in humans","volume":"31","author":"DA Simon","year":"2011","journal-title":"Journal of Neuroscience"},{"key":"pcbi.1007720.ref023","doi-asserted-by":"crossref","unstructured":"Aldous DJ. Exchangeability and related topics. In \u00c9cole d\u2019\u00c9t\u00e9 de Probabilit\u00e9s de Saint-Flour XIII\u20141983 1985 (pp. 1-198). Springer, Berlin, Heidelberg.","DOI":"10.1007\/BFb0099421"},{"key":"pcbi.1007720.ref024","doi-asserted-by":"crossref","unstructured":"Anderson JR, Matessa M. A rational analysis of categorization. In: Machine Learning Proceedings 1990. Elsevier; 1990. p. 
76\u201384.","DOI":"10.1016\/B978-1-55860-141-3.50013-4"},{"issue":"2","key":"pcbi.1007720.ref025","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1037\/0033-295X.111.2.309","article-title":"SUSTAIN: a network model of category learning","volume":"111","author":"BC Love","year":"2004","journal-title":"Psychological review"},{"issue":"4","key":"pcbi.1007720.ref026","doi-asserted-by":"crossref","first-page":"1144","DOI":"10.1037\/a0020511","article-title":"Rational approximations to rational models: alternative algorithms for category learning","volume":"117","author":"AN Sanborn","year":"2010","journal-title":"Psychological review"},{"issue":"11","key":"pcbi.1007720.ref027","doi-asserted-by":"crossref","first-page":"e1003939","DOI":"10.1371\/journal.pcbi.1003939","article-title":"Statistical computations underlying the dynamics of memory updating","volume":"10","author":"SJ Gershman","year":"2014","journal-title":"PLoS computational biology"},{"key":"pcbi.1007720.ref028","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/j.jmp.2015.06.004","article-title":"Harold Jeffreys\u2019s default Bayes factor hypothesis tests: Explanation, extension, and application in psychology","volume":"72","author":"A Ly","year":"2016","journal-title":"Journal of Mathematical Psychology"},{"key":"pcbi.1007720.ref029","first-page":"294","volume-title":"Advances in neural information processing systems","author":"CE Rasmussen","year":"2001"},{"key":"pcbi.1007720.ref030","doi-asserted-by":"crossref","first-page":"164","DOI":"10.3389\/fnbeh.2013.00164","article-title":"Gradual extinction prevents the return of fear: implications for the discovery of state","volume":"7","author":"SJ Gershman","year":"2013","journal-title":"Frontiers in behavioral neuroscience"},{"issue":"3","key":"pcbi.1007720.ref031","doi-asserted-by":"crossref","first-page":"243","DOI":"10.3758\/s13420-015-0176-z","article-title":"Individual differences in learning predict the return of 
fear","volume":"43","author":"SJ Gershman","year":"2015","journal-title":"Learning & behavior"},{"key":"pcbi.1007720.ref032","unstructured":"Wilson A, Fern A, Tadepalli P. Transfer learning in sequential decision problems: A hierarchical Bayesian approach. In: Proceedings of ICML Workshop on Unsupervised and Transfer Learning; 2012. p. 217\u2013227."},{"key":"pcbi.1007720.ref033","unstructured":"Mahmud M, Hawasly M, Rosman B, Ramamoorthy S. Clustering markov decision processes for continual transfer. arXiv preprint arXiv:13113959. 2013;."},{"issue":"1","key":"pcbi.1007720.ref034","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s10994-016-5547-y","article-title":"Bayesian policy reuse","volume":"104","author":"B Rosman","year":"2016","journal-title":"Machine Learning"},{"key":"pcbi.1007720.ref035","first-page":"1","article-title":"Clustering subspace generalization to obtain faster reinforcement learning","author":"M Hashemzadeh","year":"2019","journal-title":"Evolving Systems"},{"key":"pcbi.1007720.ref036","doi-asserted-by":"crossref","unstructured":"Mannor S, Menache I, Hoze A, Klein U. Dynamic abstraction in reinforcement learning via clustering. In: Proceedings of the twenty-first international conference on Machine learning. ACM; 2004. p. 71.","DOI":"10.1145\/1015330.1015355"},{"key":"pcbi.1007720.ref037","doi-asserted-by":"crossref","unstructured":"Konidaris G, Osentoski S, Thomas P. Value function approximation in reinforcement learning using the Fourier basis. 
In: Twenty-fifth AAAI conference on artificial intelligence; 2011.","DOI":"10.1609\/aaai.v25i1.7903"},{"key":"pcbi.1007720.ref038","first-page":"499418","article-title":"Discovery of hierarchical representations for efficient planning","author":"M Tomov","year":"2018","journal-title":"BioRxiv"},{"issue":"12","key":"pcbi.1007720.ref039","doi-asserted-by":"crossref","first-page":"915","DOI":"10.1038\/s41562-018-0467-4","article-title":"Generalization guides human exploration in vast decision spaces","volume":"2","author":"CM Wu","year":"2018","journal-title":"Nature human behaviour"},{"issue":"7-8","key":"pcbi.1007720.ref040","doi-asserted-by":"crossref","first-page":"961","DOI":"10.1016\/S0893-6080(99)00046-5","article-title":"What are the computations of the cerebellum, the basal ganglia and the cerebral cortex?","volume":"12","author":"K Doya","year":"1999","journal-title":"Neural networks"},{"key":"pcbi.1007720.ref041","doi-asserted-by":"crossref","first-page":"412","DOI":"10.1016\/j.neuroimage.2013.02.063","article-title":"The valuation system: a coordinate-based meta-analysis of BOLD fMRI experiments examining neural correlates of subjective value","volume":"76","author":"O Bartra","year":"2013","journal-title":"Neuroimage"},{"issue":"2","key":"pcbi.1007720.ref042","doi-asserted-by":"crossref","first-page":"300","DOI":"10.1037\/0033-295X.113.2.300","article-title":"Anatomy of a decision: striato-orbitofrontal interactions in reinforcement learning, decision making, and reversal","volume":"113","author":"MJ Frank","year":"2006","journal-title":"Psychological review"},{"issue":"2","key":"pcbi.1007720.ref043","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1016\/j.neuron.2013.11.005","article-title":"Orbitofrontal cortex as a cognitive map of task space","volume":"81","author":"RC 
Wilson","year":"2014","journal-title":"Neuron"},{"issue":"6","key":"pcbi.1007720.ref044","doi-asserted-by":"crossref","first-page":"1402","DOI":"10.1016\/j.neuron.2016.08.019","article-title":"Human orbitofrontal cortex represents a cognitive map of state space","volume":"91","author":"NW Schuck","year":"2016","journal-title":"Neuron"},{"issue":"7090","key":"pcbi.1007720.ref045","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1038\/nature04676","article-title":"Neurons in the orbitofrontal cortex encode economic value","volume":"441","author":"C Padoa-Schioppa","year":"2006","journal-title":"Nature"},{"issue":"3","key":"pcbi.1007720.ref046","doi-asserted-by":"crossref","first-page":"509","DOI":"10.1093\/cercor\/bhr114","article-title":"Mechanisms of hierarchical reinforcement learning in corticostriatal circuits 1: computational analysis","volume":"22","author":"MJ Frank","year":"2012","journal-title":"Cerebral cortex"},{"key":"pcbi.1007720.ref047","unstructured":"Oh J, Singh S, Lee H, Kohli P. Zero-shot task generalization with multi-task deep reinforcement learning. In: Proceedings of the 34th International Conference on Machine Learning-Volume 70. JMLR. org; 2017. p. 2661\u20132670."},{"key":"pcbi.1007720.ref048","first-page":"1369","volume-title":"Advances in neural information processing systems","author":"E Todorov","year":"2007"},{"key":"pcbi.1007720.ref049","doi-asserted-by":"crossref","unstructured":"Piray P, Daw ND. A common model explaining flexible decision making, grid fields and cognitive control. bioRxiv. 2019; p. 
856849.","DOI":"10.1101\/856849"},{"issue":"9","key":"pcbi.1007720.ref050","doi-asserted-by":"crossref","first-page":"1321","DOI":"10.1177\/0956797617708288","article-title":"Cost-benefit arbitration between multiple reinforcement-learning systems","volume":"28","author":"W Kool","year":"2017","journal-title":"Psychological science"},{"issue":"4","key":"pcbi.1007720.ref051","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1037\/a0020198","article-title":"Decision making and the avoidance of cognitive demand","volume":"139","author":"W Kool","year":"2010","journal-title":"Journal of Experimental Psychology: General"},{"issue":"7","key":"pcbi.1007720.ref052","doi-asserted-by":"crossref","first-page":"e68210","DOI":"10.1371\/journal.pone.0068210","article-title":"What is the subjective cost of cognitive effort? Load, trait, and aging effects revealed by economic preference","volume":"8","author":"A Westbrook","year":"2013","journal-title":"PloS one"},{"issue":"7","key":"pcbi.1007720.ref053","doi-asserted-by":"crossref","first-page":"1024","DOI":"10.1111\/j.1460-9568.2011.07980.x","article-title":"How much of reinforcement learning is working memory, not reinforcement learning? 
A behavioral, computational, and neurogenetic analysis","volume":"35","author":"AGE Collins","year":"2012","journal-title":"European Journal of Neuroscience"},{"issue":"5","key":"pcbi.1007720.ref054","doi-asserted-by":"crossref","first-page":"751","DOI":"10.1177\/0956797612463080","article-title":"The curse of planning: dissecting multiple reinforcement-learning systems by taxing the central executive","volume":"24","author":"AR Otto","year":"2013","journal-title":"Psychological science"},{"issue":"44","key":"pcbi.1007720.ref055","doi-asserted-by":"crossref","first-page":"E10313","DOI":"10.1073\/pnas.1800755115","article-title":"Comparing continual task learning in minds and machines","volume":"115","author":"T Flesch","year":"2018","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"20","key":"pcbi.1007720.ref056","doi-asserted-by":"crossref","first-page":"7338","DOI":"10.1073\/pnas.0502455102","article-title":"Prefrontal cortex and flexible cognitive control: Rules without symbols","volume":"102","author":"NP Rougier","year":"2005","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"3","key":"pcbi.1007720.ref057","doi-asserted-by":"crossref","first-page":"829","DOI":"10.3758\/s13428-015-0642-8","article-title":"psiTurk: An open-source framework for conducting replicable behavioral experiments online","volume":"48","author":"TM Gureckis","year":"2016","journal-title":"Behavior research methods"},{"key":"pcbi.1007720.ref058","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1016\/j.cognition.2017.12.014","article-title":"Deconstructing the human algorithms for exploration","volume":"173","author":"SJ Gershman","year":"2018","journal-title":"Cognition"},{"key":"pcbi.1007720.ref059","volume-title":"An introduction to the Kalman filter","author":"G Welch","year":"1995"},{"key":"pcbi.1007720.ref060","volume-title":"Doing Bayesian data analysis: A tutorial with R, JAGS, and Stan","author":"J 
Kruschke","year":"2014"},{"key":"pcbi.1007720.ref061","volume-title":"Bayesian data analysis","author":"A Gelman","year":"2014"},{"key":"pcbi.1007720.ref062","first-page":"1593","volume":"15","author":"MD Hoffman","year":"2011","journal-title":"The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo"},{"key":"pcbi.1007720.ref063","doi-asserted-by":"crossref","first-page":"e55","DOI":"10.7717\/peerj-cs.55","article-title":"Probabilistic programming in Python using PyMC3","volume":"2","author":"J Salvatier","year":"2016","journal-title":"PeerJ Computer Science"},{"key":"pcbi.1007720.ref064","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1016\/j.neubiorev.2016.02.018","article-title":"The LATER model of reaction time and decision","volume":"64","author":"I Noorani","year":"2016","journal-title":"Neuroscience & Biobehavioral Reviews"}],"updated-by":[{"DOI":"10.1371\/journal.pcbi.1007720","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,4,23]],"date-time":"2020-04-23T00:00:00Z","timestamp":1587600000000}}],"container-title":["PLOS Computational 
Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007720","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,21]],"date-time":"2022-10-21T05:12:51Z","timestamp":1666329171000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007720"}},"subtitle":[],"editor":[{"given":"Jill","family":"O\u2019Reilly","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,4,13]]},"references-count":64,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2020,4,13]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1007720","relation":{"new_version":[{"id-type":"doi","id":"10.1371\/journal.pcbi.1007720","asserted-by":"object"}]},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,13]]}}}