{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T12:20:26Z","timestamp":1776687626204,"version":"3.51.2"},"update-to":[{"DOI":"10.1371\/journal.pcbi.1007594","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,4,16]],"date-time":"2020-04-16T00:00:00Z","timestamp":1586995200000}}],"reference-count":91,"publisher":"Public Library of Science (PLoS)","issue":"4","license":[{"start":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T00:00:00Z","timestamp":1586131200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100004405","name":"Toyota Motor Corporation","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004405","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N000141712984"],"award-info":[{"award-number":["N000141712984"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["MURI N00014-17-1-2961"],"award-info":[{"award-number":["MURI N00014-17-1-2961"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["CRCNS 1R01MH109177"],"award-info":[{"award-number":["CRCNS 1R01MH109177"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1007594","type":"journal-article","created":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T13:32:03Z","timestamp":1586179923000},"page":"e1007594","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":84,"title":["Discovery of hierarchical representations for efficient planning"],"prefix":"10.1371","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0949-302X","authenticated-orcid":true,"given":"Momchil S.","family":"Tomov","sequence":"first","affiliation":[]},{"given":"Samyukta","family":"Yagati","sequence":"additional","affiliation":[]},{"given":"Agni","family":"Kumar","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6372-3607","authenticated-orcid":true,"given":"Wanqian","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6546-3298","authenticated-orcid":true,"given":"Samuel J.","family":"Gershman","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2020,4,6]]},"reference":[{"issue":"4","key":"pcbi.1007594.ref001","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1207\/s15427633scc0304_5","article-title":"\u2018Fine-to-coarse\u2019route planning and navigation in regionalized environments","volume":"3","author":"JM Wiener","year":"2003","journal-title":"Spatial cognition and computation"},{"issue":"4","key":"pcbi.1007594.ref002","doi-asserted-by":"crossref","first-page":"893","DOI":"10.1016\/j.neuron.2016.03.037","article-title":"Neural mechanisms of hierarchical planning in a virtual subway network","volume":"90","author":"J Balaguer","year":"2016","journal-title":"Neuron"},{"issue":"1","key":"pcbi.1007594.ref003","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1016\/j.cell.2018.06.012","article-title":"Optogenetic Editing Reveals the Hierarchical Organization of Learned Action Sequences","volume":"174","author":"CE Geddes","year":"2018","journal-title":"Cell"},{"key":"pcbi.1007594.ref004","volume-title":"The problem of serial order in behavior","author":"KS Lashley","year":"1951"},{"issue":"1-2","key":"pcbi.1007594.ref005","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1006\/nlme.1998.3843","article-title":"The basal ganglia and chunking of action repertoires","volume":"70","author":"AM Graybiel","year":"1998","journal-title":"Neurobiology of learning and memory"},{"issue":"2","key":"pcbi.1007594.ref006","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1016\/j.neuron.2013.05.038","article-title":"A Dual Operator View of Habitual Behavior Reflecting Cortical and Striatal Dynamics","volume":"79","author":"K Smith","year":"2013","journal-title":"Neuron"},{"issue":"4","key":"pcbi.1007594.ref007","doi-asserted-by":"crossref","first-page":"486","DOI":"10.1038\/nn.3331","article-title":"Neural representations of events arise from temporal community structure","volume":"16","author":"AC Schapiro","year":"2013","journal-title":"Nature neuroscience"},{"issue":"8","key":"pcbi.1007594.ref008","doi-asserted-by":"crossref","first-page":"e1003779","DOI":"10.1371\/journal.pcbi.1003779","article-title":"Optimal behavioral hierarchy","volume":"10","author":"A Solway","year":"2014","journal-title":"PLoS computational biology"},{"issue":"2","key":"pcbi.1007594.ref009","doi-asserted-by":"crossref","first-page":"370","DOI":"10.1016\/j.neuron.2011.05.042","article-title":"A neural signature of hierarchical reinforcement learning","volume":"71","author":"JJ Ribas-Fernandes","year":"2011","journal-title":"Neuron"},{"key":"pcbi.1007594.ref010","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1016\/j.cobeha.2015.07.007","article-title":"Discovering latent causes in reinforcement learning","volume":"5","author":"SJ Gershman","year":"2015","journal-title":"Current Opinion in Behavioral Sciences"},{"key":"pcbi.1007594.ref011","volume-title":"Multi-hierarchical representation of large-scale space: Applications to mobile robots","author":"JA Fern\u00e1ndez","year":"2013"},{"key":"pcbi.1007594.ref012","doi-asserted-by":"crossref","unstructured":"Lynn CW, Kahn AE, Bassett DS. Structure from noise: Mental errors yield abstract representations of events. arXiv preprint arXiv:180512491. 2018;.","DOI":"10.32470\/CCN.2018.1169-0"},{"key":"pcbi.1007594.ref013","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018"},{"key":"pcbi.1007594.ref014","first-page":"64","article-title":"A theory of Pavlovian conditioning: Variations in the effectiveness of reinforcement and nonreinforcement","volume":"2","author":"RA Rescorla","year":"1972","journal-title":"Classical conditioning II: Current research and theory"},{"issue":"5306","key":"pcbi.1007594.ref015","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","article-title":"A neural substrate of prediction and reward","volume":"275","author":"W Schultz","year":"1997","journal-title":"Science"},{"issue":"7540","key":"pcbi.1007594.ref016","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"V Mnih","year":"2015","journal-title":"Nature"},{"key":"pcbi.1007594.ref017","volume-title":"Introduction to algorithms","author":"TH Cormen","year":"2009"},{"key":"pcbi.1007594.ref018","volume-title":"Artificial intelligence: a modern approach","author":"SJ Russell","year":"2016"},{"issue":"2","key":"pcbi.1007594.ref019","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1037\/h0043158","article-title":"The magical number seven, plus or minus two: Some limits on our capacity for processing information","volume":"63","author":"GA Miller","year":"1956","journal-title":"Psychological review"},{"issue":"3","key":"pcbi.1007594.ref020","doi-asserted-by":"crossref","first-page":"262","DOI":"10.1016\/j.cognition.2008.08.011","article-title":"Hierarchically organized behavior and its neural foundations: a reinforcement learning perspective","volume":"113","author":"MM Botvinick","year":"2009","journal-title":"Cognition"},{"key":"pcbi.1007594.ref021","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1017\/S0140525X01003922","article-title":"The Magical Number 4 in Short-Term Memory: A Reconsideration of Mental Storage Capacity","volume":"24","author":"N Cowan","year":"2001","journal-title":"The Behavioral and brain sciences"},{"issue":"1","key":"pcbi.1007594.ref022","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.jmp.2011.08.004","article-title":"A tutorial on Bayesian nonparametric models","volume":"56","author":"SJ Gershman","year":"2012","journal-title":"Journal of Mathematical Psychology"},{"issue":"8","key":"pcbi.1007594.ref023","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pcbi.1003779","article-title":"Optimal Behavioral Hierarchy","volume":"10","author":"A Solway","year":"2014","journal-title":"PLOS Computational Biology"},{"key":"pcbi.1007594.ref024","unstructured":"Srivastava V, Reverdy P, Leonard NE. Correlated multiarmed bandit problem: Bayesian algorithms and regret analysis. arXiv preprint arXiv:150701160. 2015;."},{"key":"pcbi.1007594.ref025","doi-asserted-by":"crossref","unstructured":"Schulz E, Franklin NT, Gershman SJ. Finding structure in multi-armed bandits. bioRxiv. 2018; p. 432534.","DOI":"10.1101\/432534"},{"issue":"12","key":"pcbi.1007594.ref026","doi-asserted-by":"crossref","first-page":"915","DOI":"10.1038\/s41562-018-0467-4","article-title":"Generalization guides human exploration in vast decision spaces","volume":"2","author":"CM Wu","year":"2018","journal-title":"Nature Human Behaviour"},{"key":"pcbi.1007594.ref027","volume-title":"From Understanding Computation to Understanding Neural Circuitry","author":"D Marr","year":"1976"},{"key":"pcbi.1007594.ref028","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1037\/a0030852","article-title":"Cognitive control over learning: Creating, clustering, and generalizing task-set structure","volume":"120","author":"AGE Collins","year":"2013","journal-title":"Psychol Rev"},{"key":"pcbi.1007594.ref029","first-page":"201414219","volume-title":"Proceedings of the National Academy of Sciences","author":"QJ Huys","year":"2015"},{"key":"pcbi.1007594.ref030","unstructured":"Konidaris G. Constructing abstraction hierarchies using a skill-symbol loop. In: IJCAI: proceedings of the conference. vol. 2016. NIH Public Access; 2016. p. 1648."},{"key":"pcbi.1007594.ref031","unstructured":"Kondor RI, Lafferty J. Diffusion kernels on graphs and other discrete structures. In: Proceedings of the 19th international conference on machine learning. vol. 2002; 2002. p. 315\u2013322."},{"issue":"11","key":"pcbi.1007594.ref032","doi-asserted-by":"crossref","first-page":"1643","DOI":"10.1038\/nn.4650","article-title":"The hippocampus as a predictive map","volume":"20","author":"KL Stachenfeld","year":"2017","journal-title":"Nature neuroscience"},{"issue":"4","key":"pcbi.1007594.ref033","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1162\/neco.1993.5.4.613","article-title":"Improving generalization for temporal difference learning: The successor representation","volume":"5","author":"P Dayan","year":"1993","journal-title":"Neural Computation"},{"key":"pcbi.1007594.ref034","first-page":"4511","volume-title":"Advances in Neural Information Processing Systems","author":"D McNamee","year":"2016"},{"issue":"5","key":"pcbi.1007594.ref035","doi-asserted-by":"crossref","first-page":"889","DOI":"10.1162\/neco.1995.7.5.889","article-title":"The helmholtz machine","volume":"7","author":"P Dayan","year":"1995","journal-title":"Neural computation"},{"key":"pcbi.1007594.ref036","volume-title":"Probabilistic models of cognition","author":"N Chater","year":"2006"},{"issue":"3","key":"pcbi.1007594.ref037","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1017\/S0140525X12000477","article-title":"Whatever next? Predictive brains, situated agents, and the future of cognitive science","volume":"36","author":"A Clark","year":"2013","journal-title":"Behavioral and brain sciences"},{"issue":"2","key":"pcbi.1007594.ref038","doi-asserted-by":"crossref","first-page":"80","DOI":"10.1111\/j.1467-8721.2007.00480.x","article-title":"Event segmentation","volume":"16","author":"JM Zacks","year":"2007","journal-title":"Current directions in psychological science"},{"issue":"2","key":"pcbi.1007594.ref039","doi-asserted-by":"crossref","first-page":"312","DOI":"10.1016\/j.neuron.2013.09.007","article-title":"Goals and Habits in the Brain","volume":"80","author":"RJ Dolan","year":"2013","journal-title":"Neuron"},{"key":"pcbi.1007594.ref040","doi-asserted-by":"crossref","unstructured":"Thorndike EL. Animal intelligence; experimental studies. New York, The Macmillan Company; 1911. Available from: https:\/\/www.biodiversitylibrary.org\/item\/16001.","DOI":"10.5962\/bhl.title.55072"},{"issue":"4","key":"pcbi.1007594.ref041","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1037\/h0061626","article-title":"Cognitive Maps in Rats and Men","volume":"55","author":"EC Tolman","year":"1948","journal-title":"Psychological Review"},{"issue":"1","key":"pcbi.1007594.ref042","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1080\/14640748308400912","article-title":"The effect of the instrumental training contingency on susceptibility to reinforcer devaluation","volume":"35","author":"A Dickinson","year":"1983","journal-title":"The Quarterly Journal of Experimental Psychology"},{"key":"pcbi.1007594.ref043","volume-title":"Farrar, Straus and Giroux","author":"D Kahneman","year":"2011"},{"issue":"5","key":"pcbi.1007594.ref044","doi-asserted-by":"crossref","first-page":"645","DOI":"10.1017\/S0140525X00003435","article-title":"Individual differences in reasoning: Implications for the rationality debate?","volume":"23","author":"KE Stanovich","year":"2000","journal-title":"Behavioral and brain sciences"},{"issue":"1","key":"pcbi.1007594.ref045","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1111\/j.1460-9568.2004.03095.x","article-title":"Lesions of dorsolateral striatum preserve outcome expectancy but disrupt habit formation in instrumental learning","volume":"19","author":"HH Yin","year":"2004","journal-title":"European journal of neuroscience"},{"issue":"2","key":"pcbi.1007594.ref046","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1111\/j.1460-9568.2005.04218.x","article-title":"The role of the dorsomedial striatum in instrumental conditioning","volume":"22","author":"HH Yin","year":"2005","journal-title":"European Journal of Neuroscience"},{"issue":"4-5","key":"pcbi.1007594.ref047","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1016\/S0028-3908(98)00033-1","article-title":"Goal-directed instrumental action: contingency and incentive learning and their cortical substrates","volume":"37","author":"BW Balleine","year":"1998","journal-title":"Neuropharmacology"},{"issue":"5","key":"pcbi.1007594.ref048","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1016\/j.physbeh.2005.08.061","article-title":"Neural bases of food-seeking: affect, arousal and reward in corticostriatolimbic circuits","volume":"86","author":"BW Balleine","year":"2005","journal-title":"Physiology & behavior"},{"issue":"6","key":"pcbi.1007594.ref049","doi-asserted-by":"crossref","first-page":"1347","DOI":"10.1162\/089976602753712972","article-title":"Multiple model-based reinforcement learning","volume":"14","author":"K Doya","year":"2002","journal-title":"Neural computation"},{"issue":"12","key":"pcbi.1007594.ref050","doi-asserted-by":"crossref","first-page":"1704","DOI":"10.1038\/nn1560","article-title":"Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control","volume":"8","author":"ND Daw","year":"2005","journal-title":"Nature neuroscience"},{"issue":"9","key":"pcbi.1007594.ref051","doi-asserted-by":"crossref","first-page":"1321","DOI":"10.1177\/0956797617708288","article-title":"Cost-benefit arbitration between multiple reinforcement-learning systems","volume":"28","author":"W Kool","year":"2017","journal-title":"Psychological science"},{"issue":"3","key":"pcbi.1007594.ref052","doi-asserted-by":"crossref","first-page":"e1002410","DOI":"10.1371\/journal.pcbi.1002410","article-title":"Bonsai trees in your head: how the Pavlovian system sculpts goal-directed choices by pruning decision trees","volume":"8","author":"QJ Huys","year":"2012","journal-title":"PLoS computational biology"},{"issue":"4","key":"pcbi.1007594.ref053","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1145\/122344.122377","article-title":"Dyna, an integrated architecture for learning, planning, and reacting","volume":"2","author":"RS Sutton","year":"1991","journal-title":"ACM SIGART Bulletin"},{"key":"pcbi.1007594.ref054","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1016\/j.cobeha.2015.08.010","article-title":"Integrating memories to guide decisions","volume":"5","author":"D Shohamy","year":"2015","journal-title":"Current Opinion in Behavioral Sciences"},{"issue":"9","key":"pcbi.1007594.ref055","doi-asserted-by":"crossref","first-page":"680","DOI":"10.1038\/s41562-017-0180-8","article-title":"The successor representation in human reinforcement learning","volume":"1","author":"I Momennejad","year":"2017","journal-title":"Nature Human Behaviour"},{"issue":"1","key":"pcbi.1007594.ref056","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","article-title":"Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning","volume":"112","author":"RS Sutton","year":"1999","journal-title":"Artificial Intelligence"},{"issue":"7","key":"pcbi.1007594.ref057","doi-asserted-by":"crossref","first-page":"1036","DOI":"10.1111\/j.1460-9568.2012.08050.x","article-title":"Habits, action sequences and reinforcement learning","volume":"35","author":"A Dezfouli","year":"2012","journal-title":"European Journal of Neuroscience"},{"issue":"1","key":"pcbi.1007594.ref058","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1037\/a0038339","article-title":"Hierarchical control over effortful behavior by rodent medial frontal cortex: A computational model","volume":"122","author":"CB Holroyd","year":"2015","journal-title":"Psychological Review"},{"issue":"7","key":"pcbi.1007594.ref059","doi-asserted-by":"crossref","first-page":"e0180234","DOI":"10.1371\/journal.pone.0180234","article-title":"A neural model of hierarchical reinforcement learning","volume":"12","author":"D Rasmussen","year":"2017","journal-title":"PloS one"},{"issue":"1655","key":"pcbi.1007594.ref060","doi-asserted-by":"crossref","first-page":"20130480","DOI":"10.1098\/rstb.2013.0480","article-title":"Model-based hierarchical reinforcement learning and human action control","volume":"369","author":"M Botvinick","year":"2014","journal-title":"Philosophical Transactions of the Royal Society B: Biological Sciences"},{"key":"pcbi.1007594.ref061","doi-asserted-by":"crossref","first-page":"338","DOI":"10.1007\/3-540-45622-8_34","volume-title":"Abstraction, Reformulation, and Approximation","author":"A McGovern","year":"2002"},{"key":"pcbi.1007594.ref062","unstructured":"Dayan P, Hinton GE. Feudal Reinforcement Learning. In: Hanson SJ, Cowan JD, Giles CL, editors. Advances in Neural Information Processing Systems 5. Morgan-Kaufmann; 1993. p. 271\u2013278. Available from: http:\/\/papers.nips.cc\/paper\/714-feudal-reinforcement-learning.pdf."},{"key":"pcbi.1007594.ref063","volume-title":"Computing Research Repository","author":"AS Vezhnevets","year":"2017"},{"issue":"1","key":"pcbi.1007594.ref064","first-page":"227","article-title":"Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition","volume":"13","author":"TG Dietterich","year":"2000","journal-title":"J Artif Int Res"},{"key":"pcbi.1007594.ref065","unstructured":"Hengst B. Discovering Hierarchy in Reinforcement Learning with HEXQ. In: Proceedings of the Nineteenth International Conference on Machine Learning. ICML\u201902. San Francisco, CA, USA: Morgan Kaufmann Publishers Inc.; 2002. p. 243\u2013250. Available from: http:\/\/dl.acm.org\/citation.cfm?id=645531.656017."},{"key":"pcbi.1007594.ref066","volume-title":"Computing Research Repository","author":"MC Machado","year":"2017"},{"key":"pcbi.1007594.ref067","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1007\/3-540-45622-8_15","volume-title":"Abstraction, Reformulation, and Approximation","author":"B Ravindran","year":"2002"},{"key":"pcbi.1007594.ref068","first-page":"1281","volume-title":"Advances in Neural Information Processing Systems","author":"N Chentanez","year":"2005"},{"key":"pcbi.1007594.ref069","unstructured":"Kulkarni TD, Narasimhan KR, Saeedi A, Tenenbaum JB. Hierarchical Deep Reinforcement Learning: Integrating Temporal Abstraction and Intrinsic Motivation. In: Proceedings of the 30th International Conference on Neural Information Processing Systems. NIPS\u201916. USA: Curran Associates Inc.; 2016. p. 3682\u20133690. Available from: http:\/\/dl.acm.org\/citation.cfm?id=3157382.3157509."},{"key":"pcbi.1007594.ref070","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1007\/3-540-45622-8_16","volume-title":"Abstraction, Reformulation, and Approximation","author":"M Stolle","year":"2002"},{"key":"pcbi.1007594.ref071","unstructured":"McGovern A, Barto AG. Automatic Discovery of Subgoals in Reinforcement Learning Using Diverse Density. In: Proceedings of the Eighteenth International Conference on Machine Learning. ICML\u201901. San Francisco, CA, USA: Morgan Kaufmann Publishers Inc.; 2001. p. 361\u2013368. Available from: http:\/\/dl.acm.org\/citation.cfm?id=645530.655681."},{"key":"pcbi.1007594.ref072","doi-asserted-by":"crossref","unstructured":"Digney B. Emergent Hierarchical Control Structures: Learning Reactive \/ Hierarchical Relationships in Reinforcement Environments. In: Proceedings of the Fourth Conference on the Simulation of Adaptive Behavior: SAB 96; 1996.","DOI":"10.7551\/mitpress\/3118.003.0044"},{"key":"pcbi.1007594.ref073","unstructured":"\u015eim\u015fek O, Barto AG. Skill Characterization Based on Betweenness. In: Proceedings of the 21st International Conference on Neural Information Processing Systems. NIPS\u201908. USA: Curran Associates Inc.; 2008. p. 1497\u20131504. Available from: http:\/\/dl.acm.org\/citation.cfm?id=2981780.2981967."},{"key":"pcbi.1007594.ref074","doi-asserted-by":"crossref","unstructured":"Menache I, Mannor S, Shimkin N. Q-Cut\u2014Dynamic Discovery of Sub-goals in Reinforcement Learning. In: Elomaa T, Mannila H, Toivonen H, editors. Machine Learning: ECML 2002. Berlin, Heidelberg: Springer Berlin Heidelberg; 2002. p. 295\u2013306.","DOI":"10.1007\/3-540-36755-1_25"},{"key":"pcbi.1007594.ref075","doi-asserted-by":"crossref","unstructured":"Mannor S, Menache I, Hoze A, Klein U. Dynamic Abstraction in Reinforcement Learning via Clustering. In: Proceedings of the Twenty-first International Conference on Machine Learning. ICML\u201904. New York, NY, USA: ACM; 2004. p. 71\u2013. Available from: http:\/\/doi.acm.org\/10.1145\/1015330.1015355.","DOI":"10.1145\/1015330.1015355"},{"key":"pcbi.1007594.ref076","doi-asserted-by":"crossref","unstructured":"\u015eim\u015fek O, Wolfe AP, Barto AG. Identifying Useful Subgoals in Reinforcement Learning by Local Graph Partitioning. In: Proceedings of the 22Nd International Conference on Machine Learning. ICML\u201905. New York, NY, USA: ACM; 2005. p. 816\u2013823. Available from: http:\/\/doi.acm.org\/10.1145\/1102351.1102454.","DOI":"10.1145\/1102351.1102454"},{"key":"pcbi.1007594.ref077","unstructured":"Girgin S, Polat F, Alhajj R. Learning by Automatic Option Discovery from Conditionally Terminating Sequences. In: ECAI 2006, 17th European Conference on Artificial Intelligence. vol. 141; 2006. p. 494\u2013498."},{"key":"pcbi.1007594.ref078","volume-title":"Computing Research Repository","author":"A Vezhnevets","year":"2016"},{"key":"pcbi.1007594.ref079","volume-title":"Compositional Policy Priors","author":"D Wingate","year":"2013"},{"issue":"2","key":"pcbi.1007594.ref080","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1007\/s10994-016-5580-x","article-title":"Probabilistic inference for determining options in reinforcement learning","volume":"104","author":"C Daniel","year":"2016","journal-title":"Machine Learning"},{"issue":"1","key":"pcbi.1007594.ref081","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1080\/03640210701802071","article-title":"A rational analysis of rule-based concept learning","volume":"32","author":"ND Goodman","year":"2008","journal-title":"Cognitive science"},{"key":"pcbi.1007594.ref082","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.cogpsych.2017.05.001","article-title":"Where do hypotheses come from?","volume":"96","author":"I Dasgupta","year":"2017","journal-title":"Cognitive psychology"},{"issue":"2","key":"pcbi.1007594.ref083","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1016\/j.cognition.2012.10.010","article-title":"Rational variability in children\u2019s causal inferences: The sampling hypothesis","volume":"126","author":"S Denison","year":"2013","journal-title":"Cognition"},{"issue":"1","key":"pcbi.1007594.ref084","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/NECO_a_00226","article-title":"Multistability and perceptual inference","volume":"24","author":"SJ Gershman","year":"2012","journal-title":"Neural computation"},{"issue":"12","key":"pcbi.1007594.ref085","doi-asserted-by":"crossref","first-page":"883","DOI":"10.1016\/j.tics.2016.10.003","article-title":"Bayesian brains without probabilities","volume":"20","author":"AN Sanborn","year":"2016","journal-title":"Trends in cognitive sciences"},{"issue":"4","key":"pcbi.1007594.ref086","doi-asserted-by":"crossref","first-page":"599","DOI":"10.1111\/cogs.12101","article-title":"One and done? Optimal decisions from very few samples","volume":"38","author":"E Vul","year":"2014","journal-title":"Cognitive science"},{"key":"pcbi.1007594.ref087","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1016\/j.jmp.2017.01.002","article-title":"Online learning of symbolic concepts","volume":"77","author":"P Thaker","year":"2017","journal-title":"Journal of Mathematical Psychology"},{"issue":"4","key":"pcbi.1007594.ref088","doi-asserted-by":"crossref","first-page":"455","DOI":"10.1016\/j.cogdev.2012.07.005","article-title":"Theory learning as stochastic search in the language of thought","volume":"27","author":"TD Ullman","year":"2012","journal-title":"Cognitive Development"},{"key":"pcbi.1007594.ref089","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.cobeha.2015.08.009","article-title":"Reinforcement learning, efficient coding, and the statistics of natural tasks","volume":"5","author":"M Botvinick","year":"2015","journal-title":"Current Opinion in Behavioral Sciences"},{"issue":"2","key":"pcbi.1007594.ref090","doi-asserted-by":"crossref","first-page":"349","DOI":"10.1198\/jcgs.2009.06134","article-title":"Examples of adaptive MCMC","volume":"18","author":"GO Roberts","year":"2009","journal-title":"Journal of Computational and Graphical Statistics"},{"issue":"2","key":"pcbi.1007594.ref091","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1080\/10618600.2000.10474879","article-title":"Markov chain sampling methods for Dirichlet process mixture models","volume":"9","author":"RM Neal","year":"2000","journal-title":"Journal of computational and graphical statistics"}],"updated-by":[{"DOI":"10.1371\/journal.pcbi.1007594","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2020,4,16]],"date-time":"2020-04-16T00:00:00Z","timestamp":1586995200000}}],"container-title":["PLOS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007594","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,3]],"date-time":"2024-08-03T10:32:17Z","timestamp":1722681137000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1007594"}},"subtitle":[],"editor":[{"given":"David","family":"Pascucci","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,4,6]]},"references-count":91,"journal-issue":{"issue":"4","published-online":{"date-parts":[[2020,4,6]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1007594","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/499418","asserted-by":"object"}]},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,6]]}}}