{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,13]],"date-time":"2026-07-13T23:10:02Z","timestamp":1783984202639,"version":"3.55.0"},"reference-count":120,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,1,25]],"date-time":"2022-01-25T00:00:00Z","timestamp":1643068800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,25]],"date-time":"2022-01-25T00:00:00Z","timestamp":1643068800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["433323019, 402776968"],"award-info":[{"award-number":["433323019, 402776968"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["402791933"],"award-info":[{"award-number":["402791933"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["TRR 169"],"award-info":[{"award-number":["TRR 169"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["402776968"],"award-info":[{"award-number":["402776968"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["198647426,390727645"],"award-info":[{"award-number":["198647426,390727645"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["402776968, TRR 169"],"award-info":[{"award-number":["402776968, TRR 169"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004189","name":"Max-Planck-Gesellschaft","doi-asserted-by":"publisher","award":["IMPRS-IS"],"award-info":[{"award-number":["IMPRS-IS"]}],"id":[{"id":"10.13039\/501100004189","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100005156","name":"Alexander von Humboldt-Stiftung","doi-asserted-by":"publisher","award":["Feodor Lynen"],"award-info":[{"award-number":["Feodor Lynen"]}],"id":[{"id":"10.13039\/100005156","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-021-00433-9","type":"journal-article","created":{"date-parts":[[2022,1,25]],"date-time":"2022-01-25T17:05:26Z","timestamp":1643130326000},"page":"11-20","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":82,"title":["Intelligent problem-solving as integrated hierarchical reinforcement learning"],"prefix":"10.1038","volume":"4","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5473-3221","authenticated-orcid":false,"given":"Manfred","family":"Eppe","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2741-6551","authenticated-orcid":false,"given":"Christian","family":"Gumbsch","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matthias","family":"Kerzel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Phuong D. H.","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8120-8537","authenticated-orcid":false,"given":"Martin V.","family":"Butz","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,1,25]]},"reference":[{"key":"433_CR1","doi-asserted-by":"publisher","first-page":"686","DOI":"10.1016\/j.cub.2019.01.008","volume":"29","author":"R Gruber","year":"2019","unstructured":"Gruber, R. et al. New Caledonian crows use mental representations to solve metatool problems. Curr. Biol. 29, 686\u2013692 (2019).","journal-title":"Curr. Biol."},{"key":"433_CR2","doi-asserted-by":"crossref","unstructured":"Butz, M. V. & Kutter, E. F. How the Mind Comes into Being (Oxford Univ. Press, 2017).","DOI":"10.1093\/acprof:oso\/9780198739692.001.0001"},{"key":"433_CR3","unstructured":"Perkins, D. N. & Salomon, G. in International Encyclopedia of Education (eds. Husen T. & Postelwhite T. N.) 6452\u20136457 (Pergamon Press, 1992)."},{"key":"433_CR4","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1016\/j.cognition.2008.08.011","volume":"113","author":"MM Botvinick","year":"2009","unstructured":"Botvinick, M. M., Niv, Y. & Barto, A. C. Hierarchically organized behavior and its neural foundations: a reinforcement learning perspective. Cognition 113, 262\u2013280 (2009).","journal-title":"Cognition"},{"key":"433_CR5","doi-asserted-by":"publisher","first-page":"e1007594","DOI":"10.1371\/journal.pcbi.1007594","volume":"16","author":"MS Tomov","year":"2020","unstructured":"Tomov, M. S., Yagati, S., Kumar, A., Yang, W. & Gershman, S. J. Discovery of hierarchical representations for efficient planning.PLoS Comput. Biol. 16, e1007594 (2020).","journal-title":"PLoS Comput. Biol."},{"key":"433_CR6","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran, K., Deisenroth, M. P., Brundage, M. & Bharath, A. A. Deep reinforcement learning: a brief survey. IEEE Signal Process. Mag. 34, 26\u201338 (2017).","journal-title":"IEEE Signal Process. Mag."},{"key":"433_CR7","unstructured":"Li, Y. Deep reinforcement learning: an overview. Preprint at https:\/\/arxiv.org\/abs\/1701.07274 (2018)."},{"key":"433_CR8","unstructured":"Sutton, R. S. & Barto, A. G. Reinforcement Learning: An Introduction 2nd edn (MIT Press, 2018)."},{"key":"433_CR9","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1038\/s42256-019-0025-4","volume":"1","author":"EO Neftci","year":"2019","unstructured":"Neftci, E. O. & Averbeck, B. B. Reinforcement learning in artificial and biological systems. Nat. Mach. Intell. 1, 133\u2013143 (2019).","journal-title":"Nat. Mach. Intell."},{"key":"433_CR10","doi-asserted-by":"publisher","first-page":"123","DOI":"10.3389\/frobt.2019.00123","volume":"6","author":"M Eppe","year":"2019","unstructured":"Eppe, M., Nguyen, P. D. H. & Wermter, S. From semantics to execution: integrating action planning with reinforcement learning for robotic causal problem-solving. Front. Robot. AI 6, 123 (2019).","journal-title":"Front. Robot. AI"},{"key":"433_CR11","unstructured":"Oh, J., Singh, S., Lee, H. & Kohli, P. Zero-shot task generalization with multi-task deep reinforcement learning. In Proc. 34th International Conference on Machine Learning (ICML) (eds. Precup, D. & Teh, Y. W.) 2661\u20132670 (PMLR, 2017)."},{"key":"433_CR12","unstructured":"Sohn, S., Oh, J. & Lee, H. Hierarchical reinforcement learning for zero-shot generalization with subtask dependencies. In Proc. 32nd International Conference on Neural Information Processing Systems (NeurIPS) (eds Bengio S. et al.) Vol. 31, 7156\u20137166 (ACM, 2018)."},{"key":"433_CR13","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1016\/j.tics.2004.04.001","volume":"8","author":"M Hegarty","year":"2004","unstructured":"Hegarty, M. Mechanical reasoning by mental simulation. Trends Cogn. Sci. 8, 280\u2013285 (2004).","journal-title":"Trends Cogn. Sci."},{"key":"433_CR14","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/BF00053358","volume":"18","author":"KJ Klauer","year":"1989","unstructured":"Klauer, K. J. Teaching for analogical transfer as a means of improving problem-solving, thinking and learning. Instruct. Sci. 18, 179\u2013192 (1989).","journal-title":"Instruct. Sci."},{"key":"433_CR15","doi-asserted-by":"publisher","unstructured":"Duncker, K. & Lees, L. S. On problem-solving. Psychol. Monographs 58, No.5 (whole No. 270), 85\u2013101 https:\/\/doi.org\/10.1037\/h0093599 (1945).","DOI":"10.1037\/h0093599"},{"key":"433_CR16","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.neunet.2009.03.004","volume":"22","author":"P Dayan","year":"2009","unstructured":"Dayan, P. Goal-directed control and its antipodes. Neural Netw. 22, 213\u2013219 (2009).","journal-title":"Neural Netw."},{"key":"433_CR17","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.neuron.2013.09.007","volume":"80","author":"RJ Dolan","year":"2013","unstructured":"Dolan, R. J. & Dayan, P. Goals and habits in the brain. Neuron 80, 312\u2013325 (2013).","journal-title":"Neuron"},{"key":"433_CR18","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1146\/annurev-psych-010416-044216","volume":"68","author":"JP O\u2019Doherty","year":"2017","unstructured":"O\u2019Doherty, J. P., Cockburn, J. & Pauli, W. M. Learning, reward, and decision making. Annu. Rev. Psychol. 68, 73\u2013100 (2017).","journal-title":"Annu. Rev. Psychol."},{"key":"433_CR19","first-page":"257","volume":"4","author":"EC Tolman","year":"1930","unstructured":"Tolman, E. C. & Honzik, C. H. Introduction and removal of reward, and maze performance in rats. Univ. California Publ. Psychol. 4, 257\u2013275 (1930).","journal-title":"Univ. California Publ. Psychol."},{"key":"433_CR20","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1177\/1059-712302-010002-01","volume":"10","author":"MV Butz","year":"2002","unstructured":"Butz, M. V. & Hoffmann, J. Anticipations control behavior: animal behavior in an anticipatory learning classifier system. Adaptive Behav. 10, 75\u201396 (2002).","journal-title":"Adaptive Behav."},{"key":"433_CR21","doi-asserted-by":"crossref","unstructured":"Miller, G. A., Galanter, E. & Pribram, K. H. Plans and the Structure of Behavior (Holt, Rinehart & Winston, 1960).","DOI":"10.1037\/10039-000"},{"key":"433_CR22","doi-asserted-by":"publisher","first-page":"20130480","DOI":"10.1098\/rstb.2013.0480","volume":"369","author":"M Botvinick","year":"2014","unstructured":"Botvinick, M. & Weinstein, A. Model-based hierarchical reinforcement learning and human action control. Philos. Trans. R. Soc. B Biol. Sci. 369, 20130480 (2014).","journal-title":"Philos. Trans. R. Soc. B Biol. Sci."},{"key":"433_CR23","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1207\/s15427633scc0304_5","volume":"3","author":"JM Wiener","year":"2003","unstructured":"Wiener, J. M. & Mallot, H. A. \u2019Fine-to-coarse\u2019 route planning and navigation in regionalized environments. Spatial Cogn. Comput. 3, 331\u2013358 (2003).","journal-title":"Spatial Cogn. Comput."},{"key":"433_CR24","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1007\/s00426-003-0154-5","volume":"68","author":"A Stock","year":"2004","unstructured":"Stock, A. & Stock, C. A short history of ideo-motor action. Psychol. Res. 68, 176\u2013188 (2004).","journal-title":"Psychol. Res."},{"key":"433_CR25","doi-asserted-by":"publisher","first-page":"849","DOI":"10.1017\/S0140525X01000103","volume":"24","author":"B Hommel","year":"2001","unstructured":"Hommel, B., M\u00fcsseler, J., Aschersleben, G. & Prinz, W. The theory of event coding (TEC): a framework for perception and action planning. Behav. Brain Sci. 24, 849\u2013878 (2001).","journal-title":"Behav. Brain Sci."},{"key":"433_CR26","doi-asserted-by":"crossref","unstructured":"Hoffmann, J. in Anticipatory Behavior in Adaptive Learning Systems: Foundations, Theories and Systems (eds Butz, M. V. et al.) 44\u201365 (Springer, 2003).","DOI":"10.1007\/978-3-540-45002-3_4"},{"key":"433_CR27","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/s10339-007-0162-2","volume":"8","author":"W Kunde","year":"2007","unstructured":"Kunde, W., Elsner, K. & Kiesel, A. No anticipation-no action: the role of anticipation in action and perception. Cogn. Process. 8, 71\u201378 (2007).","journal-title":"Cogn. Process."},{"key":"433_CR28","doi-asserted-by":"publisher","first-page":"617","DOI":"10.1146\/annurev.psych.59.103006.093639","volume":"59","author":"LW Barsalou","year":"2008","unstructured":"Barsalou, L. W. Grounded cognition. Annu. Rev. Psychol. 59, 617\u2013645 (2008).","journal-title":"Annu. Rev. Psychol."},{"key":"433_CR29","doi-asserted-by":"publisher","first-page":"925","DOI":"10.3389\/fpsyg.2016.00925","volume":"7","author":"MV Butz","year":"2016","unstructured":"Butz, M. V. Toward a unified sub-symbolic computational theory of cognition. Front. Psychol. 7, 925 (2016).","journal-title":"Front. Psychol."},{"key":"433_CR30","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1016\/j.bandl.2009.08.002","volume":"112","author":"F Pulverm\u00fcller","year":"2010","unstructured":"Pulverm\u00fcller, F. Brain embodiment of syntax and grammar: discrete combinatorial mechanisms spelt out in neuronal circuits. Brain Lang. 112, 167\u2013179 (2010).","journal-title":"Brain Lang."},{"key":"433_CR31","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R. S., Precup, D. & Singh, S. Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112, 181\u2013211 (1999).","journal-title":"Artif. Intell."},{"key":"433_CR32","doi-asserted-by":"publisher","first-page":"660","DOI":"10.1016\/j.conb.2005.10.011","volume":"15","author":"T Flash","year":"2005","unstructured":"Flash, T. & Hochner, B. Motor primitives in vertebrates and invertebrates. Curr. Opin. Neurobiol. 15, 660\u2013666 (2005).","journal-title":"Curr. Opin. Neurobiol."},{"key":"433_CR33","unstructured":"Schaal, S. in Adaptive Motion of Animals and Machines (eds. Kimura, H. et al.) 261\u2013280 (Springer, 2006)."},{"key":"433_CR34","unstructured":"Feldman, J., Dodge, E. & Bryant, J. in The Oxford Handbook of Linguistic Analysis (eds Heine, B. & Narrog, H.) 111\u2013138 (Oxford Univ. Press, 2009)."},{"key":"433_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/1468-0017.00153","volume":"16","author":"JA Fodor","year":"2001","unstructured":"Fodor, J. A. Language, thought and compositionality. Mind Lang. 16, 1\u201315 (2001).","journal-title":"Mind Lang."},{"key":"433_CR36","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1146\/annurev-psych-122216-011829","volume":"71","author":"SM Frankland","year":"2020","unstructured":"Frankland, S. M. & Greene, J. D. Concepts and compositionality: in search of the brain\u2019s language of thought. Annu. Rev. Psychol. 71, 273\u2013303 (2020).","journal-title":"Annu. Rev. Psychol."},{"key":"433_CR37","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1080\/09540091.2011.569880","volume":"23","author":"JE Hummel","year":"2011","unstructured":"Hummel, J. E. Getting symbols out of a neural architecture. Connection Sci. 23, 109\u2013118 (2011).","journal-title":"Connection Sci."},{"key":"433_CR38","doi-asserted-by":"crossref","unstructured":"Haynes, J. D., Wisniewski, D., Gorgen, K., Momennejad, I. & Reverberi, C. FMRI decoding of intentions: compositionality, hierarchy and prospective memory. In Proc. 3rd International Winter Conference on Brain-Computer Interface (BCI), 1-3 (IEEE, 2015).","DOI":"10.1109\/IWW-BCI.2015.7073031"},{"key":"433_CR39","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9629.001.0001","volume-title":"The Geometry of Meaning","author":"P G\u00e4rdenfors","year":"2014","unstructured":"G\u00e4rdenfors, P. The Geometry of Meaning: Semantics Based on Conceptual Spaces (MIT Press, 2014)."},{"key":"433_CR40","unstructured":"Lakoff, G. & Johnson, M. Philosophy in the Flesh (Basic Books, 1999)."},{"key":"433_CR41","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.artint.2017.11.005","volume":"256","author":"M Eppe","year":"2018","unstructured":"Eppe, M. et al. A computational framework for concept blending. Artif. Intell. 256, 105\u2013129 (2018).","journal-title":"Artif. Intell."},{"key":"433_CR42","unstructured":"Turner, M. The Origin of Ideas (Oxford Univ. Press, 2014)."},{"key":"433_CR43","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1037\/0003-066X.55.1.68","volume":"55","author":"EL Deci","year":"2000","unstructured":"Deci, E. L. & Ryan, R. M. Self-determination theory and the facilitation of intrinsic motivation. Am. Psychol. 55, 68\u201378 (2000).","journal-title":"Am. Psychol."},{"key":"433_CR44","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1080\/17588928.2015.1020053","volume":"6","author":"K Friston","year":"2015","unstructured":"Friston, K. et al. Active inference and epistemic value. Cogn. Neurosci. 6, 187\u2013214 (2015).","journal-title":"Cogn. Neurosci."},{"key":"433_CR45","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1126\/science.153.3731.25","volume":"153","author":"DE Berlyne","year":"1966","unstructured":"Berlyne, D. E. Curiosity and exploration. Science 153, 25\u201333 (1966).","journal-title":"Science"},{"key":"433_CR46","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1037\/0033-2909.116.1.75","volume":"116","author":"G Loewenstein","year":"1994","unstructured":"Loewenstein, G. The psychology of curiosity: a review and reinterpretation. Psychol. Bull. 116, 75\u201398 (1994).","journal-title":"Psychol. Bull."},{"key":"433_CR47","doi-asserted-by":"crossref","unstructured":"Oudeyer, P.-Y., Kaplan, F. & Hafner, V. V. Intrinsic motivation systems for autonomous mental development. In IEEE Transactions on Evolutionary Computation (eds. Coello, C. A. C. et al.) Vol. 11, 265\u2013286 (IEEE, 2007).","DOI":"10.1109\/TEVC.2006.890271"},{"key":"433_CR48","doi-asserted-by":"publisher","first-page":"104","DOI":"10.2478\/v10059-008-0009-4","volume":"39","author":"W Pisula","year":"2008","unstructured":"Pisula, W. Play and exploration in animals\u2014a comparative analysis. Polish Psychol. Bull. 39, 104\u2013107 (2008).","journal-title":"Polish Psychol. Bull."},{"key":"433_CR49","doi-asserted-by":"publisher","first-page":"1419","DOI":"10.1016\/0028-3932(95)00073-C","volume":"33","author":"M Jeannerod","year":"1995","unstructured":"Jeannerod, M. Mental imagery in the motor context. Neuropsychologia 33, 1419\u20131432 (1995).","journal-title":"Neuropsychologia"},{"key":"433_CR50","doi-asserted-by":"crossref","unstructured":"Kahnemann, D. & Tversky, A. in Judgement under Uncertainty: Heuristics and Biases (eds Kahneman, D. et al.) Ch. 14, 201\u2013208 (Cambridge Univ. Press, 1982).","DOI":"10.1017\/CBO9780511809477.015"},{"key":"433_CR51","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1037\/0022-3514.56.2.161","volume":"56","author":"GL Wells","year":"1989","unstructured":"Wells, G. L. & Gavanski, I. Mental simulation of causality. J. Personal. Social Psychol. 56, 161\u2013169 (1989).","journal-title":"J. Personal. Social Psychol."},{"key":"433_CR52","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1037\/0003-066X.53.4.429","volume":"53","author":"SE Taylor","year":"1998","unstructured":"Taylor, S. E., Pham, L. B., Rivkin, I. D. & Armor, D. A. Harnessing the imagination: mental simulation, self-regulation and coping. Am. Psychol. 53, 429\u2013439 (1998).","journal-title":"Am. Psychol."},{"key":"433_CR53","doi-asserted-by":"crossref","unstructured":"Kaplan, F. & Oudeyer, P.-Y. in Embodied Artificial Intelligence, Lecture Notes in Computer Science Vol. 3139 (eds Iida, F. et al.) 259\u2013270 (Springer, 2004).","DOI":"10.1007\/978-3-540-27833-7_19"},{"key":"433_CR54","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1109\/TAMD.2010.2056368","volume":"2","author":"J Schmidhuber","year":"2010","unstructured":"Schmidhuber, J. Formal theory of creativity, fun, and intrinsic motivation. IEEE Trans. Auton. Mental Dev. 2, 230\u2013247 (2010).","journal-title":"IEEE Trans. Auton. Mental Dev."},{"key":"433_CR55","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/s00422-011-0424-z","volume":"104","author":"K Friston","year":"2011","unstructured":"Friston, K., Mattout, J. & Kilner, J. Action understanding and active inference. Biol. Cybern. 104, 137\u2013160 (2011).","journal-title":"Biol. Cybern."},{"key":"433_CR56","doi-asserted-by":"crossref","unstructured":"Oudeyer, P.-Y. Computational theories of curiosity-driven learning. In The New Science of Curiosity (ed. Goren Gordon), 43-72 (Nova Science Publishers, 2018); https:\/\/arxiv.org\/abs\/1802.10546","DOI":"10.31234\/osf.io\/3p8f6"},{"key":"433_CR57","doi-asserted-by":"publisher","first-page":"3463","DOI":"10.1007\/s11229-018-01932-w","volume":"198","author":"M Colombo","year":"2021","unstructured":"Colombo, M. & Wright, C. First principles in the life sciences: the free-energy principle, organicism and mechanism. Synthese 198, 3463\u20133488 (2021).","journal-title":"Synthese"},{"key":"433_CR58","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1002\/wcs.142","volume":"2","author":"Y Huang","year":"2011","unstructured":"Huang, Y. & Rao, R. P. Predictive coding. WIREs Cogn. Sci. 2, 580\u2013593 (2011).","journal-title":"WIREs Cogn. Sci."},{"key":"433_CR59","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1038\/nrn2787","volume":"11","author":"K Friston","year":"2010","unstructured":"Friston, K. The free-energy principle: a unified brain theory? Nat. Rev. Neurosci. 11, 127\u2013138 (2010).","journal-title":"Nat. Rev. Neurosci."},{"key":"433_CR60","doi-asserted-by":"publisher","first-page":"712","DOI":"10.1016\/j.tins.2004.10.007","volume":"27","author":"DC Knill","year":"2004","unstructured":"Knill, D. C. & Pouget, A. The Bayesian brain: the role of uncertainty in neural coding and computation. Trends Neurosci. 27, 712\u2013719 (2004).","journal-title":"Trends Neurosci."},{"key":"433_CR61","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1017\/S0140525X12000477","volume":"36","author":"A Clark","year":"2013","unstructured":"Clark, A. Whatever next? Predictive brains, situated agents, and the future of cognitive science. Behav. Brain Sci. 36, 181\u2013204 (2013).","journal-title":"Behav. Brain Sci."},{"key":"433_CR62","doi-asserted-by":"crossref","unstructured":"Clark, A. Surfing Uncertainty: Prediction, Action and the Embodied Mind (Oxford Univ. Press, 2016).","DOI":"10.1093\/acprof:oso\/9780190217013.001.0001"},{"key":"433_CR63","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1037\/0033-2909.133.2.273","volume":"133","author":"JM Zacks","year":"2007","unstructured":"Zacks, J. M., Speer, N. K., Swallow, K. M., Braver, T. S. & Reyonolds, J. R. Event perception: a mind\/brain perspective. Psychol. Bull. 133, 273\u2013293 (2007).","journal-title":"Psychol. Bull."},{"key":"433_CR64","unstructured":"Eysenbach, B., Ibarz, J., Gupta, A. & Levine, S. Diversity is all you need: learning skills without a reward function. In International Conference on Learning Representations (ICLR, 2019)."},{"key":"433_CR65","unstructured":"Frans, K., Ho, J., Chen, X., Abbeel, P. & Schulman, J. Meta learning shared hierarchies. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/pdf?id=SyX0IeWAW (ICLR, 2018)."},{"key":"433_CR66","unstructured":"Heess, N. et al. Learning and transfer of modulated locomotor controllers. Preprint at https:\/\/arxiv.org\/abs\/1610.05182 (2016)."},{"key":"433_CR67","unstructured":"Jiang, Y., Gu, S., Murphy, K. & Finn, C. Language as an abstraction for hierarchical deep reinforcement learning. In Neural Information Processing Systems (NeurIPS) (eds. Wallach, H. et al.) 9414\u20139426 (ACM, 2019)."},{"key":"433_CR68","unstructured":"Li, A. C., Florensa, C., Clavera, I. & Abbeel, P. Sub-policy adaptation for hierarchical reinforcement learning. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/forum?id=ByeWogStDS (ICLR, 2020)."},{"key":"433_CR69","unstructured":"Qureshi, A. H. et al. Composing task-agnostic policies with deep reinforcement learning. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/forum?id=H1ezFREtwH (ICLR, 2020)."},{"key":"433_CR70","unstructured":"Sharma, A., Gu, S., Levine, S., Kumar, V. & Hausman, K. Dynamics-aware unsupervised discovery of skills. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/forum?id=HJgLZR4KvH (ICLR, 2020)."},{"key":"433_CR71","doi-asserted-by":"crossref","unstructured":"Tessler, C., Givony, S., Zahavy, T., Mankowitz, D. J. & Mannor, S. A deep hierarchical approach to lifelong learning in minecraft. In Proc. 31st AAAI Conference on Artificial Intelligence 1553\u20131561 (AAAI, 2017).","DOI":"10.1609\/aaai.v31i1.10744"},{"key":"433_CR72","unstructured":"Vezhnevets, A. et al. Strategic attentive writer for learning macro-actions. In Neural Information Processing Systems (NIPS) (eds. Lee, D. et al.) 3494\u20133502 (NIPS, 2016)."},{"key":"433_CR73","doi-asserted-by":"crossref","unstructured":"Devin, C., Gupta, A., Darrell, T., Abbeel, P. & Levine, S. Learning modular neural network policies for multi-task and multi-robot transfer. In Proc. International Conference on Robotics and Automation (ICRA) (eds. Okamura, A. et al.) 2169\u20132176 (IEEE, 2017).","DOI":"10.1109\/ICRA.2017.7989250"},{"key":"433_CR74","unstructured":"Hejna, D. J., Abbeel, P. & Pinto, L. Hierarchically decoupled morphological transfer. In Proc. International Conference on Machine Learning (ICML) (eds. Daum\u00e9 III, H. & Singh, A.) 11409\u201311420 (PMLR, 2020)."},{"key":"433_CR75","unstructured":"Hamrick, J. B. et al. On the role of planning in model-based deep reinforcement learning. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/pdf?id=IrM64DGB21 (ICLR, 2021)."},{"key":"433_CR76","doi-asserted-by":"crossref","unstructured":"Sutton, R. S. Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In Proc. 7th International Conference on Machine Learning (ICML) (eds. Porter, B. W. & Mooney, R. J.) 216\u2013224 (Morgan Kaufmann, 1990).","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"433_CR77","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1613\/jair.1141","volume":"20","author":"D Nau","year":"2003","unstructured":"Nau, D. et al. SHOP2: an HTN planning system. J. Artif. Intell. Res. 20, 379\u2013404 (2003).","journal-title":"J. Artif. Intell. Res."},{"key":"433_CR78","doi-asserted-by":"crossref","unstructured":"Lyu, D., Yang, F., Liu, B. & Gustafson, S. SDRL: interpretable and data-efficient deep reinforcement learning leveraging symbolic planning. In Proc. AAAI Conference on Artificial Intelligence Vol. 33, 2970\u20132977 (AAAI, 2019).","DOI":"10.1609\/aaai.v33i01.33012970"},{"key":"433_CR79","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1007\/s10514-019-09871-2","volume":"44","author":"A Ma","year":"2020","unstructured":"Ma, A., Ouimet, M. & Cort\u00e9s, J. Hierarchical reinforcement learning via dynamic subspace search for multi-agent planning. Auton. Robot. 44, 485\u2013503 (2020).","journal-title":"Auton. Robot."},{"key":"433_CR80","doi-asserted-by":"crossref","unstructured":"Bacon, P.-L., Harb, J. & Precup, D. The option-critic architecture. In Proc. 31st AAAI Conference on Artificial Intelligence 1726\u20131734 (AAAI, 2017).","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"433_CR81","unstructured":"Dietterich, T. G. State abstraction in MAXQ hierarchical reinforcement learning. In Advances in Neural Information Processing Systems (NIPS) (eds. Solla, S. et al.) Vol. 12, 994\u20131000 (NIPS, 1999)."},{"key":"433_CR82","unstructured":"Kulkarni, T. D., Narasimhan, K. R., Saeedi, A. & Tenenbaum, J. B. Hierarchical deep reinforcement learning: integrating temporal abstraction and intrinsic motivation. In Neural Information Processing Systems (NIPS) (eds. Lee, D. et al.) 3675\u20133683 (NIPS, 2016)."},{"key":"433_CR83","unstructured":"Shankar, T., Pinto, L., Tulsiani, S. & Gupta, A. Discovering motor programs by recomposing demonstrations. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/attachment?id=rkgHY0NYwr&name=original_pdf (ICLR, 2020)."},{"key":"433_CR84","unstructured":"Vezhnevets, A. S., Wu, Y. T., Eckstein, M., Leblond, R. & Leibo, J. Z. Options as responses: grounding behavioural hierarchies in multi-agent reinforcement learning. In Proc. International Conference on Machine Learning (ICML) (eds. Daum\u00e9 III, H. & Singh, A.) 9733\u20139742 (PMLR, 2020)."},{"key":"433_CR85","doi-asserted-by":"publisher","first-page":"11782","DOI":"10.1109\/ACCESS.2020.2965930","volume":"8","author":"B Ghazanfari","year":"2020","unstructured":"Ghazanfari, B., Afghah, F. & Taylor, M. E. Sequential association rule mining for autonomously extracting hierarchical task structures in reinforcement learning. IEEE Access 8, 11782\u201311799 (2020).","journal-title":"IEEE Access"},{"key":"433_CR86","unstructured":"Levy, A., Konidaris, G., Platt, R. & Saenko, K. Learning multi-level hierarchies with hindsight. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/pdf?id=ryzECoAcY7 (ICLR, 2019)."},{"key":"433_CR87","unstructured":"Nachum, O., Gu, S., Lee, H. & Levine, S. Data-efficient hierarchical reinforcement learning. In Proc. 32nd International Conference on Neural Information Processing Systems (NIPS) (eds. Bengio, S. et al.) 3307\u20133317 (NIPS, 2018)."},{"key":"433_CR88","doi-asserted-by":"crossref","unstructured":"Rafati, J. & Noelle, D. C. Learning representations in model-free hierarchical reinforcement learning. In Proc. 33rd AAAI Conference on Artificial Intelligence 10009\u201310010 (AAAI, 2019).","DOI":"10.1609\/aaai.v33i01.330110009"},{"key":"433_CR89","doi-asserted-by":"crossref","unstructured":"R\u00f6der, F., Eppe, M., Nguyen, P. D. H. & Wermter, S. Curious hierarchical actor-critic reinforcement learning. In Proc. International Conference on Artificial Neural Networks (ICANN) (eds. Farka\u0161, I. et al.) 408\u2013419 (Springer, 2020).","DOI":"10.1007\/978-3-030-61616-8_33"},{"key":"433_CR90","unstructured":"Zhang, T., Guo, S., Tan, T., Hu, X. & Chen, F. Generating adjacency-constrained subgoals in hierarchical reinforcement learning. In Neural Information Processing Systems (NIPS) (eds. Larochelle, H. et al.) 21579-21590 (NIPS, 2020)."},{"key":"433_CR91","doi-asserted-by":"crossref","unstructured":"Lample, G. & Chaplot, D. S. Playing FPS games with deep reinforcement learning. In Proc. 31st AAAI Conference on Artificial Intelligence 2140\u20132146 (AAAI, 2017).","DOI":"10.1609\/aaai.v31i1.10827"},{"key":"433_CR92","unstructured":"Vezhnevets, A. S. et al. FeUdal networks for hierarchical reinforcement learning. In Proc. 34th International Conference on Machine Learning (ICML) (eds. Precup, D. & Teh, Y. W.) Vol. 70, 3540\u20133549 (PMLR, 2017)."},{"key":"433_CR93","doi-asserted-by":"crossref","unstructured":"Wulfmeier, M. et al. Compositional Transfer in Hierarchical Reinforcement Learning. In Robotics: Science and System XVI (RSS) (eds. Toussaint M. et al.) (Robotics: Science and Systems Foundation, 2020); https:\/\/arxiv.org\/abs\/1906.11228","DOI":"10.15607\/RSS.2020.XVI.054"},{"key":"433_CR94","doi-asserted-by":"publisher","first-page":"5174","DOI":"10.1109\/TNNLS.2018.2805379","volume":"29","author":"Z Yang","year":"2018","unstructured":"Yang, Z., Merrick, K., Jin, L. & Abbass, H. A. Hierarchical deep reinforcement learning for continuous action control. IEEE Trans. Neural Netw. Learn. Syst. 29, 5174\u20135184 (2018).","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"433_CR95","doi-asserted-by":"crossref","unstructured":"Toussaint, M., Allen, K. R., Smith, K. A. & Tenenbaum, J. B. Differentiable physics and stable modes for tool-use and manipulation planning. In Proc. Robotics: Science and Systems XIV (RSS) (eds. Kress-Gazit, H. et al.) https:\/\/ipvs.informatik.uni-stuttgart.de\/mlr\/papers\/18-toussaint-RSS.pdf (Robotics: Science and Systems Foundation, 2018).","DOI":"10.15607\/RSS.2018.XIV.044"},{"key":"433_CR96","doi-asserted-by":"crossref","unstructured":"Akrour, R., Veiga, F., Peters, J. & Neumann, G. Regularizing reinforcement learning with state abstraction. In Proc. IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) 534\u2013539 (IEEE, 2018).","DOI":"10.1109\/IROS.2018.8594201"},{"key":"433_CR97","unstructured":"Schaul, T. & Ring, M. Better generalization with forecasts. In Proc. 23rd International Joint Conference on Artificial Intelligence (IJCAI) (ed. Rossi, F.) 1656\u20131662 (AAAI, 2013)."},{"key":"433_CR98","unstructured":"Colas, C., Akakzia, A., Oudeyer, P.-Y., Chetouani, M. & Sigaud, O. Language-conditioned goal generation: a new approach to language grounding for RL. Preprint at https:\/\/arxiv.org\/abs\/2006.07043 (2020)."},{"key":"433_CR99","first-page":"12541","volume":"32","author":"S Blaes","year":"2019","unstructured":"Blaes, S., Pogancic, M. V., Zhu, J. J. & Martius, G. Control what you can: intrinsically motivated task-planning agent. Neural Inf. Process. Syst. 32, 12541\u201312552 (2019).","journal-title":"Neural Inf. Process. Syst."},{"key":"433_CR100","unstructured":"Haarnoja, T., Hartikainen, K., Abbeel, P. & Levine, S. Latent space policies for hierarchical reinforcement learning. In Proc. International Conference on Machine Learning (ICML) (eds. Dy, J. & Krause, A.) Vol. 4, 2965\u20132975 (PMLR, 2018)."},{"key":"433_CR101","doi-asserted-by":"publisher","first-page":"e0180234","DOI":"10.1371\/journal.pone.0180234","volume":"12","author":"D Rasmussen","year":"2017","unstructured":"Rasmussen, D., Voelker, A. & Eliasmith, C. A neural model of hierarchical reinforcement learning. PLoS ONE 12, e0180234 (2017).","journal-title":"PLoS ONE"},{"key":"433_CR102","unstructured":"Riedmiller, M. et al. Learning by playing\u2014solving sparse reward tasks from scratch. In Proc. International Conference on Machine Learning (ICML) (eds. Dy, J. & Krause, A.) Vol. 10, 6910\u20136919 (PMLR, 2018)."},{"key":"433_CR103","doi-asserted-by":"crossref","unstructured":"Yang, F., Lyu, D., Liu, B. & Gustafson, S. PEORL: integrating symbolic planning and hierarchical reinforcement learning for robust decision-making. In Proc. 27th International Joint Conference on Artificial Intelligence (IJCAI) (ed. Lang, J.) 4860\u20134866 (IJCAI, 2018).","DOI":"10.24963\/ijcai.2018\/675"},{"key":"433_CR104","unstructured":"Machado, M. C., Bellemare, M. G. & Bowling, M. A Laplacian framework for option discovery in reinforcement learning. In Proc. International Conference on Machine Learning (ICML) (eds. Precup, D. & Teh, Y. W.) Vol. 5, 3567\u20133582 (PMLR, 2017)."},{"key":"433_CR105","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A. A. & Darrell, T. Curiosity-driven exploration by self-supervised prediction. In Proc. 34th International Conference on Machine Learning (ICML) (eds. Precup, D. & Teh, Y. W.) 2778\u20132787 (PMLR, 2017).","DOI":"10.1109\/CVPRW.2017.70"},{"key":"433_CR106","doi-asserted-by":"crossref","unstructured":"Schillaci, G. et al. Intrinsic motivation and episodic memories for robot exploration of high-dimensional sensory spaces. Adaptive Behav. 29 549\u2013566 (2020).","DOI":"10.1177\/1059712320922916"},{"key":"433_CR107","unstructured":"Colas, C., Fournier, P., Sigaud, O., Chetouani, M. & Oudeyer, P.-Y. CURIOUS: intrinsically motivated modular multi-goal reinforcement learning. In Proc. International Conference on Machine Learning (ICML) (eds. Chaudhuri, K. & Salakhutdinov, R.) 1331\u20131340 (PMLR, 2019)."},{"key":"433_CR108","doi-asserted-by":"publisher","first-page":"103630","DOI":"10.1016\/j.robot.2020.103630","volume":"133","author":"MB Hafez","year":"2020","unstructured":"Hafez, M. B., Weber, C., Kerzel, M. & Wermter, S. Improving robot dual-system motor learning with intrinsically motivated meta-control and latent-space experience imagination. Robot. Auton. Syst. 133, 103630 (2020).","journal-title":"Robot. Auton. Syst."},{"key":"433_CR109","unstructured":"Yamamoto, K., Onishi, T. & Tsuruoka, Y. Hierarchical reinforcement learning with abductive planning. In Proc. ICML\/IJCAI\/AAMAS 2018 Workshop on Planning and Learning (PAL-18) (2018)."},{"key":"433_CR110","doi-asserted-by":"crossref","unstructured":"Wu, B., Gupta, J. K. & Kochenderfer, M. J. Model primitive hierarchical lifelong reinforcement learning. In Proc. International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS) (eds. Agmon, N. et al.) Vol. 1, 34\u201342 (IFAAMAS, 2019).","DOI":"10.1007\/s10458-020-09451-0"},{"key":"433_CR111","doi-asserted-by":"crossref","unstructured":"Li, Z., Narayan, A. & Leong, T. Y. An efficient approach to model-based hierarchical reinforcement learning. In Proc. 31st AAAI Conference on Artificial Intelligence 3583\u20133589 (AAAI, 2017).","DOI":"10.1609\/aaai.v31i1.11024"},{"key":"433_CR112","unstructured":"Hafner, D., Lillicrap, T. & Norouzi, M. Dream to control: learning behaviors by latent imagination. In Proc. International Conference on Learning Representations https:\/\/openreview.net\/pdf?id=S1lOTC4tDS (ICLR, 2020)."},{"key":"433_CR113","doi-asserted-by":"crossref","unstructured":"Deisenroth, M. P., Rasmussen, C. E. & Fox, D. Learning to control a low-cost manipulator using data-efficient reinforcement learning. In Robotics: Science and Systems VII (RSS) (eds. Durrant-Whyte, H. et al.) 57\u201364 (Robotics: Science and Systems Foundation, 2011).","DOI":"10.15607\/RSS.2011.VII.008"},{"key":"433_CR114","unstructured":"Ha, D. & Schmidhuber, J. Recurrent world models facilitate policy evolution. In Proc. 32nd International Conference on Neural Information Processing Systems (NeurIPS) (eds. Bengio, S. et al.) 2455\u20132467 (NIPS, 2018)."},{"key":"433_CR115","unstructured":"Battaglia, P. W. et al. Relational inductive biases, deep learning and graph networks. Preprint at https:\/\/arxiv.org\/abs\/1806.01261 (2018)."},{"key":"433_CR116","unstructured":"Andrychowicz, M. et al. Hindsight experience replay. In Proc. Neural Information Processing Systems (NIPS) (eds. Guyon I. et al.) 5048\u20135058 (NIPS, 2017); https:\/\/papers.nips.cc\/paper\/7090-hindsight-experience-replay.pdf"},{"key":"433_CR117","doi-asserted-by":"publisher","first-page":"e41703","DOI":"10.7554\/eLife.41703","volume":"8","author":"P Schwartenbeck","year":"2019","unstructured":"Schwartenbeck, P. et al. Computational mechanisms of curiosity and goal-directed exploration. eLife 8, e41703 (2019).","journal-title":"eLife"},{"key":"433_CR118","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P. & Levine, S. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In Proc. International Conference on Machine Learning (ICML) (eds. Dy, J. & Krause, A.) 1861\u20131870 (PMLR, 2018)."},{"key":"433_CR119","doi-asserted-by":"publisher","first-page":"681","DOI":"10.1016\/j.neuron.2005.04.026","volume":"46","author":"AJ Yu","year":"2005","unstructured":"Yu, A. J. & Dayan, P. Uncertainty, neuromodulation and attention. Neuron 46, 681\u2013692 (2005).","journal-title":"Neuron"},{"key":"433_CR120","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1111\/tops.12502","volume":"13","author":"DA Baldwin","year":"2021","unstructured":"Baldwin, D. A. & Kosie, J. E. How does the mind render streaming experience as events? Top. Cogn. Sci. 13, 79\u2013105 (2021).","journal-title":"Top. Cogn. Sci."}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00433-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00433-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00433-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,25]],"date-time":"2022-11-25T09:12:38Z","timestamp":1669367558000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-021-00433-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,25]]},"references-count":120,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2022,1]]}},"alternative-id":["433"],"URL":"https:\/\/doi.org\/10.1038\/s42256-021-00433-9","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,25]]},"assertion":[{"value":"18 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 December 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}