{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:57:31Z","timestamp":1743101851092,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":145,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642323744"},{"type":"electronic","value":"9783642323751"}],"license":[{"start":{"date-parts":[[2012,11,10]],"date-time":"2012-11-10T00:00:00Z","timestamp":1352505600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-32375-1_4","type":"book-chapter","created":{"date-parts":[[2013,3,29]],"date-time":"2013-03-29T03:35:02Z","timestamp":1364528102000},"page":"73-91","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Exploration from Generalization Mediated by Multiple Controllers"],"prefix":"10.1007","author":[{"given":"Peter","family":"Dayan","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,11,10]]},"reference":[{"key":"4_CR1","unstructured":"Acuna, D., Schrater, P.: Improving bayesian reinforcement learning using transition abstraction. In: ICML\/UAI\/COLT Workshop on Abstraction in Reinforcement Learning. Montreal, Canada (2009)"},{"key":"4_CR2","unstructured":"Asmuth, J., Li, L., Littman, M., Nouri, A., Wingate, D.: A bayesian sampling approach to exploration in reinforcement learning. 
In: UAI, Montreal, Canada (2009)"},{"key":"4_CR3","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1146\/annurev.neuro.28.061604.135709","volume":"28","author":"G Aston-Jones","year":"2005","unstructured":"Aston-Jones, G., Cohen, J.D.: An integrative theory of locus coeruleus-norepinephrine function: Adaptive gain and optimal performance. Annu. Rev. Neurosci. 28, 403\u2013450 (2005)","journal-title":"Annu. Rev. Neurosci."},{"issue":"2","key":"4_CR4","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2), 235\u2013256 (2002a)","journal-title":"Mach. Learn."},{"issue":"1","key":"4_CR5","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1137\/S0097539701398375","volume":"32","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., Schapire, R.: The nonstochastic multiarmed bandit problem. SIAM J. Comput. 32(1), 48\u201377 (2002b)","journal-title":"SIAM J. Comput."},{"issue":"5","key":"4_CR6","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1016\/j.physbeh.2005.08.061","volume":"86","author":"BW Balleine","year":"2005","unstructured":"Balleine, B.W.: Neural bases of food-seeking: Affect, arousal and reward in corticostriatolimbic circuits. Physiol. Behav. 86(5), 717\u2013730 (2005)","journal-title":"Physiol. Behav."},{"issue":"9","key":"4_CR7","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1016\/0166-2236(94)90047-7","volume":"17","author":"R Bandler","year":"1994","unstructured":"Bandler, R., Shipley, M.T.: Columnar organization in the midbrain periaqueductal gray: Modules for emotional expression? Trends Neurosci. 
17(9), 379\u2013389 (1994)","journal-title":"Trends Neurosci."},{"key":"4_CR8","first-page":"215","volume-title":"Models of Information Processing in the Basal Ganglia","author":"A Barto","year":"1995","unstructured":"Barto, A.: Adaptive critics and the basal ganglia. In: Houk, J., Davis, J., Beiser, D. (eds.) Models of Information Processing in the Basal Ganglia, pp. 215\u2013232. MIT, Cambridge (1995)"},{"issue":"4","key":"4_CR9","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"A Barto","year":"2003","unstructured":"Barto, A., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discr. Event Dyn. Syst. 13(4), 341\u2013379 (2003)","journal-title":"Discr. Event Dyn. Syst."},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Barto, A., Singh, S., Chentanez, N.: Intrinsically motivated learning of hierarchical collections of skills. In: ICDL 2004, La Jolla, CA (2004)","DOI":"10.21236\/ADA440280"},{"issue":"5","key":"4_CR11","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"13","author":"A Barto","year":"1983","unstructured":"Barto, A., Sutton, R., Anderson, C.: Neuronlike elements that can solve difficult learning control problems. IEEE Trans. Syst. Man Cybern. 13(5), 834\u2013846 (1983)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"4_CR12","first-page":"17","volume-title":"Intrinsically Motivated Learning in Natural and Artificial Systems","author":"AG Barto","year":"2012","unstructured":"Barto, A.G.: Intrinsic motivation and reinforcement learning. In: Baldassarre, G., Mirolli, M. (eds.) Intrinsically Motivated Learning in Natural and Artificial Systems, pp. 17\u201347. Springer, Berlin (2012)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Beal, M., Ghahramani, Z., Rasmussen, C.: The infinite hidden Markov model. In: NIPS, pp. 
577\u2013584, Vancouver, Canada (2002)","DOI":"10.7551\/mitpress\/1120.003.0079"},{"issue":"9","key":"4_CR14","doi-asserted-by":"crossref","first-page":"1214","DOI":"10.1038\/nn1954","volume":"10","author":"T.E.J. Behrens","year":"2007","unstructured":"Behrens, T.E.J., Woolrich, M.W., Walton, M.E., Rushworth, M.F.S.: Learning the value of information in an uncertain world. Nat. Neurosci. 10(9), 1214\u20131221 (2007)","journal-title":"Nat. Neurosci."},{"key":"4_CR15","volume-title":"Dynamic Programming","author":"RE Bellman","year":"1957","unstructured":"Bellman, R.E.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"key":"4_CR16","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1016\/j.physbeh.2004.02.004","volume":"81","author":"KC Berridge","year":"2004","unstructured":"Berridge, K.C.: Motivation concepts in behavioral neuroscience. Physiol. Behav. 81, 179\u2013209 (2004)","journal-title":"Physiol. Behav."},{"key":"4_CR17","doi-asserted-by":"crossref","DOI":"10.1007\/978-94-015-3711-7","volume-title":"Bandit Problems: Sequential Allocation of Experiments","author":"DA Berry","year":"1985","unstructured":"Berry, D.A., Fristedt, B.: Bandit Problems: Sequential Allocation of Experiments. Springer, Berlin (1985)"},{"key":"4_CR18","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1146\/annurev.ps.39.020188.000355","volume":"39","author":"DC Blanchard","year":"1988","unstructured":"Blanchard, D.C., Blanchard, R.J.: Ethoexperimental approaches to the biology of emotion. Annu. Rev. Psychol. 39, 43\u201368 (1988)","journal-title":"Annu. Rev. Psychol."},{"issue":"2","key":"4_CR19","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1080\/01969720590897107","volume":"36","author":"D Blank","year":"2005","unstructured":"Blank, D., Kumar, D., Meeden, L., Marshall, J.: Bringing up robot: Fundamental mechanisms for creating a self-motivated, self-organizing architecture. Cybern. Syst. 
36(2), 125\u2013150 (2005)","journal-title":"Cybern. Syst."},{"key":"4_CR20","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1037\/h0028589","volume":"77","author":"RC Bolles","year":"1970","unstructured":"Bolles, R.C.: Species-specific defense reactions and avoidance learning. Psychol. Rev. 77, 32\u201348 (1970)","journal-title":"Psychol. Rev."},{"issue":"3","key":"4_CR21","doi-asserted-by":"crossref","first-page":"262","DOI":"10.1016\/j.cognition.2008.08.011","volume":"113","author":"MM Botvinick","year":"2009","unstructured":"Botvinick, M.M., Niv, Y., Barto, A.C.: Hierarchically organized behavior and its neural foundations: A reinforcement learning perspective. Cognition 113(3), 262\u2013280 (2009)","journal-title":"Cognition"},{"issue":"1","key":"4_CR22","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1038\/npp.2010.151","volume":"36","author":"Y.-L. Boureau","year":"2011","unstructured":"Boureau, Y.-L., Dayan, P.: Opponency revisited: Competition and cooperation between dopamine and serotonin. Neuropsychopharmacology 36(1), 74\u201397 (2011)","journal-title":"Neuropsychopharmacology"},{"key":"4_CR23","first-page":"213","volume":"3","author":"R Brafman","year":"2003","unstructured":"Brafman, R., Tennenholtz, M.: R-max-a general polynomial time algorithm for near-optimal reinforcement learning. J. Mach. Learn. Res. 3, 213\u2013231 (2003)","journal-title":"J. Mach. Learn. Res."},{"issue":"9","key":"4_CR24","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1037\/h0040090","volume":"16","author":"K Breland","year":"1961","unstructured":"Breland, K., Breland, M.: The misbehavior of organisms. Am. Psychol. 16(9), 681\u201384 (1961)","journal-title":"Am. Psychol."},{"key":"4_CR25","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1109\/2.33","volume":"21","author":"G Carpenter","year":"1988","unstructured":"Carpenter, G., Grossberg, S.: The ART of adaptive pattern recognition by a self-organizing neural network. 
Computer 21, 77\u201388 (1988)","journal-title":"Computer"},{"issue":"1","key":"4_CR26","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","volume":"28","author":"R Caruana","year":"1997","unstructured":"Caruana, R.: Multitask learning. Mach. Learn. 28(1), 41\u201375 (1997)","journal-title":"Mach. Learn."},{"key":"4_CR27","unstructured":"Collins, A.: Apprentissage et Contr\u00f4le Cognitif: Une Th\u00e9orie de la Fonction Executive Pr\u00e9frontale Humaine. Ph.D. Thesis, Universit\u00e9 Pierre et Marie Curie, Paris (2010)"},{"key":"4_CR28","unstructured":"Courville, A., Daw, N., Touretzky, D.: Similarity and discrimination in classical conditioning: A latent variable account. In: NIPS, pp. 313\u2013320, Vancouver, Canada (2004)"},{"issue":"2","key":"4_CR29","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1016\/j.conb.2006.03.006","volume":"16","author":"ND Daw","year":"2006","unstructured":"Daw, N.D., Doya, K.: The computational neurobiology of learning and reward. Curr. Opin. Neurobiol. 16(2), 199\u2013204 (2006)","journal-title":"Curr. Opin. Neurobiol."},{"key":"4_CR30","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1016\/S0893-6080(02)00052-7","volume":"15","author":"ND Daw","year":"2002","unstructured":"Daw, N.D., Kakade, S., Dayan, P.: Opponent interactions between serotonin and dopamine. Neural Netw. 15, 603\u201316 (2002)","journal-title":"Neural Netw."},{"issue":"12","key":"4_CR31","doi-asserted-by":"crossref","first-page":"1704","DOI":"10.1038\/nn1560","volume":"8","author":"ND Daw","year":"2005","unstructured":"Daw, N.D., Niv, Y., Dayan, P.: Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control. Nat. Neurosci. 8(12), 1704\u20131711 (2005)","journal-title":"Nat. 
Neurosci."},{"issue":"7095","key":"4_CR32","doi-asserted-by":"crossref","first-page":"876","DOI":"10.1038\/nature04766","volume":"441","author":"ND Daw","year":"2006","unstructured":"Daw, N.D., O\u2019Doherty, J.P., Dayan, P., Seymour, B., Dolan, R.J.: Cortical substrates for exploratory decisions in humans. Nature 441(7095), 876\u2013879 (2006)","journal-title":"Nature"},{"key":"4_CR33","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3389\/neuro.10.001.2007","volume":"1","author":"P Dayan","year":"2007","unstructured":"Dayan, P.: Bilinearity, rules, and prefrontal cortex. Front. Comput. Neurosci. 1, 1 (2007)","journal-title":"Front. Comput. Neurosci."},{"key":"4_CR34","volume-title":"Advances in Neural Information Processing Systems (NIPS) 5","author":"P Dayan","year":"1993","unstructured":"Dayan, P., Hinton, G.: Feudal reinforcement learning. In: Hanson, S.J., Cowan, J.D., Giles, C.L. (eds.) Advances in Neural Information Processing Systems (NIPS) 5. MIT, Cambridge (1993)"},{"issue":"2","key":"4_CR35","doi-asserted-by":"crossref","first-page":"e4","DOI":"10.1371\/journal.pcbi.0040004","volume":"4","author":"P Dayan","year":"2008","unstructured":"Dayan, P., Huys, Q.J.M.: Serotonin, inhibition, and negative mood. PLoS Comput. Biol. 4(2), e4 (2008)","journal-title":"PLoS Comput. Biol."},{"key":"4_CR36","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1146\/annurev.neuro.051508.135607","volume":"32","author":"P Dayan","year":"2009","unstructured":"Dayan, P., Huys, Q.J.M.: Serotonin in affective control. Annu. Rev. Neurosci. 32, 95\u2013126 (2009)","journal-title":"Annu. Rev. Neurosci."},{"issue":"8","key":"4_CR37","doi-asserted-by":"crossref","first-page":"1153","DOI":"10.1016\/j.neunet.2006.03.002","volume":"19","author":"P Dayan","year":"2006","unstructured":"Dayan, P., Niv, Y., Seymour, B., Daw, N.D.: The misbehavior of value and the discipline of the will. Neural Netw. 
19(8), 1153\u20131160 (2006)","journal-title":"Neural Netw."},{"issue":"1","key":"4_CR38","first-page":"5","volume":"25","author":"P Dayan","year":"1996","unstructured":"Dayan, P., Sejnowski, T.: Exploration bonuses and dual control. Mach. Learn. 25(1), 5\u201322 (1996)","journal-title":"Mach. Learn."},{"key":"4_CR39","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1177\/026988119100500414","volume":"5","author":"J.F.W. Deakin","year":"1991","unstructured":"Deakin, J.F.W., Graeff, F.G.: 5-HT and mechanisms of defence. J. Psychopharmacol. 5, 305\u2013316 (1991)","journal-title":"J. Psychopharmacol."},{"key":"4_CR40","unstructured":"Dearden, R., Friedman, N., Andre, D.: Model based Bayesian exploration. In: UAI, Stockholm, Sweden pp. 150\u2013159 (1999)"},{"key":"4_CR41","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4899-2271-7","volume-title":"Intrinsic motivation and self-determination in human behavior","author":"E Deci","year":"1985","unstructured":"Deci, E., Ryan, R.: Intrinsic motivation and self-determination in human behavior. Plenum, New York (1985)"},{"key":"4_CR42","volume-title":"Contemporary animal learning theory","author":"A Dickinson","year":"1980","unstructured":"Dickinson, A.: Contemporary animal learning theory. Cambridge University Press, Cambridge (1980)"},{"key":"4_CR43","first-page":"497","volume-title":"Stevens\u2019 Handbook of Experimental Psychology","author":"A Dickinson","year":"2002","unstructured":"Dickinson, A., Balleine, B.: The role of learning in motivation. In: Gallistel, C. (ed.) Stevens\u2019 Handbook of Experimental Psychology, vol. 3, pp. 497\u2013533. Wiley, New York (2002)"},{"key":"4_CR44","unstructured":"Dietterich, T.: The MAXQ method for hierarchical reinforcement learning. In: ICML, pp. 
118\u2013126, Madison, Wisconsin, (1998)"},{"issue":"1","key":"4_CR45","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"T Dietterich","year":"2000","unstructured":"Dietterich, T.: Hierarchical reinforcement learning with the MAXQ value function decomposition. J. Artif. Intell. Res. 13(1), 227\u2013303 (2000)","journal-title":"J. Artif. Intell. Res."},{"issue":"4\u20136","key":"4_CR46","doi-asserted-by":"crossref","first-page":"495","DOI":"10.1016\/S0893-6080(02)00044-8","volume":"15","author":"K Doya","year":"2002","unstructured":"Doya, K.: Metalearning and neuromodulation. Neural Netw. 15(4\u20136), 495\u2013506 (2002)","journal-title":"Neural Netw."},{"issue":"6","key":"4_CR47","doi-asserted-by":"crossref","first-page":"1347","DOI":"10.1162\/089976602753712972","volume":"14","author":"K. Doya","year":"2002","unstructured":"Doya, K., Samejima, K., ichi Katagiri, K., Kawato, M.: Multiple model-based reinforcement learning. Neural Comput. 14(6), 1347\u20131369 (2002)","journal-title":"Neural Comput."},{"key":"4_CR48","unstructured":"Duff, M.: Optimal Learning: Computational approaches for Bayes-adaptive Markov decision processes. Ph.D. Thesis, Computer Science Department, University of Massachusetts, Amherst (2000)"},{"issue":"2","key":"4_CR49","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1023\/A:1017944732463","volume":"49","author":"D Foster","year":"2002","unstructured":"Foster, D., Dayan, P.: Structure in the space of value functions. Mach. Learn. 49(2), 325\u2013346 (2002)","journal-title":"Mach. Learn."},{"key":"4_CR50","unstructured":"Gershman, S., Cohen, J., Niv, Y.: Learning to selectively attend. 
In: Proceedings of the 32nd Annual Conference of the Cognitive Science Society, Portland, Oregon (2010a)"},{"key":"4_CR51","volume-title":"Learning latent structure: Carving nature at its joints","author":"S Gershman","year":"2010","unstructured":"Gershman, S., Niv, Y.: Learning latent structure: Carving nature at its joints. Curr. Opin. Neurobiol. (2010)"},{"issue":"1","key":"4_CR52","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1037\/a0017808","volume":"117","author":"SJ Gershman","year":"2010","unstructured":"Gershman, S.J., Blei, D.M., Niv, Y.: Context, learning, and extinction. Psychol. Rev. 117(1), 197\u2013209 (2010b)","journal-title":"Psychol. Rev."},{"key":"4_CR53","volume-title":"Multi-Armed Bandit Allocation Indices","author":"JC Gittins","year":"1989","unstructured":"Gittins, J.C.: Multi-Armed Bandit Allocation Indices. Wiley, New York (1989)"},{"key":"4_CR54","doi-asserted-by":"crossref","first-page":"382","DOI":"10.1016\/0023-9690(76)90044-8","volume":"7","author":"F Goodkin","year":"1976","unstructured":"Goodkin, F.: Rats learn the relationship between responding and environmental events: An expansion of the learned helplessness hypothesis. Learn. Motiv. 7, 382\u2013393 (1976)","journal-title":"Learn. Motiv."},{"key":"4_CR55","doi-asserted-by":"crossref","DOI":"10.1093\/acprof:oso\/9780198522713.001.0001","volume-title":"The Neuropsychology of Anxiety","author":"JA Gray","year":"2003","unstructured":"Gray, J.A., McNaughton, N.: The Neuropsychology of Anxiety, 2nd edn. OUP, Oxford (2003)","edition":"2"},{"key":"4_CR56","volume-title":"The Psychology of Learning","author":"E Guthrie","year":"1952","unstructured":"Guthrie, E.: The Psychology of Learning. 
Harper & Row, New York (1952)"},{"issue":"1485","key":"4_CR57","doi-asserted-by":"crossref","first-page":"1601","DOI":"10.1098\/rstb.2007.2055","volume":"362","author":"TE Hazy","year":"2007","unstructured":"Hazy, T.E., Frank, M.J., O\u2019reilly, R.C.: Towards an executive without a homunculus: Computational models of the prefrontal cortex\/basal ganglia system. Philos. Trans. R. Soc. Lond. B Biol. Sci. 362(1485), 1601\u20131613 (2007)","journal-title":"Philos. Trans. R. Soc. Lond. B Biol. Sci."},{"issue":"5","key":"4_CR58","doi-asserted-by":"crossref","first-page":"3031","DOI":"10.1152\/jn.2000.83.5.3031","volume":"83","author":"CM Hempel","year":"2000","unstructured":"Hempel, C.M., Hartman, K.H., Wang, X.J., Turrigiano, G.G., Nelson, S.B.: Multiple forms of short-term plasticity at excitatory synapses in rat medial prefrontal cortex. J. Neurophysiol. 83(5), 3031\u20133041 (2000)","journal-title":"J. Neurophysiol."},{"key":"4_CR59","doi-asserted-by":"crossref","first-page":"443","DOI":"10.3758\/BF03200092","volume":"14","author":"WA Hershberger","year":"1986","unstructured":"Hershberger, W.A.: An approach through the looking-glass. Anim. Learn. Behav. 14, 443\u201351 (1986)","journal-title":"Anim. Learn. Behav."},{"issue":"5214","key":"4_CR60","doi-asserted-by":"crossref","first-page":"1158","DOI":"10.1126\/science.7761831","volume":"268","author":"GE Hinton","year":"1995","unstructured":"Hinton, G.E., Dayan, P., Frey, B.J., Neal, R.M.: The \u201cwake-sleep\u201d algorithm for unsupervised neural networks. Science 268(5214), 1158\u20131161 (1995)","journal-title":"Science"},{"issue":"1358","key":"4_CR61","doi-asserted-by":"crossref","first-page":"1177","DOI":"10.1098\/rstb.1997.0101","volume":"352","author":"GE Hinton","year":"1997","unstructured":"Hinton, G.E., Ghahramani, Z.: Generative models for discovering sparse distributed representations. Philos. Trans. R. Soc. Lond. B Biol. Sci. 352(1358), 1177\u20131190 (1997)","journal-title":"Philos. Trans. R. Soc. 
Lond. B Biol. Sci."},{"issue":"5786","key":"4_CR62","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Salakhutdinov, R.R.: Reducing the dimensionality of data with neural networks. Science 313(5786), 504\u2013507 (2006)","journal-title":"Science"},{"issue":"4\u20135","key":"4_CR63","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1016\/S0028-3908(98)00038-0","volume":"37","author":"P Holland","year":"1998","unstructured":"Holland, P.: Amount of training affects associatively-activated event representation. Neuropharmacology 37(4\u20135), 461\u2013469 (1998)","journal-title":"Neuropharmacology"},{"issue":"4","key":"4_CR64","doi-asserted-by":"crossref","first-page":"651","DOI":"10.1016\/S0306-4522(00)00019-1","volume":"96","author":"JC Horvitz","year":"2000","unstructured":"Horvitz, J.C.: Mesolimbocortical and nigrostriatal dopamine responses to salient non-reward events. Neuroscience 96(4), 651\u2013656 (2000)","journal-title":"Neuroscience"},{"issue":"2","key":"4_CR65","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1016\/S0006-8993(97)00265-5","volume":"759","author":"JC Horvitz","year":"1997","unstructured":"Horvitz, J.C., Stewart, T., Jacobs, B.L.: Burst activity of ventral tegmental dopamine neurons is elicited by sensory stimuli in the awake cat. Brain Res. 759(2), 251\u2013258 (1997)","journal-title":"Brain Res."},{"issue":"1","key":"4_CR66","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1109\/TSSC.1966.300074","volume":"2","author":"R Howard","year":"1966","unstructured":"Howard, R.: Information value theory. IEEE Trans. Syst. Sci. Cybern. 2(1), 22\u201326 (1966)","journal-title":"IEEE Trans. Syst. Sci. 
Cybern."},{"key":"4_CR67","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1142\/S0219843607001011","volume":"4","author":"X Huang","year":"2007","unstructured":"Huang, X., Weng, J.: Inherent value systems for autonomous mental development. Int. J. Human. Robot. 4, 407\u2013433 (2007)","journal-title":"Int. J. Human. Robot."},{"key":"4_CR68","doi-asserted-by":"crossref","DOI":"10.1007\/b138233","volume-title":"Universal Artificial Intelligence: Sequential Decisions Based on Algorithmic Probability","author":"M Hutter","year":"2005","unstructured":"Hutter, M.: Universal Artificial Intelligence: Sequential Decisions Based on Algorithmic Probability. Springer, Berlin (2005)"},{"key":"4_CR69","unstructured":"Huys, Q.: Reinforcers and control. Towards a computational \u00e6tiology of depression. Ph.D. Thesis, Gatsby Computational Neuroscience Unit, UCL (2007)"},{"key":"4_CR70","doi-asserted-by":"crossref","first-page":"314","DOI":"10.1016\/j.cognition.2009.01.008","volume":"113","author":"Q.J.M. Huys","year":"2009","unstructured":"Huys, Q.J.M., Dayan, P.: A Bayesian formulation of behavioral control. Cognition 113, 314\u2013328 (2009)","journal-title":"Cognition"},{"issue":"4\u20136","key":"4_CR71","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1016\/S0893-6080(02)00056-4","volume":"15","author":"S Ishii","year":"2002","unstructured":"Ishii, S., Yoshida, W., Yoshimoto, J.: Control of exploitation-exploration meta-parameter in reinforcement learning. Neural Netw. 15(4\u20136), 665\u2013687 (2002)","journal-title":"Neural Netw."},{"issue":"1\u20132","key":"4_CR72","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L Kaelbling","year":"1998","unstructured":"Kaelbling, L., Littman, M., Cassandra, A.: Planning and acting in partially observable stochastic domains. Artif. Intell. 101(1\u20132), 99\u2013134 (1998)","journal-title":"Artif. 
Intell."},{"issue":"4\u20136","key":"4_CR73","doi-asserted-by":"crossref","first-page":"549","DOI":"10.1016\/S0893-6080(02)00048-5","volume":"15","author":"S Kakade","year":"2002","unstructured":"Kakade, S., Dayan, P.: Dopamine: Generalization and bonuses. Neural Netw. 15(4\u20136), 549\u2013559 (2002)","journal-title":"Neural Netw."},{"issue":"2","key":"4_CR74","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1023\/A:1017984413808","volume":"49","author":"M Kearns","year":"2002","unstructured":"Kearns, M., Singh, S.: Near-optimal reinforcement learning in polynomial time. Mach. Learn. 49(2), 209\u2013232 (2002)","journal-title":"Mach. Learn."},{"issue":"7\u20138","key":"4_CR75","doi-asserted-by":"crossref","first-page":"669","DOI":"10.1016\/S0149-7634(01)00049-5","volume":"25","author":"KA Keay","year":"2001","unstructured":"Keay, K.A., Bandler, R.: Parallel circuits mediating distinct emotional coping reactions to different types of stress. Neurosci. Biobehav. Rev. 25(7\u20138), 669\u2013678 (2001)","journal-title":"Neurosci. Biobehav. Rev."},{"issue":"4","key":"4_CR76","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1093\/cercor\/13.4.400","volume":"13","author":"S Killcross","year":"2003","unstructured":"Killcross, S., Coutureau, E.: Coordination of actions and habits in the medial prefrontal cortex of rats. Cereb. Cortex 13(4), 400\u2013408 (2003)","journal-title":"Cereb. Cortex"},{"key":"4_CR77","unstructured":"Konidaris, G., Barto, A.: Building portable options: Skill transfer in reinforcement learning. In: IJCAI, pp. 895\u2013900, Hyderabad, India (2007)"},{"key":"4_CR78","unstructured":"Konidaris, G., Barto, A.: Efficient skill learning using abstraction selection. 
In: IJCAI, pp.\u00a01107\u20131112, Pasadena, California (2009)"},{"issue":"3","key":"4_CR79","doi-asserted-by":"crossref","first-page":"380","DOI":"10.1016\/j.cognition.2008.11.014","volume":"110","author":"KA Krueger","year":"2009","unstructured":"Krueger, K.A., Dayan, P.: Flexible shaping: How learning in small steps helps. Cognition 110(3), 380\u2013394 (2009)","journal-title":"Cognition"},{"key":"4_CR80","volume-title":"Conditioning and Associative Learning","author":"NJ Mackintosh","year":"1983","unstructured":"Mackintosh, N.J.: Conditioning and Associative Learning. Oxford University Press, Oxford (1983)"},{"issue":"1\u20132","key":"4_CR81","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/S0004-3702(02)00378-8","volume":"147","author":"O Madani","year":"2003","unstructured":"Madani, O., Hanks, S., Condon, A.: On the undecidability of probabilistic planning and related stochastic optimization problems. Artif. Intell. 147(1\u20132), 5\u201334 (2003)","journal-title":"Artif. Intell."},{"issue":"4","key":"4_CR82","doi-asserted-by":"crossref","first-page":"397","DOI":"10.31887\/DCNS.2006.8.4\/smaier","volume":"8","author":"SF Maier","year":"2006","unstructured":"Maier, S.F., Amat, J., Baratta, M.V., Paul, E., Watkins, L.R.: Behavioral control, the medial prefrontal cortex, and resilience. Dialogues Clin. Neurosci. 8(4), 397\u2013406 (2006)","journal-title":"Dialogues Clin. Neurosci."},{"issue":"4\u20135","key":"4_CR83","doi-asserted-by":"crossref","first-page":"829","DOI":"10.1016\/j.neubiorev.2005.03.021","volume":"29","author":"SF Maier","year":"2005","unstructured":"Maier, S.F., Watkins, L.R.: Stressor controllability and learned helplessness: The roles of the dorsal raphe nucleus, serotonin, and corticotropin-releasing factor. Neurosci. Biobehav. Rev. 29(4\u20135), 829\u2013841 (2005)","journal-title":"Neurosci. Biobehav. 
Rev."},{"issue":"3","key":"4_CR84","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1016\/j.neubiorev.2004.03.005","volume":"28","author":"N McNaughton","year":"2004","unstructured":"McNaughton, N., Corr, P.J.: A two-dimensional neuropsychology of defense: Fear\/anxiety and defensive distance. Neurosci. Biobehav. Rev. 28(3), 285\u2013305 (2004)","journal-title":"Neurosci. Biobehav. Rev."},{"key":"4_CR85","first-page":"49","volume-title":"Intrinsically Motivated Learning in Natural and Artificial Systems","author":"M Mirolli","year":"2012","unstructured":"Mirolli, M., Baldassarre, G.: Functions and mechanisms of intrinsic motivations: The knowledge versus competence distinction. In: Baldassarre, G., Mirolli, M. (eds.) Intrinsically Motivated Learning in Natural and Artificial Systems, pp. 49\u201372. Springer, Berlin (2012)"},{"issue":"5869","key":"4_CR86","doi-asserted-by":"crossref","first-page":"1543","DOI":"10.1126\/science.1150769","volume":"319","author":"G Mongillo","year":"2008","unstructured":"Mongillo, G., Barak, O., Tsodyks, M.: Synaptic theory of working memory. Science 319(5869), 1543\u20131546 (2008)","journal-title":"Science"},{"issue":"5","key":"4_CR87","doi-asserted-by":"crossref","first-page":"1936","DOI":"10.1523\/JNEUROSCI.16-05-01936.1996","volume":"16","author":"PR Montague","year":"1996","unstructured":"Montague, P.R., Dayan, P., Sejnowski, T.J.: A framework for mesencephalic dopamine systems based on predictive hebbian learning. J. Neurosci. 16(5), 1936\u20131947 (1996)","journal-title":"J. Neurosci."},{"issue":"2","key":"4_CR88","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1080\/10618600.2000.10474879","volume":"9","author":"R Neal","year":"2000","unstructured":"Neal, R.: Markov chain sampling methods for Dirichlet process mixture models. J. Comput. Graph. Stat. 9(2), 249\u2013265 (2000)","journal-title":"J. Comput. Graph. 
Stat."},{"key":"4_CR89","unstructured":"Ng, A., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. In: ICML, pp. 278\u2013287, Bled, Slovenia (1999)"},{"key":"4_CR90","unstructured":"Nouri, A., Littman, M.: Multi-resolution exploration in continuous spaces. NIPS, pp. 1209\u20131216 (2009)"},{"issue":"2","key":"4_CR91","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1162\/089976606775093909","volume":"18","author":"RC O\u2019Reilly","year":"2006","unstructured":"O\u2019Reilly, R.C., Frank, M.J.: Making working memory work: A computational model of learning in the prefrontal cortex and basal ganglia. Neural Comput. 18(2), 283\u2013328 (2006)","journal-title":"Neural Comput."},{"issue":"2","key":"4_CR92","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1109\/TEVC.2006.890271","volume":"11","author":"P Oudeyer","year":"2007","unstructured":"Oudeyer, P., Kaplan, F., Hafner, V.: Intrinsic motivation systems for autonomous mental development. IEEE Trans. Evol. Comput. 11(2), 265\u2013286 (2007)","journal-title":"IEEE Trans. Evol. Comput."},{"key":"4_CR93","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780195096736.001.0001","volume-title":"Affective Neuroscience","author":"J Panksepp","year":"1998","unstructured":"Panksepp, J.: Affective Neuroscience. OUP, New York (1998)"},{"issue":"3","key":"4_CR94","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"C Papadimitriou","year":"1987","unstructured":"Papadimitriou, C., Tsitsiklis, J.: The complexity of Markov decision processes. Math. Oper. Res. 12(3), 441\u2013450 (1987)","journal-title":"Math. Oper. Res."},{"key":"4_CR95","unstructured":"Parr, R., Russell, S.: Reinforcement learning with hierarchies of machines. 
In: NIPS, pp.\u00a01043\u20131049, Denver, Colorado (1998)"},{"key":"4_CR96","doi-asserted-by":"crossref","unstructured":"Poupart, P., Vlassis, N., Hoey, J., Regan, K.: An analytic solution to discrete bayesian reinforcement learning. In: ICML, pp. 697\u2013704, Pittsburgh, Pennslyvania (2006)","DOI":"10.1145\/1143844.1143932"},{"volume-title":"Probabilistic Models of the Brain: Perception and Neural Function","year":"2002","key":"4_CR97","unstructured":"Rao, R.P.N., Olshausen, B.A., Lewicki, M.S. (eds.): Probabilistic Models of the Brain: Perception and Neural Function. MIT, Cambridge (2002)"},{"key":"4_CR98","first-page":"129","volume-title":"Intrinsically Motivated Learning in Natural and Artificial Systems","author":"P Redgrave","year":"2012","unstructured":"Redgrave, P., Gurney, K., Stafford, T., Thirkettle, M., Lewis, J.: The role of the basal ganglia in discovering novel actions. In: Baldassarre, G., Mirolli, M. (eds.) Intrinsically Motivated Learning in Natural and Artificial Systems, pp. 129\u2013149. Springer, Berlin (2012)"},{"issue":"4","key":"4_CR99","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1016\/S0166-2236(98)01373-3","volume":"22","author":"P Redgrave","year":"1999","unstructured":"Redgrave, P., Prescott, T.J., Gurney, K.: Is the short-latency dopamine response too short to signal reward error? Trends Neurosci. 22(4), 146\u2013151 (1999)","journal-title":"Trends Neurosci."},{"issue":"9","key":"4_CR100","doi-asserted-by":"crossref","first-page":"3261","DOI":"10.1523\/JNEUROSCI.21-09-03261.2001","volume":"21","author":"S.M. Reynolds","year":"2001","unstructured":"Reynolds, S.M., Berridge, K.C. (2001): Fear and feeding in the nucleus accumbens shell: Rostrocaudal segregation of GABA-elicited defensive behavior versus eating behavior. J.\u00a0Neurosci. 21(9), 3261\u20133270 (1999)","journal-title":"J. 
Neurosci."},{"issue":"16","key":"4_CR101","doi-asserted-by":"crossref","first-page":"7308","DOI":"10.1523\/JNEUROSCI.22-16-07308.2002","volume":"22","author":"SM Reynolds","year":"2002","unstructured":"Reynolds, S.M., Berridge, K.C.: Positive and negative motivation in nucleus accumbens shell: Bivalent rostrocaudal gradients for GABA-elicited eating, taste \u201cliking\u201d\/\u201cdisliking\u201d reactions, place preference\/avoidance, and fear. J. Neurosci. 22(16), 7308\u20137320 (2002)","journal-title":"J. Neurosci."},{"issue":"4","key":"4_CR102","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1038\/nn2061","volume":"11","author":"SM Reynolds","year":"2008","unstructured":"Reynolds, S.M., Berridge, K.C.: Emotional environments retune the valence of appetitive versus fearful functions in nucleus accumbens. Nat. Neurosci. 11(4), 423\u2013425 (2008)","journal-title":"Nat. Neurosci."},{"issue":"1","key":"4_CR103","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1023\/A:1007331723572","volume":"28","author":"M Ring","year":"1997","unstructured":"Ring, M.: CHILD: A first step towards continual learning. Mach. Learn. 28(1), 77\u2013104 (1997)","journal-title":"Mach. Learn."},{"key":"4_CR104","unstructured":"Ring, M.: Toward a formal framework for continual learning. In: NIPS Workshop on Inductive Transfer, Whistler, Canada (2005)"},{"issue":"4","key":"4_CR105","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1038\/nn2066","volume":"11","author":"M.F.S. Rushworth","year":"2008","unstructured":"Rushworth, M.F.S., Behrens, T.E.J.: Choice, uncertainty and value in prefrontal and cingulate cortex. Nat. Neurosci. 11(4), 389\u2013397 (2008)","journal-title":"Nat. Neurosci."},{"issue":"1","key":"4_CR106","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1006\/ceps.1999.1020","volume":"25","author":"R Ryan","year":"2000","unstructured":"Ryan, R., Deci, E.: Intrinsic and extrinsic motivations: Classic definitions and new directions. Contemp. 
Educ. Psychol. 25(1), 54\u201367 (2000)","journal-title":"Contemp. Educ. Psychol."},{"issue":"7","key":"4_CR107","doi-asserted-by":"crossref","first-page":"985","DOI":"10.1016\/S0893-6080(02)00235-6","volume":"16","author":"K Samejima","year":"2003","unstructured":"Samejima, K., Doya, K., Kawato, M.: Inter-module credit assignment in modular reinforcement learning. Neural Netw. 16(7), 985\u2013994 (2003)","journal-title":"Neural Netw."},{"key":"4_CR108","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1147\/rd.33.0210","volume":"3","author":"A Samuel","year":"1959","unstructured":"Samuel, A.: Some studies in machine learning using the game of checkers. IBM J. Res. Dev. 3, 210\u2013229 (1959)","journal-title":"IBM J. Res. Dev."},{"key":"4_CR109","doi-asserted-by":"crossref","unstructured":"Schembri, M., Mirolli, M., Baldassarre, G.: Evolving childhood\u2019s length and learning parameters in an intrinsically motivated reinforcement learning robot. In: Proceedings of the Seventh International Conference on Epigenetic Robotics, pp. 141\u2013148, Piscataway, New Jersey (2007)","DOI":"10.1109\/DEVLRN.2007.4354052"},{"key":"4_CR110","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: Curious model-building control systems. In: IJCNN, pp. 1458\u20131463, Seattle, Washington State IEEE (1991)","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"4_CR111","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: G\u00f6del machines: Fully self-referential optimal universal self-improvers. Artif. Gen. Intell., pp. 199\u2013226 (2006)","DOI":"10.1007\/978-3-540-68677-4_7"},{"key":"4_CR112","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1007\/s12559-009-9014-y","volume":"1","author":"J Schmidhuber","year":"2009","unstructured":"Schmidhuber, J.: Ultimate cognition \u00e0 la g\u00f6del. Cogn. Comput. 1, 117\u2013193 (2009)","journal-title":"Cogn. 
Comput."},{"key":"4_CR113","volume-title":"Helplessness: On Depression, Development, and Death","author":"M Seligman","year":"1975","unstructured":"Seligman, M.: Helplessness: On Depression, Development, and Death. WH Freeman, San Francisco (1975)"},{"key":"4_CR114","first-page":"302","volume-title":"Classical Conditioning","author":"F Sheffield","year":"1965","unstructured":"Sheffield, F.: Relation between classical conditioning and instrumental learning. In: Prokasy, W. (ed.) Classical Conditioning, pp. 302\u2013322. Appleton-Century-Crofts, New York (1965)"},{"key":"4_CR115","doi-asserted-by":"crossref","unstructured":"\u015eim\u015fek, \u00d6., Barto, A.G.: An intrinsic reward mechanism for efficient exploration. In: ICML, pp.\u00a0833\u2013840, Pittsburgh, Pennsylvania (2006)","DOI":"10.1145\/1143844.1143949"},{"issue":"3","key":"4_CR116","first-page":"323","volume":"8","author":"S Singh","year":"1992","unstructured":"Singh, S.: Transfer of learning by composing solutions of elemental sequential tasks. Mach. Learn. 8(3), 323\u2013339 (1992)","journal-title":"Mach. Learn."},{"key":"4_CR117","doi-asserted-by":"crossref","unstructured":"Singh, S., Barto, A., Chentanez, N.: Intrinsically motivated reinforcement learning. In: NIPS, pp.\u00a01281\u20131288, Vancouver, Canada (2005)","DOI":"10.21236\/ADA440280"},{"issue":"3","key":"4_CR118","doi-asserted-by":"crossref","first-page":"549","DOI":"10.1037\/0022-3514.71.3.549","volume":"71","author":"EA Skinner","year":"1996","unstructured":"Skinner, E.A.: A guide to constructs of control. J. Pers. Soc. Psychol. 71(3), 549\u2013570 (1996)","journal-title":"J. Pers. Soc. Psychol."},{"issue":"1","key":"4_CR119","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1080\/09548980500361624","volume":"17","author":"A Smith","year":"2006","unstructured":"Smith, A., Li, M., Becker, S., Kapur, S.: Dopamine, prediction error and associative learning: A\u00a0model-based account. 
Network 17(1), 61\u201384 (2006)","journal-title":"Network"},{"key":"4_CR120","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1017\/S0140525X00022871","volume":"9","author":"P Soubri\u00e9","year":"1986","unstructured":"Soubri\u00e9, P.: Reconciling the role of central serotonin neurons in human and animal behaviour. Behav. Brain Sci. 9, 319\u2013364 (1986)","journal-title":"Behav. Brain Sci."},{"key":"4_CR121","unstructured":"Strens, M.: A Bayesian framework for reinforcement learning. In: ICML, pp. 943\u2013950, Stanford, California (2000)"},{"issue":"3","key":"4_CR122","doi-asserted-by":"crossref","first-page":"871","DOI":"10.1016\/S0306-4522(98)00697-6","volume":"91","author":"RE Suri","year":"1999","unstructured":"Suri, R.E., Schultz, W.: A neural network model with dopamine-like reinforcement signal that learns a spatial delayed response task. Neuroscience 91(3), 871\u2013890 (1999)","journal-title":"Neuroscience"},{"issue":"1","key":"4_CR123","first-page":"9","volume":"3","author":"R Sutton","year":"1988","unstructured":"Sutton, R.: Learning to predict by the methods of temporal differences. Mach. Learn. 3(1), 9\u201344 (1988)","journal-title":"Mach. Learn."},{"key":"4_CR124","first-page":"224","volume":"216","author":"R Sutton","year":"1990","unstructured":"Sutton, R.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. ICML Austin, Texas 216, 224 (1990)","journal-title":"ICML Austin, Texas"},{"issue":"1","key":"4_CR125","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R Sutton","year":"1999","unstructured":"Sutton, R., Precup, D., Singh, S.: Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1), 181\u2013211 (1999)","journal-title":"Artif. 
Intell."},{"key":"4_CR126","volume-title":"Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning)","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning). MIT, Cambridge (1998)"},{"issue":"5","key":"4_CR127","first-page":"1004","volume":"123","author":"F Tanaka","year":"2003","unstructured":"Tanaka, F., Yamamura, M.: Multitask reinforcement learning on the distribution of MDPs. IEEJ Trans. Electron. Inform. Syst. C 123(5), 1004\u20131011 (2003)","journal-title":"IEEJ Trans. Electron. Inform. Syst. C"},{"issue":"476","key":"4_CR128","doi-asserted-by":"crossref","first-page":"1566","DOI":"10.1198\/016214506000000302","volume":"101","author":"Y Teh","year":"2006","unstructured":"Teh, Y., Jordan, M., Beal, M., Blei, D.: Hierarchical dirichlet processes. J. Am. Stat. Assoc. 101(476), 1566\u20131581 (2006)","journal-title":"J. Am. Stat. Assoc."},{"issue":"7","key":"4_CR129","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1016\/j.tics.2006.05.009","volume":"10","author":"J Tenenbaum","year":"2006","unstructured":"Tenenbaum, J., Griffiths, T., Kemp, C.: Theory-based Bayesian models of inductive learning and reasoning. Trends Cogn. Sci. 10(7), 309\u2013318 (2006)","journal-title":"Trends Cogn. Sci."},{"key":"4_CR130","unstructured":"Thibaux, R., Jordan, M.: Hierarchical beta processes and the Indian buffet process. In: AIStats, pp. 564\u2013571, San Juan, Puerto Rico (2007)"},{"key":"4_CR131","volume-title":"Animal Intelligence","author":"E Thorndike","year":"1911","unstructured":"Thorndike, E.: Animal Intelligence. MacMillan, New York (1911)"},{"key":"4_CR132","unstructured":"Thrun, S., Schwartz, A.: Finding structure in reinforcement learning. 
In: NIPS, pp.\u00a0385\u2013392, Denver, Colorado (1995)"},{"issue":"4","key":"4_CR133","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1037\/h0061626","volume":"55","author":"EC Tolman","year":"1948","unstructured":"Tolman, E.C.: Cognitive maps in rats and men. Psychol. Rev. 55(4), 189\u2013208 (1948)","journal-title":"Psychol. Rev."},{"issue":"11","key":"4_CR134","doi-asserted-by":"crossref","first-page":"2225","DOI":"10.1111\/j.1460-9568.2009.06796.x","volume":"29","author":"E Tricomi","year":"2009","unstructured":"Tricomi, E., Balleine, B.W., O\u2019Doherty, J.P.: A specific role for posterior dorsolateral striatum in human habit learning. Eur. J. Neurosci. 29(11), 2225\u20132232 (2009)","journal-title":"Eur. J. Neurosci."},{"issue":"15","key":"4_CR135","doi-asserted-by":"crossref","first-page":"4019","DOI":"10.1523\/JNEUROSCI.0564-07.2007","volume":"27","author":"VV Valentin","year":"2007","unstructured":"Valentin, V.V., Dickinson, A., O\u2019Doherty, J.P.: Determining the neural substrates of goal-directed learning in the human brain. J. Neurosci. 27(15), 4019\u20134026 (2007)","journal-title":"J. Neurosci."},{"issue":"2","key":"4_CR136","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1007\/s00422-008-0288-z","volume":"100","author":"E. Vasilaki","year":"2009","unstructured":"Vasilaki, E., Fusi, S., Wang, X.-J., Senn, W. (2009): Learning flexible sensori-motor mappings in a complex network. Biol. Cybern. 100(2), 147\u2013158 (2009)","journal-title":"Biol. Cybern."},{"key":"4_CR137","doi-asserted-by":"crossref","unstructured":"Wang, T., Lizotte, D., Bowling, M., Schuurmans, D.: Bayesian sparse sampling for on-line reward optimization. In: ICML, pp. 956\u2013963, Bonn, Germany (2005)","DOI":"10.1145\/1102351.1102472"},{"key":"4_CR138","unstructured":"Watkins, C. (1989): Learning from delayed rewards. Ph.D. 
Thesis, University of Cambridge (1989)"},{"key":"4_CR139","doi-asserted-by":"crossref","unstructured":"Wiering, M., Schmidhuber, J.: Efficient model-based exploration. In: Simulation of Adaptive Behavior, pp. 223\u2013228, Zurich, Switzerland (1998)","DOI":"10.7551\/mitpress\/3119.003.0034"},{"issue":"4","key":"4_CR140","doi-asserted-by":"crossref","first-page":"511","DOI":"10.1901\/jeab.1969.12-511","volume":"12","author":"DR Williams","year":"1969","unstructured":"Williams, D.R., Williams, H.: Auto-maintenance in the pigeon: Sustained pecking despite contingent non-reinforcement. J. Exp. Anal. Behav. 12(4), 511\u2013520 (1969)","journal-title":"J. Exp. Anal. Behav."},{"key":"4_CR141","doi-asserted-by":"crossref","unstructured":"Wilson, A., Fern, A., Ray, S., Tadepalli, P.: Multi-task reinforcement learning: A hierarchical bayesian approach. In: ICML, pp. 1015\u20131022, Corvallis, Oregon (2007)","DOI":"10.1145\/1273496.1273624"},{"key":"4_CR142","unstructured":"Wingate, D., Goodman, N.D., Roy, D.M., Kaelbling, L.P., Tenenbaum, J.B.: Bayesian policy search with policy priors. In: Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence-Volume, vol. 2, pp. 1565\u20131570. AAAI Press, Menlo Park (2011)"},{"issue":"7\u20138","key":"4_CR143","doi-asserted-by":"crossref","first-page":"1317","DOI":"10.1016\/S0893-6080(98)00066-5","volume":"11","author":"DM Wolpert","year":"1998","unstructured":"Wolpert, D.M., Kawato, M.: Multiple paired forward and inverse models for motor control. Neural Netw. 11(7\u20138), 1317\u20131329 (1998)","journal-title":"Neural Netw."},{"issue":"5","key":"4_CR144","doi-asserted-by":"crossref","first-page":"781","DOI":"10.1016\/j.neuron.2006.05.006","volume":"50","author":"W Yoshida","year":"2006","unstructured":"Yoshida, W., Ishii, S.: Resolution of uncertainty in prefrontal cortex. 
Neuron 50(5), 781\u2013789 (2006)","journal-title":"Neuron"},{"issue":"4","key":"4_CR145","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1016\/j.neuron.2005.04.026","volume":"46","author":"AJ Yu","year":"2005","unstructured":"Yu, A.J., Dayan, P.: Uncertainty, neuromodulation, and attention. Neuron 46(4), 681\u2013692 (2005)","journal-title":"Neuron"}],"container-title":["Intrinsically Motivated Learning in Natural and Artificial Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-32375-1_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T03:50:31Z","timestamp":1715140231000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-32375-1_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,11,10]]},"ISBN":["9783642323744","9783642323751"],"references-count":145,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-32375-1_4","relation":{},"subject":[],"published":{"date-parts":[[2012,11,10]]},"assertion":[{"value":"10 November 2012","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}