{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T06:18:29Z","timestamp":1775801909871,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":105,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642323744","type":"print"},{"value":"9783642323751","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,11,10]],"date-time":"2012-11-10T00:00:00Z","timestamp":1352505600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-32375-1_2","type":"book-chapter","created":{"date-parts":[[2013,3,29]],"date-time":"2013-03-29T03:35:02Z","timestamp":1364528102000},"page":"17-47","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":109,"title":["Intrinsic Motivation and Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Andrew G.","family":"Barto","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,11,10]]},"reference":[{"key":"2_CR1","first-page":"487","volume-title":"Artificial Life II (Proceedings Volume X in the Santa Fe Institute Studies in the Sciences of Complexity","author":"DH Ackley","year":"1991","unstructured":"Ackley, D.H., Littman, M.: Interactions between learning and evolution. In: Langton, C., Taylor,\u00a0C., Farmer, C., Rasmussen, S. (eds.) Artificial Life II (Proceedings Volume X in the Santa Fe Institute Studies in the Sciences of Complexity, pp. 487\u2013509. Addison-Wesley, Reading (1991)"},{"key":"2_CR2","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1177\/105971230401200203","volume":"12","author":"P Andry","year":"2004","unstructured":"Andry, P., Gaussier, P., Nadel, J., Hirsbrunner, B.: Learning invariant sensorimotor behaviors: A\u00a0developmental approach to imitation mechanisms. Adap. Behav. 12, 117\u2013140 (2004)","journal-title":"Adap. Behav."},{"key":"2_CR3","volume-title":"Psychological Theories of Motivation","author":"HR Arkes","year":"1982","unstructured":"Arkes, H.R., Garske, J.P.: Psychological Theories of Motivation. Brooks\/Cole, Monterey (1982)"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Baranes, A., Oudeyer, P.-Y.: Intrinsically motivated goal exploration for active motor learning in robots: A case study. In: Proceedings of the IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2010), Taipei, Taiwan 2010","DOI":"10.1109\/IROS.2010.5651385"},{"key":"2_CR5","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discr. Event Dynam. Syst. Theory Appl. 13, 341\u2013379 (2003)","journal-title":"Discr. Event Dynam. Syst. Theory Appl."},{"key":"2_CR6","unstructured":"Barto, A.G., Singh, S., Chentanez, N.: Intrinsically motivated learning of hierarchical collections of skills. In: Proceedings of the International Conference on Developmental Learning (ICDL), La Jolla, CA 2004"},{"key":"2_CR7","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Neuronlike elements that can solve difficult learningcontrol problems. 13, 835\u2013846 (1983). IEEE Trans. Sys. Man, Cybern. Reprinted in J.A. Anderson and E. Rosenfeld (eds.), Neurocomputing: Foundations of Research, pp. 535\u2013549, MIT, Cambridge (1988)"},{"key":"2_CR8","unstructured":"Beck, R.C.: Motivation. Theories and Principles, 2nd edn. Prentice-Hall, Englewood Cliffs (1983)"},{"key":"2_CR9","first-page":"180","volume":"45","author":"DE Berlyne","year":"1954","unstructured":"Berlyne, D.E.: A theory of human curiosity. Br. J. Psychol. 45, 180\u2013191 (1954)","journal-title":"Br. J. Psychol."},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Berlyne, D.E.: Conflict, Arousal., Curiosity. McGraw-Hill, New York (1960)","DOI":"10.1037\/11164-000"},{"key":"2_CR11","first-page":"25","volume":"143","author":"DE Berlyne","year":"1966","unstructured":"Berlyne, D.E.: Curiosity and exploration. Science 143, 25\u201333 (1966)","journal-title":"Curiosity and exploration. Science"},{"key":"2_CR12","volume-title":"Aesthetics and Psychobiology","author":"DE Berlyne","year":"1971","unstructured":"Berlyne, D.E.: Aesthetics and Psychobiology. Appleton-Century-Crofts, New York (1971)"},{"key":"2_CR13","volume-title":"Neuro-Dynamic Programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"2_CR14","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1017\/S0140525X00059380","volume":"1","author":"D Bindra","year":"1978","unstructured":"Bindra, D.: How adaptive behavior is produced: A perceptual-motivational alternative to response reinforcement. Behav. Brain Sci. 1, 41\u201391 (1978)","journal-title":"Behav. Brain Sci."},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Breazeal, C., Brooks, A., Gray, J., Hoffman, G., Lieberman, J., Lee, H., Lockerd, A., Mulanda, D.: Tutelage and collaboration for humanoid robots. Int. J. Human. Robot. 1 (2004)","DOI":"10.1142\/S0219843604000150"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Bush, V.: Science the endless frontier: Areport to the president. Technical report (1945)","DOI":"10.2307\/3625196"},{"issue":"2","key":"2_CR17","doi-asserted-by":"crossref","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., Schutter, B.D.: A comprehensive survey of multi-agent reinforcement learning. IEEE Trans. Syst. Man Cybern. C Appl. Rev. 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. C Appl. Rev."},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Cannon, W.B.: The Wisdom of the Body. W.W. Norton, New York (1932)","DOI":"10.1097\/00000441-193212000-00028"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Clark, W.A., Farley, B.G.: Generalization of pattern recognition in a self-organizing system. In: AFIPS\u2019 55 (Western) Proceedings of the March 1\u20133, 1955, Western Joint Computer Conference, Los Angeles, CA, pp.\u00a086\u201391, ACM, New York (1955)","DOI":"10.1145\/1455292.1455309"},{"key":"2_CR20","volume-title":"Motivation: Theory and Research","author":"CN Cofer","year":"1964","unstructured":"Cofer, C.N., Appley, M.H.: Motivation: Theory and Research. Wiley, New York (1964)"},{"key":"2_CR21","doi-asserted-by":"crossref","first-page":"936","DOI":"10.1007\/11553090_94","volume-title":"Advances in Artificial Life: 8th European Conference, ECAL 2005. Canterbury, UK LNAI vol. 3630","author":"T Damoulas","year":"2005","unstructured":"Damoulas, T., Cos-Aguilera, I., Hayes, G.M., Taylor, T.: Valency for adaptive homeostatic agents: Relating evolution and learning. In: Capcarrere, M.S., Freitas, A.A., Bentley, P.J., Johnson,\u00a0C.G., Timmis, J. (eds.) Advances in Artificial Life: 8th European Conference, ECAL 2005. Canterbury, UK LNAI vol. 3630, pp. 936\u2013945. Springer, Berlin (2005)"},{"issue":"5","key":"2_CR22","doi-asserted-by":"crossref","first-page":"593","DOI":"10.1521\/soco.2008.26.5.593","volume":"26","author":"ND Daw","year":"2008","unstructured":"Daw, N.D., Shohamy, D.: The cognitive neuroscience of motivation and learning. Soc. Cogn. 26(5), 593\u2013620 (2008)","journal-title":"Soc. Cogn."},{"key":"2_CR23","first-page":"11","volume-title":"Advances in Neural Information Processing Systems 14: Proceedings of the 2001 Conference","author":"P Dayan","year":"2001","unstructured":"Dayan, P.: Motivated reinforcement learning. In: Dietterich, T.G., Becker, S., Ghahramani, Z. (eds.) Advances in Neural Information Processing Systems 14: Proceedings of the 2001 Conference, pp. 11\u201318. MIT, Cambridge (2001)"},{"key":"2_CR24","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4899-2271-7","volume-title":"Intrinsic Motivation and Self-Determination in Human Behavior","author":"EL Deci","year":"1985","unstructured":"Deci, E.L., Ryan, R.M.: Intrinsic Motivation and Self-Determination in Human Behavior. Plenum, New York (1985)"},{"key":"2_CR25","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1037\/h0046861","volume":"64","author":"WN Dember","year":"1957","unstructured":"Dember, W.N., Earl, R.W.: Analysis of exploratory, manipulatory, and curiosity behaviors. Psychol. Rev. 64, 91\u201396 (1957)","journal-title":"Psychol. Rev."},{"key":"2_CR26","doi-asserted-by":"crossref","first-page":"514","DOI":"10.1037\/h0046780","volume":"50","author":"WN Dember","year":"1957","unstructured":"Dember, W.N., Earl, R.W., Paradise, N.: Response by rats to differential stimulus complexity. J.\u00a0Comp. Physiol. Psychol. 50, 514\u2013518 (1957)","journal-title":"J. Comp. Physiol. Psychol."},{"key":"2_CR27","first-page":"497","volume-title":"Handbook of Experimental Psychology, 3rd edn. Learning, Motivation, and Emotion","author":"A Dickinson","year":"2002","unstructured":"Dickinson, A., Balleine, B.: The role of leaning in the operation of motivational systems. In: Gallistel, R. (ed.) Handbook of Experimental Psychology, 3rd edn. Learning, Motivation, and Emotion, pp. 497\u2013533. Wiley, New York (2002)"},{"key":"2_CR28","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1177\/1059712308092835","volume":"16","author":"S Elfwing","year":"2008","unstructured":"Elfwing, S., Uchibe, E., Doya, K., Christensen, H.I.: Co-evolution of shaping rewards and meta-parameters in reinforcement learning. Adap. Behav. 16, 400\u2013412 (2008)","journal-title":"Adap. Behav."},{"key":"2_CR29","volume-title":"The Physiological Mechanisms of Motivation","author":"A Epstein","year":"1982","unstructured":"Epstein, A.: Instinct and motivation as explanations of complex behavior. In: Pfaff, D.W. (ed.) The Physiological Mechanisms of Motivation. Springer, New York (1982)"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Friston, K.J., Daunizeau, J., Kilner, J., Kiebel, S.J.: Action and behavior: A free-energy formulation. Biol. Cybern. (2010). Pubished online February 11, 2020","DOI":"10.1007\/s00422-010-0364-z"},{"key":"2_CR31","doi-asserted-by":"crossref","DOI":"10.1037\/13084-000","volume-title":"The Play of Man","author":"K Groos","year":"1901","unstructured":"Groos, K.: The Play of Man. D. Appleton, New York (1901)"},{"key":"2_CR32","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1037\/h0058114","volume":"43","author":"HF Harlow","year":"1950","unstructured":"Harlow, H.F.: Learning and satiation of response in intrinsically motivated complex puzzle performance by monkeys. J. Comp. Physiol. Psychol. 43, 289\u2013294 (1950)","journal-title":"J. Comp. Physiol. Psychol."},{"key":"2_CR33","doi-asserted-by":"crossref","first-page":"228","DOI":"10.1037\/h0056906","volume":"40","author":"HF Harlow","year":"1950","unstructured":"Harlow, H.F., Harlow, M.K., Meyer, D.R.: Learning motivated by a manipulation drive. J. Exp. Psychol. 40, 228\u2013234 (1950)","journal-title":"J. Exp. Psychol."},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Hart, S., Grupen, R.: Intrinsically motivated affordance discovery and modeling. In: Baldassarre, G., Mirolli, M. (eds.) Intrinsically Motivated Learning in Natural and Artificial Systems. Springer, Berlin (2012, this volume)","DOI":"10.1007\/978-3-642-32375-1_12"},{"key":"2_CR35","volume-title":"The Organization of Behavior","author":"DO Hebb","year":"1949","unstructured":"Hebb, D.O.: The Organization of Behavior. Wiley, New York (1949)"},{"key":"2_CR36","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1080\/21674086.1942.11925485","volume":"11","author":"I Hendrick","year":"1942","unstructured":"Hendrick, I.: Instinct and ego during infancy. Psychoanal. Quart. 11, 33\u201358 (1942)","journal-title":"Psychoanal. Quart."},{"issue":"2","key":"2_CR37","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1142\/S0219525909002258","volume":"12","author":"F Hesse","year":"2009","unstructured":"Hesse, F., Der, R., Herrmann, M., Michael, J.: Modulated exploratory dynamics can shape self-organized behavior. Adv. Complex Syst. 12(2), 273\u2013292 (2009)","journal-title":"Adv. Complex Syst."},{"key":"2_CR38","volume-title":"Principles of Behavior","author":"CL Hull","year":"1943","unstructured":"Hull, C.L.: Principles of Behavior. D. Appleton-Century, New York (1943)"},{"key":"2_CR39","volume-title":"Essentials of Behavior","author":"CL Hull","year":"1951","unstructured":"Hull, C.L.: Essentials of Behavior. Yale University Press, New Haven (1951)"},{"key":"2_CR40","volume-title":"A Behavior System: An Introduction to Behavior Theory Concerning the Individual Organism","author":"CL Hull","year":"1952","unstructured":"Hull, C.L.: A Behavior System: An Introduction to Behavior Theory Concerning the Individual Organism. Yale University Press, New Haven (1952)"},{"key":"2_CR41","volume-title":"Hilgard and Marquis\u2019 Conditioning and Learning","author":"GA Kimble","year":"1961","unstructured":"Kimble, G.A.: Hilgard and Marquis\u2019 Conditioning and Learning. Appleton-Century-Crofts, Inc., New York (1961)"},{"key":"2_CR42","volume-title":"Motivation","author":"SB Klein","year":"1982","unstructured":"Klein, S.B.: Motivation. Biosocial Approaches. McGraw-Hill, New York (1982)"},{"key":"2_CR43","unstructured":"Klopf, A.H.: Brain function and adaptive systems\u2014A heterostatic theory. Technical report AFCRL-72-0164, Air Force Cambridge Research Laboratories, Bedford. A summary appears in Proceedings of the International Conference on Systems, Man, and Cybernetics, 1974, IEEE Systems, Man, and Cybernetics Society, Dallas (1972)"},{"key":"2_CR44","volume-title":"The Hedonistic Neuron: A Theory of Memory, Learning, and Intelligence","author":"AH Klopf","year":"1982","unstructured":"Klopf, A.H.: The Hedonistic Neuron: A Theory of Memory, Learning, and Intelligence. Hemisphere, Washington (1982)"},{"key":"2_CR45","unstructured":"Lenat, D.B.: AM: An artificial intelligence approach to discovery in mathematics. Ph.D. Thesis, Stanford University (1976)"},{"key":"2_CR46","volume-title":"The Compass of Pleasure: How Our Brains Make Fatty Foods, Orgasm, Exercise, Marijuana, Generosity, Vodka, Learning, and Gambling Feel So Good","author":"DJ Linden","year":"2011","unstructured":"Linden, D.J.: The Compass of Pleasure: How Our Brains Make Fatty Foods, Orgasm, Exercise, Marijuana, Generosity, Vodka, Learning, and Gambling Feel So Good. Viking, New York (2011)"},{"key":"2_CR47","unstructured":"Littman, M.L., Ackley, D.H.: Adaptation in constant utility nonstationary environments. In: Proceedings of the Fourth International Conference on Genetic Algorithms, San Diego, CA pp. 136\u2013142 (1991)"},{"key":"2_CR48","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1080\/09540090310001655110","volume":"15","author":"M Lungarella","year":"2003","unstructured":"Lungarella, M., Metta, G., Pfeiffer, R., Sandini, G.: Developmental robotics: A survey. Connect. Sci. 15, 151\u2013190 (2003)","journal-title":"Connect. Sci."},{"key":"2_CR49","volume-title":"Conditioning and Associative Learning","author":"NJ Mackintosh","year":"1983","unstructured":"Mackintosh, N.J.: Conditioning and Associative Learning. Oxford University Press, New York (1983)"},{"key":"2_CR50","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/3830.001.0001","volume-title":"Intelligent Behavior in Animals and Robots","author":"D McFarland","year":"1993","unstructured":"McFarland, D., B\u00f6sser, T.: Intelligent Behavior in Animals and Robots. MIT, Cambridge (1993)"},{"key":"2_CR51","volume-title":"Adaptive, Learning, and Pattern Recognition Systems: Theory and Applications","year":"1970","unstructured":"Mendel, J.M., Fu, K.S. (eds.): Adaptive, Learning, and Pattern Recognition Systems: Theory and Applications. Academic, New York (1970)"},{"key":"2_CR52","first-page":"287","volume-title":"Adaptive, Learning and Pattern Recognition Systems:Theory and Applications","author":"JM Mendel","year":"1970","unstructured":"Mendel, J.M., McLaren, R.W.: Reinforcement learning control and pattern recognition systems. In: Mendel, J.M., Fu, K.S. (eds.) Adaptive, Learning and Pattern Recognition Systems:Theory and Applications, pp. 287\u2013318. Academic, New York (1970)"},{"key":"2_CR53","first-page":"137","volume-title":"Machine Intelligence 2","author":"D Michie","year":"1968","unstructured":"Michie, D., Chambers, R.A.: BOXES: An experiment in adaptive control. In: Dale, E., Michie, D. (eds.) Machine Intelligence 2, pp. 137\u2013152. Oliver and Boyd, Edinburgh (1968)"},{"key":"2_CR54","unstructured":"Minsky, M.L.: Theory of neural-analog reinforcement systems and its application to the brain-model problem. Ph.D. Thesis, Princeton University (1954)"},{"key":"2_CR55","unstructured":"Minsky, M.L.: Steps toward artificial intelligence. Proc. Inst. Radio Eng. 49, 8\u201330 (1961). Reprinted in E.A. Feigenbaum and J. Feldman (eds.) Computers and Thought, pp. 406\u2013450. McGraw-Hill, New York (1963)"},{"key":"2_CR56","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1016\/0023-9690(71)90048-8","volume":"2","author":"SO Mollenauer","year":"1971","unstructured":"Mollenauer, S.O.: Shifts in deprivations level: Different effects depending on the amount of preshift training. Learn. Motiv. 2, 58\u201366 (1971)","journal-title":"Learn. Motiv."},{"key":"2_CR57","volume-title":"Learning Automata: An Introduction","author":"K Narendra","year":"1989","unstructured":"Narendra, K., Thathachar, M.A.L.: Learning Automata: An Introduction. Prentice Hall, Englewood Cliffs (1989)"},{"key":"2_CR58","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1037\/h0058775","volume":"47","author":"J Olds","year":"1954","unstructured":"Olds, J., Milner, P.: Positive reinforcement produced by electrical stimulation of septal areas and other regions of rat brain. J. Comp. Physiol. Psychol. 47, 419\u2013427 (1954)","journal-title":"J. Comp. Physiol. Psychol."},{"key":"2_CR59","doi-asserted-by":"crossref","unstructured":"Oudeyer, P.-Y., Kaplan, F.: What is intrinsic motivation? A typology of computational approaches. Front. Neurorobot. 1:6, doi: 10.3389\/neuro.12.006.2007 (2007)","DOI":"10.3389\/neuro.12.006.2007"},{"key":"2_CR60","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1109\/TEVC.2006.890271","volume":"11","author":"P.-Y. Oudeyer","year":"2007","unstructured":"Oudeyer, P.-Y., Kaplan, F., Hafner, V.: Intrinsic motivation systems for autonomous mental development. IEEE Trans. Evol. Comput. 11, 265\u2013286 (2007)","journal-title":"IEEE Trans. Evol. Comput."},{"key":"2_CR61","volume-title":"Motivation: Theory and Research","author":"HL Petri","year":"1981","unstructured":"Petri, H.L.: Motivation: Theory and Research. Wadsworth Publishing Company, Belmont (1981)"},{"key":"2_CR62","doi-asserted-by":"crossref","DOI":"10.1037\/11494-000","volume-title":"The Origins of Intelligence in Children","author":"J Piaget","year":"1952","unstructured":"Piaget, J.: The Origins of Intelligence in Children. Norton, New York (1952)"},{"key":"2_CR63","volume-title":"Affective Computing","author":"RW Picard","year":"1997","unstructured":"Picard, R.W.: Affective Computing. MIT, Cambridge (1997)"},{"key":"2_CR64","unstructured":"Prince, C.G., Demiris, Y., Marom, Y., Kozima, H., Balkenius, C. (eds.): Proceedings of the Second International Workshop on Epigenetic Robotics: Modeling Cognitive Development in Robotic Systems. Lund University Cognitive Studies, vol. 94. Lund University, Lund (2001)"},{"key":"2_CR65","first-page":"64","volume-title":"Classical Conditioning","author":"RA Rescorla","year":"1972","unstructured":"Rescorla, R.A., Wagner, A.R.: A theory of Pavlovian conditioning: Variationsin the effectiveness of reinforcement and nonreinforcement. In: Black, A.H., Prokasy, W.F. (eds.) Classical Conditioning, vol. II, pp. 64\u201399. Appleton-Century-Crofts, New York (1972)"},{"key":"2_CR66","volume-title":"Principles of Neurodynamics: Perceptrons and the Theory of Brain Mechanisms","author":"F Rosenblatt","year":"1962","unstructured":"Rosenblatt, F.: Principles of Neurodynamics: Perceptrons and the Theory of Brain Mechanisms. Spartan Books, Washington (1962)"},{"issue":"6088","key":"2_CR67","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"D Rumelhart","year":"1986","unstructured":"Rumelhart, D., Hintont, G., Williams, R.: Learning representations by back-propagating errors. Nature 323(6088), 533\u2013536 (1986)","journal-title":"Nature"},{"key":"2_CR68","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1006\/ceps.1999.1020","volume":"25","author":"RM Ryan","year":"2000","unstructured":"Ryan, R.M., Deci, E.L.: Intrinsic and extrinsic motivations: Classic definitions and new directions. Contemp. Educ. Psychol. 25, 54\u201367 (2000)","journal-title":"Contemp. Educ. Psychol."},{"key":"2_CR69","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1006\/jeth.2000.2753","volume":"97","author":"L Samuelson","year":"2001","unstructured":"Samuelson, L.: Introduction to the evolution of preferences. J. Econ. Theory 97, 225\u2013230 (2001)","journal-title":"J. Econ. Theory"},{"key":"2_CR70","first-page":"119","volume":"1","author":"L Samuelson","year":"2006","unstructured":"Samuelson, L., Swinkels, J.: Information, evolution, and utility. Theor. Econ. 1, 119\u2013142 (2006)","journal-title":"Theor. Econ."},{"key":"2_CR71","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1080\/095400900750060131","volume":"12","author":"T Savage","year":"2000","unstructured":"Savage, T.: Artificial motives: A review of motivation in artificial creatures. Connect. Sci. 12, 211\u2013277 (2000)","journal-title":"Connect. Sci."},{"key":"2_CR72","doi-asserted-by":"crossref","unstructured":"Schembri, M., Mirolli, M., Baldassarre, G.: Evolving internal reinforcers for an intrinsically motivated reinforcement-learning robot. In: Proceedings of the 6th International Conference on Development and Learning (ICDL2007), Imperial College, London 2007","DOI":"10.1109\/DEVLRN.2007.4354052"},{"key":"2_CR73","unstructured":"Schmidhuber, J.: Adaptive confidence and adaptive curiosity. Technical report FKI-149-91, Institut f\u00fcr Informatik, Technische Universit\u00e4t M\u00fcnchen (1991a)"},{"key":"2_CR74","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: A possibility for implementing curiosity and boredom in model-building neural controllers. In: From Animals to Animats: Proceedings of the First International Conference on Simulation of Adaptive Behavior, pp. 222\u2013227. MIT, Cambridge (1991b)","DOI":"10.7551\/mitpress\/3115.003.0030"},{"key":"2_CR75","volume-title":"What\u2019s interesting? Technical report TR-35\u201397","author":"J Schmidhuber","year":"1997","unstructured":"Schmidhuber, J.: What\u2019s interesting? Technical report TR-35-97. IDSIA, Lugano (1997)"},{"key":"2_CR76","doi-asserted-by":"crossref","unstructured":"Schmidhuber, J.: Artificial curiosity based on discovering novel algorithmic predictability through coevolution. In: Proceedings of the Congress on Evolutionary Computation, vol. 3, pp. 1612\u20131618. IEEE (1999)","DOI":"10.1109\/CEC.1999.785467"},{"key":"2_CR77","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1007\/978-3-642-02565-5_4","volume-title":"Anticipatory Behavior in Adaptive Learning Systems. From Psychological Theories to Artificial Cognitive Systems","author":"J Schmidhuber","year":"2009","unstructured":"Schmidhuber, J.: Driven by compression progress: A simple principle explains essential aspects of subjective beauty, novelty, surprise, interestingness, attention, curiosity, creativity, art, science, music, jokes. In: Pezzulo, G., Butz, M.V., Sigaud, O., Baldassarre, G. (eds.) Anticipatory Behavior in Adaptive Learning Systems. From Psychological Theories to Artificial Cognitive Systems, pp. 48\u201376. Springer, Berlin (2009)"},{"issue":"1","key":"2_CR78","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1152\/jn.1998.80.1.1","volume":"80","author":"W Schultz","year":"1998","unstructured":"Schultz, W.: Predictive reward signal of dopamine neurons. J. Neurophysiol. 80(1), 1\u201327 (1998)","journal-title":"J. Neurophysiol."},{"issue":"3","key":"2_CR79","doi-asserted-by":"crossref","first-page":"1652","DOI":"10.4249\/scholarpedia.1652","volume":"2","author":"W Schultz","year":"2007","unstructured":"Schultz, W.: Reward. Scholarpedia 2(3), 1652 (2007a)","journal-title":"Reward. Scholarpedia"},{"issue":"6","key":"2_CR80","doi-asserted-by":"crossref","first-page":"2184","DOI":"10.4249\/scholarpedia.2184","volume":"2","author":"W Schultz","year":"2007","unstructured":"Schultz, W.: Reward signals. Scholarpedia 2(6), 2184 (2007b)","journal-title":"Reward signals. Scholarpedia"},{"key":"2_CR81","volume-title":"Proceedings of the 11th International Joint Conference on Artificial Intelligence, Detroit, MI pp. 669\u2013674","author":"PD Scott","year":"1989","unstructured":"Scott, P.D., Markovitch, S.: Learning novel domains through curiosity and conjecture. In: Sridharan, N.S. (ed.) Proceedings of the 11th International Joint Conference on Artificial Intelligence, Detroit, MI pp.\u00a0669\u2013674. Morgan Kaufmann, San Francisco (1989)"},{"key":"2_CR82","unstructured":"Settles, B.: Active learning literature survey. Technical Report 1648, Computer Sciences, University of Wisconsin-Madison, Madison (2009)"},{"key":"2_CR83","doi-asserted-by":"crossref","unstructured":"Singh, S., Barto, A.G., Chentanez, N.: Intrinsically motivated reinforcement learning. In: Advances in Neural Information Processing Systems 17: Proceedings of the 2004 Conference. MIT, Cambridge (2005)","DOI":"10.21236\/ADA440280"},{"key":"2_CR84","unstructured":"Singh, S., Lewis, R.L., Barto, A.G.: Where do rewards come from? In: Taatgen, N., van Rijn,\u00a0H. (eds.) Proceedings of the 31st Annual Conference of the Cognitive Science Society, Amsterdam pp.\u00a02601\u20132606. Cognitive Science Society (2009)"},{"issue":"2","key":"2_CR85","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1109\/TAMD.2010.2051031","volume":"2","author":"S. Singh","year":"2010","unstructured":"Singh, S., Lewis, R.L., Barto, A.G., Sorg, J.: Intrinsically motivated reinforcement learning: An evolutionary perspective. IEEE Trans. Auton. Mental Dev. 2(2), 70\u201382 (2010). Special issue on Active Learning and Intrinsically Motivated Exploration in Robots: Advances and Challenges","journal-title":"IEEE Trans. Auton. Mental Dev."},{"key":"2_CR86","doi-asserted-by":"crossref","unstructured":"Snel, M., Hayes, G.M.: Evolution of valence systems in an unstable environment. In: Proceedings of the 10th International Conference on Simulation of Adaptive Behavior: From Animals to Animats, Osaka, M.\u00a0Asada, J.C. Hallam, J.-A. Meyer (Eds.) pp. 12\u201321 (2008)","DOI":"10.1007\/978-3-540-69134-1_2"},{"key":"2_CR87","first-page":"1007","volume-title":"Proceedings of the 27th International Conference on Machine Learning","author":"J Sorg","year":"2010","unstructured":"Sorg, J., Singh, S., Lewis, R.L.: Internal rewards mitigate agent boundedness. In: F\u00fcrnkranz, J., Joachims, T. (eds.) Proceedings of the 27th International Conference on Machine Learning, Haifa, Israel, Omnipress pp.\u00a01007\u20131014 (2010)"},{"key":"2_CR88","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Reinforcement learning architectures for animats. In: From Animals to Animats: Proceedings of the First International Conference on Simulation of Adaptive Behavior, J.-A. Meyer, S.W.Wilson (Eds.) pp. 288\u2013296. MIT, Cambridge (1991)","DOI":"10.7551\/mitpress\/3115.003.0040"},{"key":"2_CR89","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT, Cambridge (1998)"},{"key":"2_CR90","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between mdps and semi-mdps: A framework for temporal abstraction inreinforcement learning. Artif. Intell. 112, 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"issue":"2","key":"2_CR91","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"GJ Tesauro","year":"1994","unstructured":"Tesauro, G.J.: TD\u2014gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput. 6(2), 215\u2013219 (1994)","journal-title":"Neural Comput."},{"key":"2_CR92","unstructured":"Thomaz, A.L., Breazeal, C.: Transparency and socially guided machine learning. In: Proceedings of the 5th International Conference on Developmental Learning (ICDL) Bloomington, IN (2006)"},{"key":"2_CR93","doi-asserted-by":"crossref","unstructured":"Thomaz, A.L., Hoffman, G., Breazeal, C.: Experiments in socially guided machine learning: Understanding how humans teach. In: Proceedings of the 1st Annual conference on Human-Robot Interaction (HRI) Salt Lake City, UT (2006)","DOI":"10.1145\/1121241.1121315"},{"key":"2_CR94","volume-title":"Animal Intelligence","author":"EL Thorndike","year":"1911","unstructured":"Thorndike, E.L.: Animal Intelligence. Hafner, Darien (1911)"},{"key":"2_CR95","volume-title":"(1911): Motivational Systems","author":"FM Toates","year":"1911","unstructured":"Toates, F.M. (1911): Motivational Systems. Cambridge University Press, Cambridge (1911)"},{"key":"2_CR96","volume-title":"Purposive Behavior in Animals and Men","author":"EC Tolman","year":"1932","unstructured":"Tolman, E.C.: Purposive Behavior in Animals and Men. Naiburg, New York (1932)"},{"key":"2_CR97","volume-title":"Emotions in Humans and Artifacts","year":"1997","unstructured":"Trappl, R., Petta, P., Payr, S. (eds.): Emotions in Humans and Artifacts. MIT, Cambridge (1997)"},{"issue":"10","key":"2_CR98","doi-asserted-by":"crossref","first-page":"1447","DOI":"10.1016\/j.neunet.2008.09.013","volume":"21","author":"E Uchibe","year":"2008","unstructured":"Uchibe, E., Doya, K.: Finding intrinsic rewards by embodied evolution and constrained reinforcement learning. Neural Netw. 21(10), 1447\u20131455 (2008)","journal-title":"Neural Netw."},{"key":"2_CR99","doi-asserted-by":"crossref","first-page":"390","DOI":"10.1109\/TAC.1965.1098193","volume":"10","author":"MD Waltz","year":"1965","unstructured":"Waltz, M.D., Fu, K.S.: A heuristic approach to reinforcement learning control systems. IEEE Transactions on Automatic Control 10, 390\u2013398 (1965)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"2_CR100","doi-asserted-by":"crossref","first-page":"599","DOI":"10.1126\/science.291.5504.599","volume":"291","author":"J Weng","year":"2001","unstructured":"Weng, J., McClelland, J., Pentland, A., Sporns, O., Stockman, I., Sur, M., Thelen, E.: Autonomous mental development by robots and animals. Science 291, 599\u2013600 (2001)","journal-title":"Science"},{"key":"2_CR101","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1109\/TSMC.1987.289329","volume":"17","author":"PJ Werbos","year":"1987","unstructured":"Werbos, P.J.: Building and understanding adaptive systems: A statistical\/numerical approach to factory automation and brain research. IEEE Trans. Sys. Man Cybern. 17, 7\u201320 (1987)","journal-title":"IEEE Trans. Sys. Man Cybern."},{"key":"2_CR102","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1037\/h0040934","volume":"66","author":"RW White","year":"1959","unstructured":"White, R.W.: Motivation reconsidered: The concept of competence. Psychol. Rev. 66, 297\u2013333 (1959)","journal-title":"Psychol. Rev."},{"key":"2_CR103","doi-asserted-by":"crossref","first-page":"455","DOI":"10.1109\/TSMC.1973.4309272","volume":"3","author":"B Widrow","year":"1973","unstructured":"Widrow, B., Gupta, N.K., Maitra, S.: Punish\/reward: Learning with a critic in adaptive thresholdsystems. IEEE Trans. Sys. Man Cybern. 3, 455\u2013465 (1973)","journal-title":"IEEE Trans. Sys. Man Cybern."},{"key":"2_CR104","unstructured":"Widrow, B., Hoff, M.E.: Adaptive switching circuits. In: 1960 WESCON Convention Record Part IV, pp. 96\u2013104. Institute of Radio Engineers, New York (1960). Reprinted in J.A. Anderson and E. Rosenfeld, Neurocomputing: Foundations of Research, pp. 126\u2013134. MIT, Cambridge (1988)"},{"key":"2_CR105","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1037\/h0022630","volume":"73","author":"PT Young","year":"1966","unstructured":"Young, P.T.: Hedonic organization and regulation of behavior. Psychol. Rev. 73, 59\u201386 (1966)","journal-title":"Psychol. Rev."}],"container-title":["Intrinsically Motivated Learning in Natural and Artificial Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-32375-1_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T02:34:11Z","timestamp":1745980451000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-32375-1_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,11,10]]},"ISBN":["9783642323744","9783642323751"],"references-count":105,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-32375-1_2","relation":{},"subject":[],"published":{"date-parts":[[2012,11,10]]},"assertion":[{"value":"10 November 2012","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}