{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,31]],"date-time":"2022-03-31T22:47:21Z","timestamp":1648766841819},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2013,6,12]],"date-time":"2013-06-12T00:00:00Z","timestamp":1370995200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["K\u00fcnstl Intell"],"published-print":{"date-parts":[[2013,8]]},"DOI":"10.1007\/s13218-013-0261-4","type":"journal-article","created":{"date-parts":[[2013,6,11]],"date-time":"2013-06-11T09:01:23Z","timestamp":1370941283000},"page":"213-219","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning: Psychologische und neurobiologische Aspekte"],"prefix":"10.1007","volume":"27","author":[{"given":"Michel","family":"Tokic","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,6,12]]},"reference":[{"issue":"1\u20132","key":"261_CR1","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1016\/0025-5564(71)90051-4","volume":"10","author":"JS Albus","year":"1971","unstructured":"Albus JS (1971) A theory of cerebellar function. Math Biosci 10(1\u20132):25\u201361","journal-title":"Math Biosci"},{"issue":"6288","key":"261_CR2","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1038\/347069a0","volume":"347","author":"A Artola","year":"1990","unstructured":"Artola A, Br\u00f6cher S, Singer W (1990) Different voltage-dependent thresholds for inducing long-term depression and long-term potentiation in slices of rat visual cortex. Nature 347(6288):69\u201372","journal-title":"Nature"},{"key":"261_CR3","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1146\/annurev.neuro.28.061604.135709","volume":"28","author":"G Aston-Jones","year":"2005","unstructured":"Aston-Jones G, Cohen JD (2005) An integrative theory of locus coeruleus-norepinephrine function: adaptive gain and optimal performance. Annu Rev Neurosci 28:403\u2013450","journal-title":"Annu Rev Neurosci"},{"key":"261_CR4","first-page":"215","volume-title":"Models of information processing in the basal ganglia","author":"AG Barto","year":"1995","unstructured":"Barto AG (1995) Adaptive critics and the basal ganglia. In: Models of information processing in the basal ganglia. MIT Press, Cambridge, S 215\u2013232"},{"issue":"5270","key":"261_CR5","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1038\/228477a0","volume":"228","author":"C Blakemore","year":"1970","unstructured":"Blakemore C, Cooper GF (1970) Development of the brain depends on the visual environment. Nature 228(5270):477\u2013478","journal-title":"Nature"},{"issue":"18","key":"261_CR6","doi-asserted-by":"crossref","first-page":"8452","DOI":"10.1073\/pnas.1000496107","volume":"107","author":"AC Bostan","year":"2010","unstructured":"Bostan AC, Dum RP, Strick PL (2010) The basal ganglia communicate with the cerebellum. Proc Natl Acad Sci USA 107(18):8452\u20138456","journal-title":"Proc Natl Acad Sci USA"},{"issue":"1481","key":"261_CR7","doi-asserted-by":"crossref","first-page":"933","DOI":"10.1098\/rstb.2007.2098","volume":"362","author":"JD Cohen","year":"2007","unstructured":"Cohen JD, McClure SM, Yu AJ (2007) Should I stay or should I go? How the human brain manages the trade-off between exploitation and exploration. Philos Trans R Soc Lond B, Biol Sci 362(1481):933\u2013942","journal-title":"Philos Trans R Soc Lond B, Biol Sci"},{"issue":"7095","key":"261_CR8","doi-asserted-by":"crossref","first-page":"876","DOI":"10.1038\/nature04766","volume":"441","author":"ND Daw","year":"2006","unstructured":"Daw ND, O\u2019Doherty JP, Dayan P, Seymour B, Dolan RJ (2006) Cortical substrates for exploratory decisions in humans. Nature 441(7095):876\u2013879","journal-title":"Nature"},{"issue":"1","key":"261_CR9","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1080\/09548980902759086","volume":"20","author":"P Dayan","year":"2009","unstructured":"Dayan P (2009) Prospective and retrospective temporal difference learning. Networks 20(1):32\u201346","journal-title":"Networks"},{"key":"261_CR10","unstructured":"Distler M (2012) K\u00f6nnen Lernalgorithmen interagieren wie im Gehirn? Bachelor-thesis, Fachgebiet f\u00fcr Intelligente Autonome Systeme, Technische Universit\u00e4t Darmstadt"},{"issue":"7\u20138","key":"261_CR11","doi-asserted-by":"crossref","first-page":"961","DOI":"10.1016\/S0893-6080(99)00046-5","volume":"12","author":"K Doya","year":"1999","unstructured":"Doya K (1999) What are the computations of the cerebellum, the basal ganglia and the cerebral cortex? Neural Netw 12(7\u20138):961\u2013974","journal-title":"Neural Netw"},{"issue":"6","key":"261_CR12","doi-asserted-by":"crossref","first-page":"732","DOI":"10.1016\/S0959-4388(00)00153-7","volume":"10","author":"K Doya","year":"2000","unstructured":"Doya K (2000) Complementary roles of basal ganglia and cerebellum in learning and motor control. Curr Opin Neurobiol 10(6):732\u2013739","journal-title":"Curr Opin Neurobiol"},{"issue":"1","key":"261_CR13","doi-asserted-by":"crossref","first-page":"30","DOI":"10.2976\/1.2732246\/10.2976\/1","volume":"1","author":"K Doya","year":"2007","unstructured":"Doya K (2007) Reinforcement learning: computational theory and biological mechanisms. HFSP Journal 1(1):30\u201340","journal-title":"HFSP Journal"},{"issue":"4","key":"261_CR14","doi-asserted-by":"crossref","first-page":"410","DOI":"10.1038\/nn2077","volume":"11","author":"K Doya","year":"2008","unstructured":"Doya K (2008) Modulators of decision making. Nat Neurosci 11(4):410\u2013416","journal-title":"Nat Neurosci"},{"key":"261_CR15","doi-asserted-by":"crossref","first-page":"1999","DOI":"10.1016\/j.cor.2006.10.004","volume":"35","author":"NJ Eck van","year":"2008","unstructured":"van Eck NJ, van Wezel M (2008) Application of reinforcement learning to the game of othello. Comput Oper Res 35:1999\u20132017","journal-title":"Comput Oper Res"},{"key":"261_CR16","volume-title":"Proceedings of the 25th IEEE\/RSJ international conference on intelligent robots and systems (IROS)","author":"P Ertle","year":"2012","unstructured":"Ertle P, Tokic M, Cubek R, Voos H, S\u00f6ffker D (2012) Towards learning of safety knowledge from human demonstrations. In: Proceedings of the 25th IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE Press, New York"},{"key":"261_CR17","series-title":"LNAI","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1007\/978-3-540-69939-2_9","volume-title":"Artificial neural networks in pattern recognition","author":"S Fau\u00dfer","year":"2008","unstructured":"Fau\u00dfer S, Schwenker F (2008) Neural approximation of Monte Carlo policy evaluation deployed in connect four. In: Artificial neural networks in pattern recognition. LNAI, Bd 5064. Springer, Berlin, S 90\u2013100"},{"key":"261_CR18","doi-asserted-by":"crossref","first-page":"2925","DOI":"10.1109\/ICPR.2010.717","volume-title":"Proceedings of the 20th international conference on pattern recognition (ICPR\u201910)","author":"S Fau\u00dfer","year":"2010","unstructured":"Fau\u00dfer S, Schwenker F (2010) Learning a strategy with neural approximated temporal-difference methods in English draughts. In: Proceedings of the 20th international conference on pattern recognition (ICPR\u201910). IEEE Press, New York, S 2925\u20132928"},{"key":"261_CR19","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1007\/978-90-481-9695-1_26","volume-title":"Advances in cognitive neurodynamics (II)","author":"S Handrich","year":"2011","unstructured":"Handrich S, Herzog A, Wolf A, Herrmann CS (2011) Combining supervised, unsupervised, and reinforcement learning in a network of spiking neurons. In: Advances in cognitive neurodynamics (II). Springer, Berlin, S 163\u2013176"},{"key":"261_CR20","first-page":"143","volume-title":"Proceedings of the 16th European symposium on artificial neural networks (ESANN)","author":"A Hans","year":"2008","unstructured":"Hans A, Schneega\u00dfD, Sch\u00e4fer AM, Udluft S (2008) Safe exploration for reinforcement learning. In: Proceedings of the 16th European symposium on artificial neural networks (ESANN), S\u00a0143\u2013148"},{"key":"261_CR21","volume-title":"The organization of behavior: a neuropsychological theory","author":"DO Hebb","year":"1949","unstructured":"Hebb DO (1949) The organization of behavior: a neuropsychological theory. Wiley, New York"},{"issue":"3933","key":"261_CR22","doi-asserted-by":"crossref","first-page":"869","DOI":"10.1126\/science.168.3933.869","volume":"168","author":"HVB Hirsch","year":"1970","unstructured":"Hirsch HVB, Spinelli DN (1970) Visual experience modifies distribution of horizontally and vertically oriented receptive fields in cats. Science 168(3933):869\u2013871","journal-title":"Science"},{"issue":"2","key":"261_CR23","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1093\/cercor\/5.2.95","volume":"5","author":"JC Houk","year":"1995","unstructured":"Houk JC, Wise SP (1995) Distributed modular architectures linking basal ganglia, cerebellum, and cerebral cortex: their role in planning and controlling action. Cereb Cortex 5(2):95\u2013110","journal-title":"Cereb Cortex"},{"issue":"1","key":"261_CR24","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1113\/jphysiol.1982.sp014103","volume":"324","author":"M Ito","year":"1982","unstructured":"Ito M, Sakurai M, Tongroach P (1982) Climbing fibre induced depression of both mossy fibre responsiveness and glutamate sensitivity of cerebellar purkinje cells. J\u00a0Gen Physiol 324(1):113\u2013134","journal-title":"J\u00a0Gen Physiol"},{"key":"261_CR25","first-page":"311","volume-title":"Proceedings of the 4th international conference on machine learning and applications (ICMLA)","author":"TC Kietzmann","year":"2009","unstructured":"Kietzmann TC, Riedmiller M (2009) The neuro slot car racer: reinforcement learning in a real world setting. In: Proceedings of the 4th international conference on machine learning and applications (ICMLA). IEEE Press, New York, S 311\u2013316"},{"issue":"4","key":"261_CR26","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1007\/s10514-012-9290-3","volume":"33","author":"J Kober","year":"2012","unstructured":"Kober J, Wilhelm A, Oztop E, Peters J (2012) Reinforcement learning to adjust parametrized motor primitives to new situations. Auton Robots 33(4):361\u2013379","journal-title":"Auton Robots"},{"issue":"2","key":"261_CR27","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1113\/jphysiol.1969.sp008820","volume":"202","author":"D Marr","year":"1969","unstructured":"Marr D (1969) A theory of cerebellar cortex. J\u00a0Gen Physiol 202(2):437\u2013470.1","journal-title":"J\u00a0Gen Physiol"},{"issue":"4","key":"261_CR28","doi-asserted-by":"crossref","first-page":"370","DOI":"10.1037\/h0054346","volume":"50","author":"AH Maslow","year":"1943","unstructured":"Maslow AH (1943) A theory of human motivation. Psychol Rev 50(4):370\u2013396","journal-title":"Psychol Rev"},{"key":"261_CR29","first-page":"867","volume-title":"Advances in neural information processing systems","author":"SM McClure","year":"2006","unstructured":"McClure SM, Gilzenrat MS, Cohen JD (2006) An exploration-exploitation model based on norepinephrine and dopamine activity. In: Advances in neural information processing systems, Bd\u00a018. MIT Press, Cambridge, S 867\u2013874"},{"issue":"8","key":"261_CR30","doi-asserted-by":"crossref","first-page":"1057","DOI":"10.1038\/nn1743","volume":"9","author":"G Morris","year":"2006","unstructured":"Morris G, Nevet A, Arkadir D, Vaadia E, Bergman H (2006) Midbrain dopamine neurons encode decisions for future action. Nat Neurosci 9(8):1057\u20131063","journal-title":"Nat Neurosci"},{"key":"261_CR31","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/IJCNN.2012.6252824","volume-title":"Proceedings of the international joint conference of neural networks (IJCNN 2012)","author":"H Ngo","year":"2012","unstructured":"Ngo H, Luciw M, F\u00f6rster A, Schmidhuber J (2012) Learning skills from play: artificial curiosity on a katana robot arm. In: Proceedings of the international joint conference of neural networks (IJCNN 2012), Brisbane, Australia, S 1\u20138"},{"issue":"3","key":"261_CR32","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1016\/j.jmp.2008.12.005","volume":"53","author":"Y Niv","year":"2009","unstructured":"Niv Y (2009) Reinforcement learning in the brain. J\u00a0Math Psychol 53(3):139\u2013154","journal-title":"J\u00a0Math Psychol"},{"issue":"8","key":"261_CR33","doi-asserted-by":"crossref","first-page":"987","DOI":"10.1038\/nn0806-987","volume":"9","author":"Y Niv","year":"2006","unstructured":"Niv Y, Daw ND, Dayan P (2006) Choice values. Nat Neurosci 9(8):987\u2013988","journal-title":"Nat Neurosci"},{"key":"261_CR34","volume-title":"Conditioned reflexes\u2014an investigation of the physiological activity of the cerebral cortex","author":"IP Pavlov","year":"1927","unstructured":"Pavlov IP (1927) Conditioned reflexes\u2014an investigation of the physiological activity of the cerebral cortex. Oxford University Press, London. Translated and edited by GV Anrep"},{"issue":"7\u20139","key":"261_CR35","doi-asserted-by":"crossref","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008) Natural actor-critic. Neurocomputing 71(7\u20139):1180\u20131190","journal-title":"Neurocomputing"},{"issue":"4","key":"261_CR36","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008) Reinforcement learning of motor skills with policy gradients. Neural Netw 21(4):682\u2013697","journal-title":"Neural Netw"},{"key":"261_CR37","first-page":"64","volume-title":"Classical conditioning II: current research and theory","author":"R Rescorla","year":"1972","unstructured":"Rescorla R, Wagner A (1972) A theory of pavlovian conditioning: variations in the effectiveness of reinforcement and nonreinforcement. In: Classical conditioning II: current research and theory. Appleton-Century-Crofts, New York, S 64\u201399"},{"key":"261_CR38","series-title":"LNCS","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine learning: ECML 2005","author":"M Riedmiller","year":"2005","unstructured":"Riedmiller M (2005) Neural fitted Q iteration\u2014first experiences with a data efficient neural reinforcement learning method. In: Machine learning: ECML 2005. LNCS, Bd 3720. Springer, Berlin, S\u00a0317\u2013328"},{"issue":"1","key":"261_CR39","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M Riedmiller","year":"2009","unstructured":"Riedmiller M, Gabel T, Hafner R, Lange S (2009) Reinforcement learning for robot soccer. Auton Robots 27(1):55\u201373","journal-title":"Auton Robots"},{"key":"261_CR40","volume-title":"Proceedings of the FBIT 2007 conference","author":"M Riedmiller","year":"2007","unstructured":"Riedmiller M, Montemerlo M, Dahlkamp H (2007) Learning to drive a real car in 20 minutes. In: Proceedings of the FBIT 2007 conference, Jeju, Korea. Springer, Berlin"},{"issue":"12","key":"261_CR41","doi-asserted-by":"crossref","first-page":"1615","DOI":"10.1038\/nn2013","volume":"10","author":"MR Roesch","year":"2007","unstructured":"Roesch MR, Calu DJ, Schoenbaum G (2007) Dopamine neurons encode the better option in rats deciding between differently delayed or sized rewards. Nat Neurosci 10(12):1615\u20131624","journal-title":"Nat Neurosci"},{"issue":"1","key":"261_CR42","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1002\/eej.20600","volume":"162","author":"T Sasakawa","year":"2008","unstructured":"Sasakawa T, Hu J, Hirasawa K (2008) A brainlike learning system with supervised, unsupervised, and reinforcement learning. Electr Eng Jpn 162(1):32\u201339","journal-title":"Electr Eng Jpn"},{"key":"261_CR43","first-page":"255","volume-title":"Proceedings of the 23rd IEEE\/RSJ international conference on intelligent robots and systems (IROS)","author":"M Schneider","year":"2010","unstructured":"Schneider M, Ertel W (2010) Robot learning by demonstration with local Gaussian process regression. In: Proceedings of the 23rd IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE Press, New York, S 255\u2013260"},{"issue":"1","key":"261_CR44","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1152\/jn.1998.80.1.1","volume":"80","author":"W Schultz","year":"1998","unstructured":"Schultz W (1998) Predictive reward signal of dopamine neurons. J\u00a0Neurophysiol 80(1):1\u201327","journal-title":"J\u00a0Neurophysiol"},{"issue":"5306","key":"261_CR45","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W Schultz","year":"1997","unstructured":"Schultz W, Dayan P, Montague PR (1997) A neural substrate of prediction and reward. Science 275(5306):1593\u20131599","journal-title":"Science"},{"key":"261_CR46","doi-asserted-by":"crossref","first-page":"833","DOI":"10.1145\/1143844.1143949","volume-title":"Proceedings of the 23rd international conference on machine learning","author":"O Simsek","year":"2006","unstructured":"Simsek O, Barto AG (2006) An intrinsic reward mechanism for efficient exploration. In: Proceedings of the 23rd international conference on machine learning, S 833\u2013840"},{"key":"261_CR47","volume-title":"Science and human behavior","author":"BF Skinner","year":"1953","unstructured":"Skinner BF (1953) Science and human behavior. Macmillan, New York"},{"issue":"1","key":"261_CR48","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3(1):9\u201344","journal-title":"Mach Learn"},{"key":"261_CR49","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"issue":"1\u20132","key":"261_CR50","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(01)00110-2","volume":"134","author":"G Tesauro","year":"2002","unstructured":"Tesauro G (2002) Programming backgammon using self-teaching neural nets. Artif Intell 134(1\u20132):181\u2013199","journal-title":"Artif Intell"},{"key":"261_CR51","volume-title":"Animal intelligence","author":"EL Thorndike","year":"1911","unstructured":"Thorndike EL (1911) Animal intelligence. Macmillan, New York"},{"key":"261_CR52","first-page":"1069","volume-title":"Advances in neural information processing systems","author":"S Thrun","year":"1995","unstructured":"Thrun S (1995) Learning to play the game of chess. In: Advances in neural information processing systems, Bd 7. MIT Press, Cambridge, S 1069\u20131076"},{"key":"261_CR53","first-page":"30","volume":"03\/2009","author":"J Togelius","year":"2009","unstructured":"Togelius J, Schaul T, Wierstra D, Igel C, Gomez F, Schmidhuber J (2009) Ontogenetic and phylogenetic reinforcement learning. K\u00fcnstl Intell 03\/2009:30\u201333","journal-title":"K\u00fcnstl Intell"},{"key":"261_CR54","first-page":"160","volume-title":"Proceedings of the 22th international florida artificial intelligence research society conference (FLAIRS)","author":"M Tokic","year":"2009","unstructured":"Tokic M, Fessler J, Ertel W (2009) The crawler, a class room demonstrator for reinforcement learning. In: Proceedings of the 22th international florida artificial intelligence research society conference (FLAIRS). AAAI Press, New York, S 160\u2013165"},{"key":"261_CR55","series-title":"LNAI","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1007\/978-3-642-24455-1_33","volume-title":"KI 2011: advances in artificial intelligence","author":"M Tokic","year":"2011","unstructured":"Tokic M, Palm G (2011) Value-difference based exploration: adaptive control between epsilon-greedy and softmax. In: KI 2011: advances in artificial intelligence. LNAI, Bd 7006. Springer, Berlin, S 335\u2013346"},{"key":"261_CR56","series-title":"LNCS","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1007\/978-3-642-33266-1_6","volume-title":"Artificial neural networks and machine learning\u00a0\u2013 ICANN 2012","author":"M Tokic","year":"2012","unstructured":"Tokic M, Palm G (2012) Adaptive exploration using stochastic neurons. In: Artificial neural networks and machine learning\u00a0\u2013 ICANN 2012. LNCS, Bd 7553. Springer, Berlin, S 42\u201349"},{"key":"261_CR57","series-title":"LNAI","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1007\/978-3-642-33212-8_6","volume-title":"Artificial neural networks in pattern recognition","author":"M Tokic","year":"2012","unstructured":"Tokic M, Palm G (2012) Gradient algorithms for Exploration\/Exploitation trade-offs: global and local variants. In: Artificial neural networks in pattern recognition. LNAI, Bd 7477. Springer, Berlin, S 60\u201371"},{"issue":"1","key":"261_CR58","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1016\/0006-8993(79)90138-0","volume":"168","author":"T Tsumoto","year":"1979","unstructured":"Tsumoto T, Suda K (1979) Cross-depression: an electrophysiological manifestation of binocular competition in the developing visual cortex. Brain Res 168(1):190\u2013194","journal-title":"Brain Res"},{"issue":"2009","key":"261_CR59","first-page":"12","volume":"03","author":"J Vitay","year":"2009","unstructured":"Vitay J, Fix J, Beuth F, Schroll H, Hamker F (2009) Biological models of reinforcement learning. K\u00fcnstl Intell 03(2009):12\u201318","journal-title":"K\u00fcnstl Intell"},{"key":"261_CR60","first-page":"27","volume":"57","author":"F Wardle","year":"1987","unstructured":"Wardle F (1987) Getting back to the basics of children\u2019s play. Child Care Inf Exch 57:27\u201330","journal-title":"Child Care Inf Exch"},{"key":"261_CR61","unstructured":"Watkins C (1989) Learning from delayed rewards. Ph.D. thesis, University of Cambridge, Cambridge, England"},{"issue":"1","key":"261_CR62","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1037\/h0069608","volume":"3","author":"JB Watson","year":"1920","unstructured":"Watson JB, Rayner R (1920) Conditioned emotional reactions. J\u00a0Exp Psychol 3(1):1\u201314","journal-title":"J\u00a0Exp Psychol"},{"issue":"5","key":"261_CR63","doi-asserted-by":"crossref","first-page":"620","DOI":"10.1093\/jigpal\/jzp049","volume":"18","author":"D Wierstra","year":"2010","unstructured":"Wierstra D, F\u00f6rster A, Peters J, Schmidhuber J (2010) Recurrent policy gradients. Log J IGPL 18(5):620\u2013634","journal-title":"Log J IGPL"}],"container-title":["KI - K\u00fcnstliche Intelligenz"],"original-title":[],"language":"de","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13218-013-0261-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s13218-013-0261-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s13218-013-0261-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,24]],"date-time":"2022-02-24T20:39:02Z","timestamp":1645735142000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s13218-013-0261-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,6,12]]},"references-count":63,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2013,8]]}},"alternative-id":["261"],"URL":"https:\/\/doi.org\/10.1007\/s13218-013-0261-4","relation":{},"ISSN":["0933-1875","1610-1987"],"issn-type":[{"value":"0933-1875","type":"print"},{"value":"1610-1987","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,6,12]]}}}