{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T11:16:30Z","timestamp":1767179790291,"version":"build-2238731810"},"reference-count":82,"publisher":"Public Library of Science (PLoS)","issue":"12","license":[{"start":{"date-parts":[[2009,12,4]],"date-time":"2009-12-04T00:00:00Z","timestamp":1259884800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"DOI":"10.1371\/journal.pcbi.1000586","type":"journal-article","created":{"date-parts":[[2009,12,3]],"date-time":"2009-12-03T19:18:04Z","timestamp":1259867884000},"page":"e1000586","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":77,"title":["Spike-Based Reinforcement Learning in Continuous State and Action Space: When Policy Gradient Methods Fail"],"prefix":"10.1371","volume":"5","author":[{"given":"Eleni","family":"Vasilaki","sequence":"first","affiliation":[]},{"given":"Nicolas","family":"Fr\u00e9maux","sequence":"additional","affiliation":[]},{"given":"Robert","family":"Urbanczik","sequence":"additional","affiliation":[]},{"given":"Walter","family":"Senn","sequence":"additional","affiliation":[]},{"given":"Wulfram","family":"Gerstner","sequence":"additional","affiliation":[]}],"member":"340","published-online":{"date-parts":[[2009,12,4]]},"reference":[{"key":"ref1","article-title":"Reinforcement learning","author":"R Sutton","year":"1998"},{"key":"ref2","article-title":"Animal Intelligence","author":"E Thorndike","year":"1911"},{"key":"ref3","first-page":"64","article-title":"A theory of pavlovian conditioning: variations in the effectiveness of reinforecement and nonreinforcement.","author":"R Rescorla","year":"1972"},{"key":"ref4","article-title":"The hedonistic neuron: a theory of memory, learning, and intelligence.","author":"A Klopf","year":"1982","journal-title":"Hemisphere"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"85","DOI":"10.3758\/BF03333113","article-title":"A neuronal model of classical conditioning.","volume":"16","author":"A Klopf","year":"1988","journal-title":"Psychobiology"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1037\/0033-295X.88.2.135","article-title":"Towards a modern theory of adaptive networks: expectation and prediction.","volume":"88","author":"RS Sutton","year":"1981","journal-title":"Psychol Rev"},{"key":"ref7","first-page":"835","article-title":"Neuronlike adaptive elements that can solve difficult learning and control problems.","volume":"13","author":"A Barto","year":"1983","journal-title":"IEEE sys man cybern"},{"key":"ref8","first-page":"497","article-title":"Time-derivative models of pavlovian reinforcement.","author":"R Sutton","year":"1990"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1038\/361031a0","article-title":"A synaptic model of memory: long-term potentiation in the hippocampus.","volume":"361","author":"TVP Bliss","year":"1993","journal-title":"Nature"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/j.neuron.2004.09.012","article-title":"LTP and LTD: An embarassment of riches.","volume":"44","author":"RC Malenka","year":"2004","journal-title":"Neuron"},{"key":"ref11","article-title":"The Organization of Behavior","author":"DO Hebb","year":"1949"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1007\/BF00275687","article-title":"A simplified neuron model as a principal component analyzer.","volume":"15","author":"E Oja","year":"1982","journal-title":"J Math Biol"},{"key":"ref13","article-title":"Self-organization and associative memory, 3rd edition","author":"T Kohonen","year":"1989"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1007\/BF00288907","article-title":"Self-organization of orientation selective cells in the striate cortex.","volume":"14","author":"C von der Malsburg","year":"1973","journal-title":"Kybernetik"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1523\/JNEUROSCI.02-01-00032.1982","article-title":"Theory of the development of neuron selectivity: orientation specificity and binocular interaction in visual cortex.","volume":"2","author":"E Bienenstock","year":"1982","journal-title":"J Neurosci"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1038\/383076a0","article-title":"A neuronal learning rule for sub-millisecond temporal coding.","volume":"383","author":"W Gerstner","year":"1996","journal-title":"Nature"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"1178","DOI":"10.1038\/81453","article-title":"Synaptic plastictiy - taming the beast.","volume":"3","author":"LF Abbott","year":"2000","journal-title":"Nat Neurosci"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"8812","DOI":"10.1523\/JNEUROSCI.20-23-08812.2000","article-title":"Stable Hebbian learning from spike timing-dependent plasticity.","volume":"20","author":"MCW van Rossum","year":"2000","journal-title":"J Neurosci"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1162\/089976601300014628","article-title":"An algorithm for modifying neurotransmitter release probability based on pre- and postsynaptic spike timing.","volume":"13","author":"W Senn","year":"2001","journal-title":"Neural Computat"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511815706","article-title":"Spiking Neuron Models","author":"W Gerstner","year":"2002"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1007\/s00422-008-0233-1","article-title":"Phenomenological models of synaptic plasticity based on spike timing.","volume":"98","author":"A Morrison","year":"2008","journal-title":"Biolog Cybern"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","article-title":"A neural substrate for prediction and reward.","volume":"275","author":"W Schultz","year":"1997","journal-title":"Science"},{"key":"ref23","first-page":"187","article-title":"Cellular models of reinforcement.","author":"J Wickens","year":"1995"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1088\/0954-898X_8_4_001","article-title":"Basal ganglia: structure and computations.","volume":"8","author":"J Wickens","year":"1997","journal-title":"Network-Comp Neural"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1038\/35092560","article-title":"A cellular mechanism of reward-related learning.","volume":"413","author":"JNJ Reynolds","year":"2001","journal-title":"Nature"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1016\/S0893-6080(02)00045-X","article-title":"Dopamine-dependent plasticity of corticostriatal synapses.","volume":"15","author":"JNJ Reynolds","year":"2002","journal-title":"Neural Networks"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1038\/385533a0","article-title":"Synaptic tagging and long-term potentiation.","volume":"385","author":"U Frey","year":"1997","journal-title":"Nature"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1016\/j.neuropharm.2006.07.026","article-title":"The late maintenance of hippocampal LTP: requirements, phases, \u2018synaptic tagging\u2019, \u2018late-associativity\u2019 and implications.","volume":"52","author":"KG Reymann","year":"2007","journal-title":"Neuropharmacology"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1016\/j.neuroscience.2004.08.014","article-title":"Resetting of \u2018synaptic tags\u2019 is time- and activity-dependent in rat hippocampal ca1 in vitro.","volume":"129","author":"S Sajikumar","year":"2004","journal-title":"Neuroscience"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"5068","DOI":"10.1523\/JNEUROSCI.4940-06.2007","article-title":"Identification of compartment- and process-specific molecules required for \u2018synaptic tagging\u2019 during long-term potentiation and long-term depression in hippocampal CA1.","volume":"27","author":"S Sajikumar","year":"2007","journal-title":"J Neurosci"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"2435","DOI":"10.1523\/JNEUROSCI.4402-07.2008","article-title":"Dopamine receptor activation is required for corticostriatal spike-timing-dependent plasticity.","volume":"28","author":"V Pawlak","year":"2008","journal-title":"J Neurosci"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"41909","DOI":"10.1103\/PhysRevE.69.041909","article-title":"Learning in neural networks by reinforcement of irregular spiking.","volume":"69","author":"X Xie","year":"2004","journal-title":"Phys Rev E"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"1468","DOI":"10.1162\/neco.2007.19.6.1468","article-title":"Reinforcement learning through modulation of spike-timing-dependent synaptic plasticity.","volume":"19","author":"RV Florian","year":"2007","journal-title":"Neural Computat"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"1309","DOI":"10.1162\/neco.2006.18.6.1318","article-title":"Optimal spike-timing dependent plasticity for precise action potential firing in supervised learning.","volume":"18","author":"JP Pfister","year":"2006","journal-title":"Neural Computat"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"2443","DOI":"10.1093\/cercor\/bhl152","article-title":"Solving the distal reward problem through linkage of stdp and dopamine signaling.","volume":"17","author":"E Izhikevich","year":"2007","journal-title":"Cereb Cortex"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"e1000180","DOI":"10.1371\/journal.pcbi.1000180","article-title":"A learning theory for reward-modulated spike-timing-dependent plasticity with application to biofeedback.","volume":"4(10)","author":"R Legenstein","year":"2008","journal-title":"PLoS Comput Biol"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1162\/neco.2008.08-07-593","article-title":"A spiking neural network model of an actor-critic learning agent.","volume":"21","author":"W Potjans","year":"2009","journal-title":"Neural Comput"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"2245","DOI":"10.1162\/neco.2007.19.8.2245","article-title":"Reinforcement learning, spike-time-dependent plasticity, and the bcm rule.","volume":"19","author":"D Baras","year":"2007","journal-title":"Neural Comput"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following methods for connectionist reinforcement learning.","volume":"8","author":"R Williams","year":"1992","journal-title":"Mach Learn"},{"key":"ref40","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","article-title":"Experiments with infinite-horizon, policy- gradient estimation.","volume":"15","author":"J Baxter","year":"2001","journal-title":"J Artif Intell Res"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"3648","DOI":"10.1152\/jn.00364.2007","article-title":"Reinforcement Learning With Modulated Spike Timing Dependent Synaptic Plasticity.","volume":"98","author":"MA Farries","year":"2007","journal-title":"J Neurophysiol"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"4498","DOI":"10.1103\/PhysRevE.59.4498","article-title":"Hebbian learning and spiking neurons.","volume":"59","author":"R Kempter","year":"1999","journal-title":"Phys Rev E"},{"key":"ref43","article-title":"Learning from delayed rewards","author":"C Watkins","year":"1989"},{"key":"ref44","doi-asserted-by":"crossref","first-page":"841","DOI":"10.1162\/089976601300014376","article-title":"Temporal difference model reproduces anticipatory neural activity.","volume":"13","author":"R Suri","year":"2001","journal-title":"Neural Comput"},{"key":"ref45","first-page":"385","article-title":"Temporal difference based actor critic learning - convergence and neural implementation.","volume":"22","author":"D Di Castro","year":"2009","journal-title":"NIPS"},{"key":"ref46","doi-asserted-by":"crossref","first-page":"1063","DOI":"10.1016\/S0896-6273(03)00761-X","article-title":"Learning in spiking neural networks by reinforcement of stochastic synaptic transmission.","volume":"40","author":"H Seung","year":"2003","journal-title":"Neuron"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"48104","DOI":"10.1103\/PhysRevLett.97.048104","article-title":"Gradient learning in spiking neural networks by dynamic perturbation of conductances.","volume":"97","author":"I Fiete","year":"2006","journal-title":"Phys Rev Lett"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1162\/0899766053011555","article-title":"Temporal sequence learning, prediction, and control: a review of different models and their relation to biological mechanisms.","volume":"17","author":"F W\u00f6rg\u00f6tter","year":"2005","journal-title":"Neural Comput"},{"key":"ref49","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1023\/A:1008910918445","article-title":"Computational consequences of temporally asymmetric learning rules: I. Differential Hebbian learning.","volume":"7","author":"P Roberts","year":"1999","journal-title":"J Comput Neurosci"},{"key":"ref50","first-page":"164","article-title":"Predictive sequence learning in recurrent neocortical circuits.","author":"R Rao","year":"2000"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1038\/297681a0","article-title":"Place navigation impaired in rats with hippocampal lesions.","volume":"297","author":"R Morris","year":"1982","journal-title":"Nature"},{"key":"ref52","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/(SICI)1098-1063(2000)10:1<1::AID-HIPO1>3.0.CO;2-1","article-title":"Models of hippocampally dependent navigation using the temporal difference learning rule.","volume":"10","author":"D Foster","year":"2000","journal-title":"Hippocampus"},{"key":"ref53","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1007\/s004220000171","article-title":"Spatial cognition and neuro-mimetic navigation: a model of hippocampal place cell activity.","volume":"83","author":"A Arleo","year":"2000","journal-title":"Biol Cybern"},{"key":"ref54","doi-asserted-by":"crossref","first-page":"1125","DOI":"10.1016\/j.neunet.2005.08.012","article-title":"Robust self-localisation and navigation based on hippocampal place cells.","volume":"18","author":"T Stroesslin","year":"2005","journal-title":"Neural Networks"},{"key":"ref55","first-page":"245","article-title":"Spatial representation and navigation in a bio-inspired robot.","author":"D Sheynikhovich","year":"2005"},{"key":"ref56","doi-asserted-by":"crossref","first-page":"537","DOI":"10.1016\/j.jphysparis.2004.01.011","article-title":"Place cells, neocortex and spatial navigation: a short review.","volume":"97","author":"B Poucet","year":"2003","journal-title":"J Physiology-Paris"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"3531","DOI":"10.1523\/JNEUROSCI.10-11-03531.1990","article-title":"Hippocampal representation in place learning.","volume":"10","author":"H Eichenbaum","year":"1990","journal-title":"J Neurosci"},{"key":"ref58","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1007\/BF00992701","article-title":"The convergens of TD (<italic>\u03bb<\/italic>) for general <italic>\u03bb<\/italic>.","volume":"8","author":"P Dayan","year":"1992","journal-title":"Mach learn"},{"key":"ref59","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1007\/BF00993978","article-title":"TD(<italic>\u03bb<\/italic>) converges with probability 1.","volume":"14","author":"P Dayan","year":"1994","journal-title":"Mach Learn"},{"key":"ref60","doi-asserted-by":"crossref","first-page":"719","DOI":"10.1073\/pnas.94.2.719","article-title":"The neural code between neocortical pyramidal neurons depends on neurotransmitter release probability.","volume":"94","author":"M Tsodyks","year":"1997","journal-title":"P Natl Acad Sci USA"},{"key":"ref61","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pcbi.1000248","article-title":"Tag-trigger-consolidation: a model of early and late long-term-potentiation and depression.","volume":"4","author":"C Clopath","year":"2008","journal-title":"PLoS Comput Biol"},{"key":"ref62","doi-asserted-by":"crossref","first-page":"173","DOI":"10.1016\/S0006-3495(65)86709-1","article-title":"A theoretical analysis of neuronal variability.","volume":"5","author":"RB Stein","year":"1965","journal-title":"Biophys J"},{"key":"ref63","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1088\/0954-898X_3_2_004","article-title":"Associative memory in a network of \u2018spiking\u2019 neurons.","volume":"3","author":"W Gerstner","year":"1992","journal-title":"Network"},{"key":"ref64","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1007\/s10827-006-7074-5","article-title":"Predicting spike timing of neocortical pyramidal neurons by simple threshold models.","volume":"21","author":"R Jolivet","year":"2006","journal-title":"J Comput Neurosci"},{"key":"ref65","doi-asserted-by":"crossref","first-page":"773","DOI":"10.1098\/rstb.2002.1264","article-title":"Elements of a neurobiological theory of the hippocampus: the role of activity-dependent synaptic plasticity in memory.","volume":"358","author":"R Morris","year":"2003","journal-title":"Phil Trans R Soc Lond B"},{"key":"ref66","first-page":"581","article-title":"Theories of hippocampal function.","author":"R Morris","year":"2007"},{"key":"ref67","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1007\/s00422-008-0288-z","article-title":"Learning flexible sensori-motor mappings in a complex network.","volume":"100","author":"E Vasilaki","year":"2009","journal-title":"Biol Cybern"},{"key":"ref68","doi-asserted-by":"crossref","first-page":"967","DOI":"10.1038\/nrn2022","article-title":"The short-latency dopamine signal: a role in discovering novel actions?","volume":"7","author":"P Redgrave","year":"2006","journal-title":"Nat Rev Neurosci"},{"key":"ref69","doi-asserted-by":"crossref","first-page":"495","DOI":"10.1016\/S0893-6080(02)00044-8","article-title":"Metalearning and neuromodulation.","volume":"15","author":"K Doya","year":"2002","journal-title":"Neural Networks"},{"key":"ref70","doi-asserted-by":"crossref","first-page":"2789","DOI":"10.1523\/JNEUROSCI.19-07-02789.1999","article-title":"Parallel information processing in the dorsal striatum: Relation to hippocampal function.","volume":"19","author":"B Devan","year":"1999","journal-title":"J Neurosci"},{"key":"ref71","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1006\/nlme.1996.0007","article-title":"Inactivation of hippocampus or caudate nucleus with lidocaine differentially affects expression of place and response learning.","volume":"65","author":"M Packard","year":"1996","journal-title":"Neurobiol Learn Mem"},{"key":"ref72","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1006\/nlme.2001.4008","article-title":"Multiple parallel memory systems in the brain of the rat.","volume":"77","author":"N White","year":"2002","journal-title":"Neurobiol Learn and Mem"},{"key":"ref73","article-title":"Principles of behavior","author":"C Hull","year":"1943"},{"key":"ref74","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1037\/h0061626","article-title":"Cogitiva maps in rats and men.","volume":"55","author":"E Toleman","year":"1948","journal-title":"Psychol Rev"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"955","DOI":"10.1016\/S0896-6273(02)01092-9","article-title":"Probabilistic decision making by slow reverrberation in cortical circuits.","volume":"36","author":"XJ Wang","year":"2002","journal-title":"Neuron"},{"key":"ref76","doi-asserted-by":"crossref","first-page":"13028","DOI":"10.1073\/pnas.0900546106","article-title":"Gain in sensitivity and loss in temporal contrast of stdp by dopaminergic modulation at hippocampal synapses.","volume":"106","author":"JC Zhang","year":"2009","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref77","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1126\/science.275.5297.213","article-title":"Regulation of synaptic efficacy by coincidence of postysnaptic AP and EPSP.","volume":"275","author":"H Markram","year":"1997","journal-title":"Science"},{"key":"ref78","doi-asserted-by":"crossref","first-page":"769","DOI":"10.1152\/physrev.00016.2007","article-title":"Dendritic excitability and synaptic plasticity.","volume":"88","author":"PJ Sj\u00f6str\u00f6m","year":"2008","journal-title":"Physiol Rev"},{"key":"ref79","doi-asserted-by":"crossref","first-page":"15224","DOI":"10.1073\/pnas.0505220103","article-title":"Operant matching is a generic outcome of synaptic plasticity based on the covariance between reward and neural activity.","volume":"103","author":"Y Loewenstein","year":"2006","journal-title":"Proc Natl Acad Sci USA"},{"key":"ref80","doi-asserted-by":"crossref","first-page":"250","DOI":"10.1038\/nn.2264","article-title":"Reinforcement learning in populations of spiking neurons.","volume":"12","author":"R Urbanczik","year":"2009","journal-title":"Nat Neurosci"},{"key":"ref81","first-page":"229","article-title":"Learning by statistical cooperation of self-interested neuron-like neuron elements.","volume":"4","author":"A Barto","year":"1985","journal-title":"Hum Neurobiol"},{"key":"ref82","doi-asserted-by":"crossref","first-page":"e1000456","DOI":"10.1371\/journal.pcbi.1000456","article-title":"Towards reproducible descriptions of neuronal network models.","volume":"5","author":"E Nordlie","year":"2009","journal-title":"PLoS Comput Biol"}],"updated-by":[{"DOI":"10.1371\/annotation\/307ea250-3792-4ceb-b905-162d86c96baf","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2009,12,14]],"date-time":"2009-12-14T00:00:00Z","timestamp":1260748800000}}],"container-title":["PLoS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/dx.plos.org\/10.1371\/journal.pcbi.1000586","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,17]],"date-time":"2024-03-17T20:11:35Z","timestamp":1710706295000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1000586"}},"subtitle":[],"editor":[{"given":"Karl J.","family":"Friston","sequence":"first","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2009,12,4]]},"references-count":82,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2009,12,4]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1000586","relation":{},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,12,4]]}}}