{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:03:55Z","timestamp":1742965435768,"version":"3.40.3"},"publisher-location":"New York, NY","reference-count":25,"publisher":"Springer New York","isbn-type":[{"type":"electronic","value":"9781461473206"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-1-4614-7320-6_580-2","type":"book-chapter","created":{"date-parts":[[2014,10,7]],"date-time":"2014-10-07T20:50:26Z","timestamp":1412715026000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning in Cortical Networks"],"prefix":"10.1007","author":[{"given":"Walter","family":"Senn","sequence":"first","affiliation":[]},{"given":"Jean-Pascal","family":"Pfister","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,9,17]]},"reference":[{"key":"580-2_CR1","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett P (2001) Infinite-horizon policy-gradient estimation. J Artif Intell Res 15:319\u2013350","journal-title":"J Artif Intell Res"},{"key":"580-2_CR2","first-page":"385","volume-title":"Advances in neural information processing systems","author":"D Castro","year":"2009","unstructured":"Castro D, Volkinshtein S, Meir R (2009) Temporal difference based actor critic learning: convergence and neural implementation. In: Advances in neural information processing systems, vol 21. MIT Press, Cambridge, MA, pp 385\u2013392"},{"key":"580-2_CR3","doi-asserted-by":"publisher","first-page":"876","DOI":"10.1038\/nature04766","volume":"441","author":"ND Daw","year":"2006","unstructured":"Daw ND, O\u2019Doherty JP, Dayan P, Seymour B, Dolan RJ (2006) Cortical substrates for exploratory decisions in humans. Nature 441:876\u2013879","journal-title":"Nature"},{"key":"580-2_CR4","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1016\/j.conb.2008.08.003","volume":"18","author":"P Dayan","year":"2008","unstructured":"Dayan P, Niv Y (2008) Reinforcement learning: the good, the bad and the ugly. Curr Opin Neurobiol 18:185\u2013196","journal-title":"Curr Opin Neurobiol"},{"key":"580-2_CR5","doi-asserted-by":"publisher","first-page":"048104","DOI":"10.1103\/PhysRevLett.97.048104","volume":"97","author":"IR Fiete","year":"2006","unstructured":"Fiete IR, Seung HS (2006) Gradient learning in spiking neural networks by dynamic perturbation of conductances. Phys Rev Lett 97:048104","journal-title":"Phys Rev Lett"},{"key":"580-2_CR6","doi-asserted-by":"publisher","first-page":"1468","DOI":"10.1162\/neco.2007.19.6.1468","volume":"19","author":"RV Florian","year":"2007","unstructured":"Florian RV (2007) Reinforcement learning through modulation of spike-timing-dependent synaptic plasticity. Neural Comput 19:1468\u20131502","journal-title":"Neural Comput"},{"key":"580-2_CR7","doi-asserted-by":"publisher","first-page":"13326","DOI":"10.1523\/JNEUROSCI.6249-09.2010","volume":"30","author":"N Fr\u00e9maux","year":"2010","unstructured":"Fr\u00e9maux N, Sprekeler H, Gerstner W (2010) Functional requirements for reward-modulated spike-timing-dependent plasticity. J Neurosci 30:13326\u201313337","journal-title":"J Neurosci"},{"key":"580-2_CR8","doi-asserted-by":"publisher","first-page":"e1003024","DOI":"10.1371\/journal.pcbi.1003024","volume":"9","author":"N Fr\u00e9maux","year":"2013","unstructured":"Fr\u00e9maux N, Sprekeler H, Gerstner W (2013) Reinforcement learning using a continuous time actor-critic framework with spiking neurons. PLoS Comput Biol 9:e1003024","journal-title":"PLoS Comput Biol"},{"key":"580-2_CR9","doi-asserted-by":"publisher","first-page":"e1002092","DOI":"10.1371\/journal.pcbi.1002092","volume":"7","author":"J Friedrich","year":"2011","unstructured":"Friedrich J, Urbanczik R, Senn W (2011) Spatio-temporal credit assignment in neuronal population learning. PLoS Comput Biol 7:e1002092","journal-title":"PLoS Comput Biol"},{"key":"580-2_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1142\/S0129065714500026","volume":"24","author":"J Friedrich","year":"2014","unstructured":"Friedrich J, Urbanczik R, Senn W (2014) Code-specific learning rules improve action selection by populations of spiking neurons. Int J of Neural Syst 24:1\u201317","journal-title":"Int J of Neural Syst"},{"key":"580-2_CR11","doi-asserted-by":"publisher","first-page":"2443","DOI":"10.1093\/cercor\/bhl152","volume":"17","author":"EM Izhikevich","year":"2007","unstructured":"Izhikevich EM (2007) Solving the distal reward problem through linkage of STDP and dopamine signaling. Cereb Cortex 17:2443\u20132452","journal-title":"Cereb Cortex"},{"key":"580-2_CR12","doi-asserted-by":"publisher","first-page":"1173","DOI":"10.1162\/neco.2008.04-08-750","volume":"21","author":"C Kolodziejski","year":"2009","unstructured":"Kolodziejski C, Porr B, Worgotter F (2009) On the asymptotic equivalence between differential hebbian and temporal difference learning. Neural Comput 21:1173\u20131202","journal-title":"Neural Comput"},{"key":"580-2_CR13","doi-asserted-by":"publisher","first-page":"e1000180","DOI":"10.1371\/journal.pcbi.1000180","volume":"4","author":"R Legenstein","year":"2008","unstructured":"Legenstein R, Pecevski D, Maass W (2008) A learning theory for reward-modulated spike-timing-dependent plasticity with application to biofeedback. PLoS Comput Biol 4:e1000180","journal-title":"PLoS Comput Biol"},{"key":"580-2_CR14","doi-asserted-by":"publisher","first-page":"1318","DOI":"10.1162\/neco.2006.18.6.1318","volume":"18","author":"J Pfister","year":"2006","unstructured":"Pfister J, Toyoizumi T, Barber D, Gerstner W (2006) Optimal spike-timing-dependent plasticity for precise action potential firing in supervised learning. Neural Comput 18:1318\u20131348","journal-title":"Neural Comput"},{"key":"580-2_CR15","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1162\/neco.2008.08-07-593","volume":"21","author":"W Potjans","year":"2009","unstructured":"Potjans W, Morrison A, Diesmann M (2009) A spiking neural network model of an actor-critic learning agent. Neural Comput 21:301\u2013339","journal-title":"Neural Comput"},{"key":"580-2_CR16","doi-asserted-by":"publisher","first-page":"e1001133","DOI":"10.1371\/journal.pcbi.1001133","volume":"7","author":"W Potjans","year":"2011","unstructured":"Potjans W, Diesmann M, Morrison A (2011) An imperfect dopaminergic error signal can drive temporal-difference learning. PLoS Comput Biol 7:e1001133","journal-title":"PLoS Comput Biol"},{"key":"580-2_CR17","doi-asserted-by":"publisher","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W Schultz","year":"1997","unstructured":"Schultz W, Dayan P, Montague PR (1997) A neural substrate of prediction and reward. Science 275:1593\u20131599","journal-title":"Science"},{"key":"580-2_CR18","doi-asserted-by":"publisher","first-page":"1063","DOI":"10.1016\/S0896-6273(03)00761-X","volume":"40","author":"HS Seung","year":"2003","unstructured":"Seung HS (2003) Learning in spiking neural networks by reinforcement of stochastic synaptic transmission. Neuron 40:1063\u20131073","journal-title":"Neuron"},{"key":"580-2_CR19","doi-asserted-by":"publisher","first-page":"1362","DOI":"10.4249\/scholarpedia.1362","volume":"5","author":"J Sj\u00f6str\u00f6m","year":"2010","unstructured":"Sj\u00f6str\u00f6m J, Gerstner W (2010) Spike-timing dependent plasticity. Scholarpedia 5:1362","journal-title":"Scholarpedia"},{"key":"580-2_CR20","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge, MA"},{"key":"580-2_CR21","doi-asserted-by":"publisher","first-page":"983","DOI":"10.1038\/430983a","volume":"430","author":"H Tanimoto","year":"2004","unstructured":"Tanimoto H, Heisenberg M, Gerber B (2004) Experimental psychology: event timing turns punishment to reward. Nature 430:983","journal-title":"Nature"},{"key":"580-2_CR22","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1038\/nn.2264","volume":"12","author":"R Urbanczik","year":"2009","unstructured":"Urbanczik R, Senn W (2009) Reinforcement learning in populations of spiking neurons. Nat Neurosci 12:250\u2013252","journal-title":"Nat Neurosci"},{"key":"580-2_CR23","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8:229\u2013256","journal-title":"Mach Learn"},{"key":"580-2_CR24","doi-asserted-by":"publisher","first-page":"786","DOI":"10.1038\/nn.3068","volume":"15","author":"K Wunderlich","year":"2012","unstructured":"Wunderlich K, Dayan P, Dolan RJ (2012) Mapping value based planning and extensively trained choice in the human brain. Nat Neurosci 15:786\u2013791","journal-title":"Nat Neurosci"},{"key":"580-2_CR25","doi-asserted-by":"publisher","first-page":"041909","DOI":"10.1103\/PhysRevE.69.041909","volume":"69","author":"X Xie","year":"2004","unstructured":"Xie X, Seung HS (2004) Learning in neural networks by reinforcement of irregular spiking. Phys Rev E Stat Nonlin Soft Matter Phys 69:041909","journal-title":"Phys Rev E Stat Nonlin Soft Matter Phys"}],"container-title":["Encyclopedia of Computational Neuroscience"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-1-4614-7320-6_580-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T13:47:32Z","timestamp":1676900852000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-1-4614-7320-6_580-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9781461473206"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-1-4614-7320-6_580-2","relation":{},"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"20 May 2014, 13:14:32","order":1,"name":"received","label":"Received","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"20 May 2014, 13:14:32","order":2,"name":"accepted","label":"Accepted","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"17 September 2014","order":3,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}