{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T18:32:24Z","timestamp":1776277944632,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"7792","license":[{"start":{"date-parts":[[2020,1,15]],"date-time":"2020-01-15T00:00:00Z","timestamp":1579046400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,15]],"date-time":"2020-01-15T00:00:00Z","timestamp":1579046400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nature"],"published-print":{"date-parts":[[2020,1,30]]},"DOI":"10.1038\/s41586-019-1924-6","type":"journal-article","created":{"date-parts":[[2020,1,15]],"date-time":"2020-01-15T14:02:47Z","timestamp":1579096967000},"page":"671-675","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":397,"title":["A distributional code for value in dopamine-based reinforcement learning"],"prefix":"10.1038","volume":"577","author":[{"given":"Will","family":"Dabney","sequence":"first","affiliation":[]},{"given":"Zeb","family":"Kurth-Nelson","sequence":"additional","affiliation":[]},{"given":"Naoshige","family":"Uchida","sequence":"additional","affiliation":[]},{"given":"Clara 
Kwon","family":"Starkweather","sequence":"additional","affiliation":[]},{"given":"Demis","family":"Hassabis","sequence":"additional","affiliation":[]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Botvinick","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,1,15]]},"reference":[{"key":"1924_CR1","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1016\/j.conb.2017.03.013","volume":"43","author":"W Schultz","year":"2017","unstructured":"Schultz, W., Stauffer, W. R. & Lak, A. The phasic dopamine signal maturing: from reward via behavioural activation to formal economic utility. Curr. Opin. Neurobiol. 43, 139\u2013148 (2017).","journal-title":"Curr. Opin. Neurobiol."},{"key":"1924_CR2","doi-asserted-by":"publisher","first-page":"15647","DOI":"10.1073\/pnas.1014269108","volume":"108","author":"PW Glimcher","year":"2011","unstructured":"Glimcher, P. W. Understanding dopamine and reinforcement learning: the dopamine reward prediction error hypothesis. Proc. Natl Acad. Sci. USA 108, 15647\u201315654 (2011).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"1924_CR3","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1146\/annurev-neuro-072116-031109","volume":"40","author":"M Watabe-Uchida","year":"2017","unstructured":"Watabe-Uchida, M., Eshel, N. & Uchida, N. Neural circuitry of reward prediction error. Annu. Rev. Neurosci. 40, 373\u2013394 (2017).","journal-title":"Annu. Rev. Neurosci."},{"key":"1924_CR4","unstructured":"Morimura, T., Sugiyama, M., Kashima, H., Hachiya, H. & Tanaka, T. Parametric return density estimation for reinforcement learning. In Proc. 26th Conference on Uncertainty in Artificial Intelligence (eds Grunwald, P. & Spirtes, P.) http:\/\/dl.acm.org\/citation.cfm?id=3023549.3023592 (2010)."},{"key":"1924_CR5","unstructured":"Bellemare, M. G., Dabney, W., & Munos, R. A distributional perspective on reinforcement learning. 
In International Conference on Machine Learning (eds Precup, D. & Teh, Y. W.) 449\u2013458 (2017)."},{"key":"1924_CR6","doi-asserted-by":"crossref","unstructured":"Dabney, W. Rowland, M. Bellemare, M. G. & Munos, R. Distributional reinforcement learning with quantile regression. In AAAI Conference on Artificial Intelligence (2018).","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"1924_CR7","unstructured":"Sutton, R. S. & Barto, A. G. Reinforcement Learning: an Introduction Vol. 1 (MIT Press, 1998)."},{"key":"1924_CR8","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V. et al. Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015).","journal-title":"Nature"},{"key":"1924_CR9","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D. et al. Mastering the game of Go with deep neural networks and tree search. Nature 529, 484\u2013489 (2016).","journal-title":"Nature"},{"key":"1924_CR10","doi-asserted-by":"crossref","unstructured":"Hessel, M. et al. Rainbow: combining improvements in deep reinforcement learning. In 32nd AAAI Conference on Artificial Intelligence (2018).","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"1924_CR11","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1016\/j.cognition.2008.08.011","volume":"113","author":"MM Botvinick","year":"2009","unstructured":"Botvinick, M. M., Niv, Y. & Barto, A. G. Hierarchically organized behavior and its neural foundations: a reinforcement learning perspective. Cognition 113, 262\u2013280 (2009).","journal-title":"Cognition"},{"key":"1924_CR12","doi-asserted-by":"publisher","first-page":"860","DOI":"10.1038\/s41593-018-0147-8","volume":"21","author":"JX Wang","year":"2018","unstructured":"Wang, J. X. et al. Prefrontal cortex as a meta-reinforcement learning system. Nat. Neurosci. 
21, 860\u2013868 (2018).","journal-title":"Nat. Neurosci."},{"key":"1924_CR13","doi-asserted-by":"publisher","DOI":"10.7554\/eLife.21492","volume":"6","author":"HF Song","year":"2017","unstructured":"Song, H. F., Yang, G. R. & Wang, X. J. Reward-based training of recurrent neural networks for cognitive and value-based tasks. eLife 6, e21492 (2017).","journal-title":"eLife"},{"key":"1924_CR14","unstructured":"Barth-Maron, G. et al. Distributed distributional deterministic policy gradients. In International Conference on Learning Representations https:\/\/openreview.net\/forum?id=SyZipzbCb (2018)."},{"key":"1924_CR15","doi-asserted-by":"crossref","unstructured":"Dabney, W., Ostrovski, G., Silver, D. & Munos, R. Implicit quantile networks for distributional reinforcement learning. In International Conference on Machine Learning (2018).","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"1924_CR16","doi-asserted-by":"publisher","first-page":"1170","DOI":"10.1038\/nn.3495","volume":"16","author":"A Pouget","year":"2013","unstructured":"Pouget, A., Beck, J. M., Ma, W. J. & Latham, P. E. Probabilistic brains: knowns and unknowns. Nat. Neurosci. 16, 1170\u20131178 (2013).","journal-title":"Nat. Neurosci."},{"key":"1924_CR17","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1016\/j.neuropharm.2013.03.019","volume":"76","author":"S Lammel","year":"2014","unstructured":"Lammel, S., Lim, B. K. & Malenka, R. C. Reward and aversion in a heterogeneous midbrain dopamine system. Neuropharmacology 76, 351\u2013359 (2014).","journal-title":"Neuropharmacology"},{"key":"1924_CR18","doi-asserted-by":"publisher","first-page":"1898","DOI":"10.1126\/science.1077349","volume":"299","author":"CD Fiorillo","year":"2003","unstructured":"Fiorillo, C. D., Tobler, P. N. & Schultz, W. Discrete coding of reward probability and uncertainty by dopamine neurons. 
Science 299, 1898\u20131902 (2003).","journal-title":"Science"},{"key":"1924_CR19","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1038\/nature14855","volume":"525","author":"N Eshel","year":"2015","unstructured":"Eshel, N. et al. Arithmetic and local circuitry underlying dopamine prediction errors. Nature 525, 243\u2013246 (2015).","journal-title":"Nature"},{"key":"1924_CR20","unstructured":"Rowland, M., et al. Statistics and samples in distributional reinforcement learning. In International Conference on Machine Learning (2019)."},{"key":"1924_CR21","doi-asserted-by":"publisher","first-page":"1940","DOI":"10.1126\/science.1102941","volume":"306","author":"MJ Frank","year":"2004","unstructured":"Frank, M. J., Seeberger, L. C. & O\u2019Reilly, R. C. By carrot or by stick: cognitive reinforcement learning in parkinsonism. Science 306, 1940\u20131943 (2004).","journal-title":"Science"},{"key":"1924_CR22","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.pscychresns.2005.10.004","volume":"146","author":"J Hirvonen","year":"2006","unstructured":"Hirvonen, J. et al. Striatal dopamine D1 and D2 receptor balance in twins at increased genetic risk for schizophrenia. Psychiatry Res. Neuroimaging 146, 13\u201320 (2006).","journal-title":"Psychiatry Res. Neuroimaging"},{"key":"1924_CR23","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1016\/S0306-4522(98)00465-5","volume":"90","author":"MA Piggott","year":"1999","unstructured":"Piggott, M. A. et al. Dopaminergic activities in the human striatum: rostrocaudal gradients of uptake sites and of D1 and D2 but not of D3 receptor binding or dopamine. Neuroscience 90, 433\u2013445 (1999).","journal-title":"Neuroscience"},{"key":"1924_CR24","doi-asserted-by":"publisher","first-page":"1076","DOI":"10.1016\/j.neuroimage.2004.03.004","volume":"22","author":"P Rosa-Neto","year":"2004","unstructured":"Rosa-Neto, P., Doudet, D. J. & Cumming, P. 
Gradients of dopamine D1- and D2\/3-binding sites in the basal ganglia of pig and monkey measured by PET. Neuroimage 22, 1076\u20131083 (2004).","journal-title":"Neuroimage"},{"key":"1924_CR25","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1005062","volume":"12","author":"JG Mikhael","year":"2016","unstructured":"Mikhael, J. G. & Bogacz, R. Learning reward uncertainty in the basal ganglia. PLOS Comput. Biol. 12, e1005062 (2016).","journal-title":"PLOS Comput. Biol."},{"key":"1924_CR26","doi-asserted-by":"publisher","first-page":"12252","DOI":"10.1073\/pnas.1407535111","volume":"111","author":"RB Rutledge","year":"2014","unstructured":"Rutledge, R. B. et al. A computational and neural model of momentary subjective well-being. Proc. Natl Acad. Sci. USA 111, 12252\u201312257 (2014).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"1924_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1146\/annurev-neuro-071714-033928","volume":"38","author":"QJ Huys","year":"2015","unstructured":"Huys, Q. J., Daw, N. D. & Dayan, P. Depression: a decision-theoretic analysis. Annu. Rev. Neurosci. 38, 1\u201323 (2015).","journal-title":"Annu. Rev. Neurosci."},{"key":"1924_CR28","doi-asserted-by":"publisher","unstructured":"Bennett, D. & Niv, Y. Opening Burton\u2019s clock: psychiatric insights from computational cognitive models. Preprint at https:\/\/doi.org\/10.31234\/osf.io\/y2vzu (2018).","DOI":"10.31234\/osf.io\/y2vzu"},{"key":"1924_CR29","doi-asserted-by":"publisher","first-page":"1304","DOI":"10.1016\/j.neuron.2015.08.028","volume":"87","author":"J Tian","year":"2015","unstructured":"Tian, J. & Uchida, N. Habenula lesions reveal that multiple mechanisms underlie dopamine prediction errors. Neuron 87, 1304\u20131316 (2015).","journal-title":"Neuron"},{"key":"1924_CR30","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1038\/nn.4239","volume":"19","author":"N Eshel","year":"2016","unstructured":"Eshel, N., Tian, J., Bukwich, M. & Uchida, N. 
Dopamine neurons share common response function for reward prediction error. Nat. Neurosci. 19, 479\u2013486 (2016).","journal-title":"Nat. Neurosci."},{"key":"1924_CR31","doi-asserted-by":"publisher","first-page":"819","DOI":"10.2307\/1911031","volume":"55","author":"WK Newey","year":"1987","unstructured":"Newey, W. K. & Powell, J. L. Asymmetric least squares estimation and testing. Econometrica 55, 819\u2013847 (1987).","journal-title":"Econometrica"},{"key":"1924_CR32","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1016\/0167-7152(94)90031-0","volume":"20","author":"M Chris Jones","year":"1994","unstructured":"Chris Jones, M. Expectiles and m-quantiles are quantiles. Stat. Probab. Lett. 20, 149\u2013153 (1994).","journal-title":"Stat. Probab. Lett."},{"key":"1924_CR33","doi-asserted-by":"publisher","first-page":"901","DOI":"10.1111\/mafi.12080","volume":"26","author":"JF Ziegel","year":"2016","unstructured":"Ziegel, J. F. Coherence and elicitability. Math. Finance 26, 901\u2013918 (2016).","journal-title":"Math. Finance"},{"key":"1924_CR34","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M. G., Naddaf, Y., Veness, J. & Bowling, M. The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013).","journal-title":"J. Artif. Intell. Res."},{"key":"1924_CR35","unstructured":"Heess, N. et al. Emergence of locomotion behaviours in rich environments. Preprint at https:\/\/arxiv.org\/abs\/1707.02286 (2017)."},{"key":"1924_CR36","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1002\/dvg.20228","volume":"44","author":"CM B\u00e4ckman","year":"2006","unstructured":"B\u00e4ckman, C. M., et al. Characterization of a mouse strain expressing cre recombinase from the 3\u2032 untranslated region of the dopamine transporter locus. 
Genesis 44, 383\u2013390 (2006).","journal-title":"Genesis"},{"key":"1924_CR37","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1038\/nature10754","volume":"482","author":"JY Cohen","year":"2012","unstructured":"Cohen, J. Y. et al. Neuron-type-specific signals for reward and punishment in the ventral tegmental area. Nature 482, 85\u201388 (2012).","journal-title":"Nature"},{"key":"1924_CR38","doi-asserted-by":"publisher","first-page":"2491","DOI":"10.1016\/j.cub.2014.08.064","volume":"24","author":"WR Stauffer","year":"2014","unstructured":"Stauffer, W. R., Lak, A. & Schultz, W. Dopamine reward prediction error responses reflect marginal utility. Curr. Biol. 24, 2491\u20132500 (2014).","journal-title":"Curr. Biol."},{"key":"1924_CR39","doi-asserted-by":"publisher","first-page":"4710","DOI":"10.1523\/JNEUROSCI.3883-12.2013","volume":"33","author":"CD Fiorillo","year":"2013","unstructured":"Fiorillo, C. D., Song, M. R. & Yun, S. R. Multiphasic temporal dynamics in responses of midbrain dopamine neurons to appetitive and aversive stimuli. J. Neurosci. 33, 4710\u20134725 (2013).","journal-title":"J. Neurosci."},{"key":"1924_CR40","unstructured":"Schaul, T., Quan, J., Antonoglou, I. & Silver, D. Prioritized experience replay. In International Conference on Learning Representations (2016)."},{"key":"1924_CR41","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A. & Silver, D. Deep reinforcement learning with double q-learning. In AAAI Conference on Artificial Intelligence (2016).","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"1924_CR42","unstructured":"Krizhevsky, A. & Hinton, G. Learning Multiple Layers of Features from Tiny Images (Univ. 
of Toronto, 2009)."}],"container-title":["Nature"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/www.nature.com\/articles\/s41586-019-1924-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/www.nature.com\/articles\/s41586-019-1924-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/www.nature.com\/articles\/s41586-019-1924-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,20]],"date-time":"2023-05-20T18:01:39Z","timestamp":1684605699000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41586-019-1924-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,1,15]]},"references-count":42,"journal-issue":{"issue":"7792","published-print":{"date-parts":[[2020,1,30]]}},"alternative-id":["1924"],"URL":"https:\/\/doi.org\/10.1038\/s41586-019-1924-6","relation":{"has-review":[{"id-type":"doi","id":"10.3410\/f.737211343.793572085","asserted-by":"object"},{"id-type":"doi","id":"10.3410\/f.737211343.793570710","asserted-by":"object"}]},"ISSN":["0028-0836","1476-4687"],"issn-type":[{"value":"0028-0836","type":"print"},{"value":"1476-4687","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,1,15]]},"assertion":[{"value":"3 January 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 November 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 January 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}