{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,1,3]],"date-time":"2023-01-03T02:33:43Z","timestamp":1672713223006},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2011,6,30]],"date-time":"2011-06-30T00:00:00Z","timestamp":1309392000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2011,7]]},"DOI":"10.1007\/s11432-011-4304-x","type":"journal-article","created":{"date-parts":[[2011,6,29]],"date-time":"2011-06-29T19:50:27Z","timestamp":1309377027000},"page":"1430-1443","source":"Crossref","is-referenced-by-count":2,"title":["A stochastic policy search model for matching behavior"],"prefix":"10.1007","volume":"54","author":[{"given":"ZhenBo","family":"Cheng","sequence":"first","affiliation":[]},{"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"ZhiDong","family":"Deng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2011,6,30]]},"reference":[{"key":"4304_CR1","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1146\/annurev.psych.56.091103.070229","volume":"57","author":"W. Schultz","year":"2006","unstructured":"Schultz W. Behavioral theories and the neurophysiology of reward. Annu Rev Psychol, 2006, 57: 87\u2013115","journal-title":"Annu Rev Psychol"},{"key":"4304_CR2","doi-asserted-by":"crossref","first-page":"1214","DOI":"10.1038\/nn1954","volume":"10","author":"T. E. J. Behrens","year":"2007","unstructured":"Behrens T E J, Woolrich M W, Walton M E, et al. Learning the value of information in an uncertain world. Nat Neurosci, 2007, 10: 1214\u20131221","journal-title":"Nat Neurosci"},{"key":"4304_CR3","doi-asserted-by":"crossref","first-page":"1782","DOI":"10.1126\/science.1094765","volume":"304","author":"L. Sugrue","year":"2004","unstructured":"Sugrue L, Corrado G, Newsome W. Matching behavior and the representation of value in the parietal cortex. Science, 2004, 304: 1782\u20131787","journal-title":"Science"},{"key":"4304_CR4","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1901\/jeab.1961.4-267","volume":"4","author":"R. J. Herrnstein","year":"1961","unstructured":"Herrnstein R J. Relative and absolute strength of response as a function of frequency of reinforcement. J Exp Anal Behav, 1961, 4: 267\u2013272","journal-title":"J Exp Anal Behav"},{"key":"4304_CR5","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1037\/0097-7403.27.4.354","volume":"27","author":"C. R. Gallistel","year":"2001","unstructured":"Gallistel C R, Mark T A, King A P, et al. The rat approximates an ideal detector of changes in rates of reward: implications for the law of effect. J Exp Psychol Anim Behav Process, 2001, 27: 354\u2013372","journal-title":"J Exp Psychol Anim Behav Process"},{"key":"4304_CR6","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1901\/jeab.2005.110-04","volume":"84","author":"B. Lau","year":"2005","unstructured":"Lau B, Glimcher P. Dynamic response-by-response models of matching behavior in rhesus monkeys. J Exp Anal Behav, 2005, 84: 555\u2013579","journal-title":"J Exp Anal Behav"},{"key":"4304_CR7","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1901\/jeab.1976.26-135","volume":"26","author":"C. M. Bradshaw","year":"1976","unstructured":"Bradshaw C M, Szabadi E, Bevan P. Behavior of humans in variable-interval schedules of reinforcement. J Exp Anal Behav, 1976, 26: 135\u2013141","journal-title":"J Exp Anal Behav"},{"key":"4304_CR8","doi-asserted-by":"crossref","first-page":"581","DOI":"10.1901\/jeab.2005.23-05","volume":"84","author":"G. Corrado","year":"2005","unstructured":"Corrado G, Sugrue L, Seung H, et al. Linear-nonlinear-poisson models of primate choice dynamics. J Exp Anal Behav, 2005, 84: 581\u2013617","journal-title":"J Exp Anal Behav"},{"key":"4304_CR9","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1901\/jeab.1981.36-141","volume":"36","author":"W. Vaughan","year":"1981","unstructured":"Vaughan W. Melioration, matching, and maximization. J Exp Anal Behav, 1981, 36: 141\u2013149","journal-title":"J Exp Anal Behav"},{"key":"4304_CR10","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1257\/jep.5.3.137","volume":"5","author":"R. J. Herrnstein","year":"1991","unstructured":"Herrnstein R J, Prelec D. Melioration: A theory of distributed choice. J Econ Perspect, 1991, 5: 137\u2013156","journal-title":"J Econ Perspect"},{"key":"4304_CR11","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1901\/jeab.1983.40-321","volume":"40","author":"J. M. Hinson","year":"1983","unstructured":"Hinson J M, Staddon J E R. Matching, maximizing, and hill-climbing. J Exp Anal Behav, 1983, 40: 321\u2013331","journal-title":"J Exp Anal Behav"},{"key":"4304_CR12","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1162\/neco.2008.20.1.227","volume":"20","author":"Y. Sakai","year":"2008","unstructured":"Sakai Y, Fukai T. The actor-critic learning is behind the matching law: matching versus optimal behaviors. Neural Comput, 2008, 20: 227\u2013251","journal-title":"Neural Comput"},{"key":"4304_CR13","volume-title":"Reinforcement Learning: An Introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton R, Barto A. Reinforcement Learning: An Introduction. Cambridge: The MIT Press, 1998"},{"key":"4304_CR14","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1016\/j.conb.2008.08.003","volume":"18","author":"P. Dayan","year":"2008","unstructured":"Dayan P, Niv Y. Reinforcement learning: the good, the bad and the ugly. Curr Opin Neurobiol, 2008, 18: 185\u2013196","journal-title":"Curr Opin Neurobiol"},{"key":"4304_CR15","first-page":"229","volume":"8","author":"R. J. Williams","year":"1992","unstructured":"Williams R J. Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn, 1992, 8: 229\u2013256","journal-title":"Mach Learn"},{"key":"4304_CR16","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter J, Bartlett P L. Infinite-horizon policy-gradient estimation. J Artif Intell Res, 2001, 15: 319\u2013350","journal-title":"J Artif Intell Res"},{"key":"4304_CR17","volume-title":"Individual Choice Behavior: A Theoretical Analysis","author":"R. Luce","year":"1959","unstructured":"Luce R. Individual Choice Behavior: A Theoretical Analysis. New York: Wiley, 1959"},{"key":"4304_CR18","doi-asserted-by":"crossref","first-page":"e3795","DOI":"10.1371\/journal.pone.0003795","volume":"3","author":"Y. Sakai","year":"2008","unstructured":"Sakai Y, Fukai T. When does reward maximization lead to matching law? PLoS One, 2008, 3: e3795","journal-title":"PLoS One"},{"key":"4304_CR19","volume-title":"The Foundation Stone for Modern Decision-Making (in Chinese)","author":"H. Simon","year":"1989","unstructured":"Simon H. The Foundation Stone for Modern Decision-Making (in Chinese). Beijing: Beijing Economic College Press, 1989"},{"key":"4304_CR20","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1016\/j.neuron.2008.02.021","volume":"58","author":"B. Lau","year":"2008","unstructured":"Lau B, Glimcher P W. Value representations in the primate striatum during matching behavior. Neuron, 2008, 58: 451\u2013463","journal-title":"Neuron"},{"key":"4304_CR21","doi-asserted-by":"crossref","first-page":"3731","DOI":"10.1523\/JNEUROSCI.5159-05.2006","volume":"26","author":"A. Soltani","year":"2006","unstructured":"Soltani A, Wang X J. A biophysically based neural model of matching law behavior: melioration by stochastic synapses. J Neurosci, 2006, 26: 3731\u20133744","journal-title":"J Neurosci"},{"key":"4304_CR22","doi-asserted-by":"crossref","first-page":"15224","DOI":"10.1073\/pnas.0505220103","volume":"103","author":"Y. Loewenstein","year":"2006","unstructured":"Loewenstein Y, Seung H S. Operant matching is a generic outcome of synaptic plasticity based on the covariance between reward and neural activity. Proc Natl Acad Sci USA, 2006, 103: 15224\u201315229","journal-title":"Proc Natl Acad Sci USA"},{"key":"4304_CR23","doi-asserted-by":"crossref","first-page":"1377","DOI":"10.1126\/science.1962197","volume":"254","author":"E. K. Miller","year":"1991","unstructured":"Miller E K, Li L, Desimone R. A neural mechanism for working and recognition memory in inferior temporal cortex. Science, 1991, 254: 1377\u20131379","journal-title":"Science"},{"key":"4304_CR24","doi-asserted-by":"crossref","first-page":"5154","DOI":"10.1523\/JNEUROSCI.16-16-05154.1996","volume":"16","author":"E. K. Miller","year":"1996","unstructured":"Miller E K, Erickson C A, Desimone R. Neural mechanisms of visual working memory in prefrontal cortex of the macaque. J Neurosci, 1996, 16: 5154\u20135167","journal-title":"J Neurosci"},{"key":"4304_CR25","doi-asserted-by":"crossref","first-page":"638","DOI":"10.1016\/j.conb.2005.10.006","volume":"15","author":"A. M. Graybiel","year":"2005","unstructured":"Graybiel A M. The basal ganglia: learning new tricks and loving it. Curr Opin Neurobiol, 2005, 15: 638\u2013644","journal-title":"Curr Opin Neurobiol"},{"key":"4304_CR26","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1016\/S0079-6123(08)61348-5","volume":"99","author":"M. Amalric","year":"1993","unstructured":"Amalric M, Koob G F. Functionally selective neurochemical afferents and efferents of the mesocorticolimbic and nigrostriatal dopamine system. Prog Brain Res, 1993, 99: 209\u2013226","journal-title":"Prog Brain Res"},{"key":"4304_CR27","doi-asserted-by":"crossref","first-page":"468","DOI":"10.1016\/j.tins.2004.06.006","volume":"27","author":"P. Voorn","year":"2004","unstructured":"Voorn P, Vanderschuren L J M J, Groenewegen H J, et al. Putting a spin on the dorsal-ventral divide of the striatum. Trends Neurosci, 2004, 27: 468\u2013474","journal-title":"Trends Neurosci"},{"key":"4304_CR28","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1016\/S0306-4522(99)00575-8","volume":"96","author":"D. Joel","year":"2000","unstructured":"Joel D, Weiner I. The connections of the dopaminergic system with the striatum in rats and primates: an analysis with respect to the functional and compartmental organization of the striatum. Neuroscience, 2000, 96: 451\u2013474","journal-title":"Neuroscience"},{"key":"4304_CR29","doi-asserted-by":"crossref","first-page":"1337","DOI":"10.1126\/science.1115270","volume":"310","author":"K. Samejima","year":"2005","unstructured":"Samejima K, Ueda Y, Doya K, et al. Representation of action-specific reward values in the striatum. Science, 2005, 310: 1337\u20131340","journal-title":"Science"},{"key":"4304_CR30","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1016\/j.bbr.2008.09.029","volume":"199","author":"M. X. Cohen","year":"2009","unstructured":"Cohen M X, Frank M J. Neurocomputational models of basal ganglia function in learning, memory and choice. Behav Brain Res, 2009, 199: 141\u2013156","journal-title":"Behav Brain Res"},{"key":"4304_CR31","doi-asserted-by":"crossref","first-page":"452","DOI":"10.1126\/science.1094285","volume":"304","author":"J. O\u2019Doherty","year":"2004","unstructured":"O\u2019Doherty J, Dayan P, Schultz J, et al. Dissociable roles of ventral and dorsal striatum in instrumental conditioning. Science, 2004, 304: 452\u2013454","journal-title":"Science"},{"key":"4304_CR32","doi-asserted-by":"crossref","first-page":"760","DOI":"10.1038\/nature03015","volume":"431","author":"P. R. Montague","year":"2004","unstructured":"Montague P R, Hyman S E, Cohen J D. Computational roles for dopamine in behavioural control. Nature, 2004, 431: 760\u2013767","journal-title":"Nature"},{"key":"4304_CR33","doi-asserted-by":"crossref","first-page":"1593","DOI":"10.1126\/science.275.5306.1593","volume":"275","author":"W. Schultz","year":"1997","unstructured":"Schultz W, Dayan P, Montague P R. A neural substrate of prediction and reward. Science, 1997, 275: 1593\u20131599","journal-title":"Science"},{"key":"4304_CR34","doi-asserted-by":"crossref","first-page":"1042","DOI":"10.1038\/nature05051","volume":"442","author":"M. Pessiglione","year":"2006","unstructured":"Pessiglione M, Seymour B, Flandin G, et al. Dopamine-dependent prediction errors underpin reward-seeking behaviour in humans. Nature, 2006, 442: 1042\u20131045","journal-title":"Nature"},{"key":"4304_CR35","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1016\/S0959-4388(02)00314-8","volume":"12","author":"J. D. Cohen","year":"2002","unstructured":"Cohen J D, Braver T S, Brown J W. Computational perspectives on dopamine function in prefrontal cortex. Curr Opin Neurobiol, 2002, 12: 223\u2013229","journal-title":"Curr Opin Neurobiol"},{"key":"4304_CR36","doi-asserted-by":"crossref","first-page":"246","DOI":"10.1093\/cercor\/12.3.246","volume":"12","author":"R. C. O\u2019Reilly","year":"2002","unstructured":"O\u2019Reilly R C, Noelle D C, Braver T S, et al. Prefrontal cortex and dynamic categorization tasks: representational organization and neuromodulatory control. Cereb Cortex, 2002, 12: 246\u2013257","journal-title":"Cereb Cortex"},{"key":"4304_CR37","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1162\/089976606775093909","volume":"18","author":"R. C. O\u2019Reilly","year":"2006","unstructured":"O\u2019Reilly R C, Frank M J. Making working memory work: a computational model of learning in the prefrontal cortex and basal ganglia. Neural Comput, 2006, 18: 283\u2013328","journal-title":"Neural Comput"},{"key":"4304_CR38","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1007\/s10827-005-5705-x","volume":"20","author":"A. J. Gruber","year":"2006","unstructured":"Gruber A J, Dayan P, Gutkin B S, et al. Dopamine modulation in the basal ganglia locks the gate to working memory. J Comput Neurosci, 2006, 20: 153\u2013166","journal-title":"J Comput Neurosci"},{"key":"4304_CR39","first-page":"783","volume":"39","author":"Z. B. Cheng","year":"2009","unstructured":"Cheng Z B, Deng Z D, Yang B. Computational model for simple Bayesian decision (in Chinese). Sci China Ser C-Life Sci, 2009, 39: 783\u2013779","journal-title":"Sci China Ser C-Life Sci"},{"key":"4304_CR40","first-page":"721","volume":"16","author":"J. Q. Cheng","year":"2008","unstructured":"Cheng J Q, Li Y H, Sui N. Decision making and its underlying brain mechanism based on rodent research (in Chinese). Adv Psycholog Sci, 2008, 16: 721\u2013725","journal-title":"Adv Psycholog Sci"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-011-4304-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11432-011-4304-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-011-4304-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,12]],"date-time":"2019-06-12T14:34:39Z","timestamp":1560350079000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11432-011-4304-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,6,30]]},"references-count":40,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2011,7]]}},"alternative-id":["4304"],"URL":"https:\/\/doi.org\/10.1007\/s11432-011-4304-x","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,6,30]]}}}