{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:38:05Z","timestamp":1780418285877,"version":"3.54.1"},"publisher-location":"Singapore","reference-count":45,"publisher":"Springer Singapore","isbn-type":[{"value":"9789811612879","type":"print"},{"value":"9789811612886","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-981-16-1288-6_2","type":"book-chapter","created":{"date-parts":[[2021,4,7]],"date-time":"2021-04-07T14:09:03Z","timestamp":1617804543000},"page":"14-33","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Models of Human Behavioral Agents in\u00a0Bandits, Contextual Bandits and RL"],"prefix":"10.1007","author":[{"given":"Baihan","family":"Lin","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guillermo","family":"Cecchi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Djallel","family":"Bouneffouf","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jenna","family":"Reinen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Irina","family":"Rish","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,4,8]]},"reference":[{"key":"2_CR1","unstructured":"Agrawal, S., Goyal, N.: Analysis of Thompson Sampling for the multi-armed bandit problem. In: COLT 2012 - The 25th Annual Conference on Learning Theory, Edinburgh, Scotland, 25\u201327 June 2012, pp. 39.1\u201339.26 (2012). http:\/\/www.jmlr.org\/proceedings\/papers\/v23\/agrawal12\/agrawal12.pdf"},{"key":"2_CR2","unstructured":"Agrawal, S., Goyal, N.: Thompson sampling for contextual bandits with linear payoffs. In: ICML, no. 3, pp. 127\u2013135 (2013)"},{"issue":"1\u20132","key":"2_CR3","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1023\/A:1018960107028","volume":"23","author":"P Auer","year":"1998","unstructured":"Auer, P., Cesa-Bianchi, N.: On-line learning with malicious noise and the closure algorithm. Ann. Math. Artif. Intell. 23(1\u20132), 83\u201399 (1998)","journal-title":"Ann. Math. Artif. Intell."},{"issue":"2\u20133","key":"2_CR4","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2\u20133), 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"issue":"1","key":"2_CR5","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1137\/S0097539701398375","volume":"32","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., Schapire, R.E.: The nonstochastic multiarmed bandit problem. SIAM J. Comput. 32(1), 48\u201377 (2002)","journal-title":"SIAM J. Comput."},{"key":"2_CR6","doi-asserted-by":"publisher","unstructured":"Bayer, H.M., Glimcher, P.W.: Midbrain dopamine neurons encode a quantitative reward prediction error signal. Neuron 47(1), 129\u2013141 (2005). https:\/\/doi.org\/10.1016\/j.neuron.2005.05.020. http:\/\/www.ncbi.nlm.nih.gov\/pubmed\/15996553. http:\/\/www.pubmedcentral.nih.gov\/articlerender.fcgi?artid=PMC1564381. http:\/\/www.linkinghub.elsevier.com\/retrieve\/pii\/S0896627305004678","DOI":"10.1016\/j.neuron.2005.05.020"},{"issue":"1\u20133","key":"2_CR7","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1016\/0010-0277(94)90018-3","volume":"50","author":"A Bechara","year":"1994","unstructured":"Bechara, A., Damasio, A.R., Damasio, H., Anderson, S.W.: Insensitivity to future consequences following damage to human prefrontal cortex. Cognition 50(1\u20133), 7\u201315 (1994)","journal-title":"Cognition"},{"key":"2_CR8","unstructured":"Beygelzimer, A., Langford, J., Li, L., Reyzin, L., Schapire, R.: Contextual bandit algorithms with supervised learning guarantees. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 19\u201326 (2011)"},{"key":"2_CR9","doi-asserted-by":"publisher","unstructured":"Bouneffouf, D., F\u00e9raud, R.: Multi-armed bandit problem with known trend. Neurocomputing 205, 16\u201321 (2016). https:\/\/doi.org\/10.1016\/j.neucom.2016.02.052","DOI":"10.1016\/j.neucom.2016.02.052"},{"key":"2_CR10","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/978-3-319-63703-7_22","volume-title":"Artificial General Intelligence","author":"D Bouneffouf","year":"2017","unstructured":"Bouneffouf, D., Rish, I., Cecchi, G.A.: Bandit models of human behavior: reward processing in mental disorders. In: Everitt, T., Goertzel, B., Potapov, A. (eds.) AGI 2017. LNCS (LNAI), vol. 10414, pp. 237\u2013248. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-63703-7_22"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Bouneffouf, D., Rish, I., Cecchi, G.A., F\u00e9raud, R.: Context attentive bandits: contextual bandit with restricted context. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence, pp. 1468\u20131475 (2017)","DOI":"10.24963\/ijcai.2017\/203"},{"key":"2_CR12","unstructured":"Chapelle, O., Li, L.: An empirical evaluation of Thompson sampling. In: Advances in Neural Information Processing Systems, pp. 2249\u20132257 (2011)"},{"issue":"2","key":"2_CR13","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1016\/j.conb.2008.08.003","volume":"18","author":"P Dayan","year":"2008","unstructured":"Dayan, P., Niv, Y.: Reinforcement learning: the good, the bad and the ugly. Curr. Opin. Neurobiol. 18(2), 185\u2013196 (2008)","journal-title":"Curr. Opin. Neurobiol."},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Elfwing, S., Seymour, B.: Parallel reward and punishment control in humans and robots: Safe reinforcement learning using the MaxPain algorithm. In: 2017 Joint IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob), pp. 140\u2013147. IEEE (2017)","DOI":"10.1109\/DEVLRN.2017.8329799"},{"key":"2_CR15","first-page":"1","volume":"5","author":"E Even-Dar","year":"2003","unstructured":"Even-Dar, E., Mansour, Y.: Learning rates for q-learning. J. Mach. Learn. Res. 5, 1\u201325 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"2_CR16","doi-asserted-by":"publisher","unstructured":"Frank, M.J., O\u2019Reilly, R.C.: A mechanistic account of striatal dopamine function in human cognition: psychopharmacological studies with cabergoline and haloperidol. Behav. Neurosci. 120(3), 497\u2013517 (2006). https:\/\/doi.org\/10.1037\/0735-7044.120.3.497","DOI":"10.1037\/0735-7044.120.3.497"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Frank, M.J., Seeberger, L.C., O\u2019reilly, R.C.: By carrot or by stick: cognitive reinforcement learning in parkinsonism. Science 306(5703), 1940\u20131943 (2004)","DOI":"10.1126\/science.1102941"},{"issue":"1","key":"2_CR18","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.jmp.2009.10.002","volume":"54","author":"DJ Fridberg","year":"2010","unstructured":"Fridberg, D.J., et al.: Cognitive mechanisms underlying risky decision-making in chronic cannabis users. J. Math. Psychol. 54(1), 28\u201338 (2010)","journal-title":"J. Math. Psychol."},{"key":"2_CR19","doi-asserted-by":"publisher","unstructured":"Hart, A.S., Rutledge, R.B., Glimcher, P.W., Phillips, P.E.M.: Phasic dopamine release in the rat nucleus accumbens symmetrically encodes a reward prediction error term. J. Neurosci. 34(3), 698\u2013704 (2014). https:\/\/doi.org\/10.1523\/JNEUROSCI.2489-13.2014. http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.645.2368&rep=rep1&type=pdf","DOI":"10.1523\/JNEUROSCI.2489-13.2014"},{"key":"2_CR20","unstructured":"Hasselt, H.V.: Double q-learning. In: Advances in Neural Information Processing Systems, pp. 2613\u20132621 (2010)"},{"key":"2_CR21","doi-asserted-by":"publisher","unstructured":"Holmes, A.J., Patrick, L.M.: The myth of optimality in clinical neuroscience. Trends Cogn. Sci. 22(3), 241\u2013257 (2018). https:\/\/doi.org\/10.1016\/j.tics.2017.12.006. http:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1364661317302681","DOI":"10.1016\/j.tics.2017.12.006"},{"key":"2_CR22","doi-asserted-by":"publisher","first-page":"61","DOI":"10.3389\/fnins.2012.00061","volume":"6","author":"A Horstmann","year":"2012","unstructured":"Horstmann, A., Villringer, A., Neumann, J.: Iowa gambling task: there is more to consider than long-term outcome. Using a linear equation model to disentangle the impact of outcome and frequency of gains and losses. Front. Neurosci. 6, 61 (2012)","journal-title":"Front. Neurosci."},{"key":"2_CR23","unstructured":"Lai, T.L., Robbins, H.: Asymptotically efficient adaptive allocation rules. Adv. Appl. Math. 6(1), 4\u201322 (1985). http:\/\/www.cs.utexas.edu\/~shivaram"},{"key":"2_CR24","unstructured":"Langford, J., Zhang, T.: The Epoch-Greedy algorithm for contextual multi-armed bandits (2007)"},{"key":"2_CR25","unstructured":"Langford, J., Zhang, T.: The Epoch-Greedy algorithm for multi-armed bandits with side information. In: Advances in Neural Information Processing Systems, pp. 817\u2013824 (2008)"},{"key":"2_CR26","unstructured":"Li, L., Chu, W., Langford, J., Wang, X.: Unbiased offline evaluation of contextual-bandit-based news article recommendation algorithms. In: King, I., Nejdl, W., Li, H. (eds.) WSDM, pp. 297\u2013306. ACM (2011). http:\/\/dblp.uni-trier.de\/db\/conf\/wsdm\/wsdm2011.html#LiCLW11"},{"key":"2_CR27","unstructured":"Lin, B.: Diabolical games: reinforcement learning environments for lifelong learning (2020)"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Lin, B.: Online semi-supervised learning in contextual bandits with episodic reward. arXiv preprint arXiv:2009.08457 (2020)","DOI":"10.1007\/978-3-030-64984-5_32"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Lin, B., Bouneffouf, D., Cecchi, G.: Split q learning: reinforcement learning with two-stream rewards. In: Proceedings of the 28th International Joint Conference on Artificial Intelligence, pp. 6448\u20136449. AAAI Press (2019)","DOI":"10.24963\/ijcai.2019\/913"},{"key":"2_CR30","unstructured":"Lin, B., Bouneffouf, D., Cecchi, G.: Online learning in iterated prisoner\u2019s dilemma to mimic human behavior. arXiv preprint arXiv:2006.06580 (2020)"},{"key":"2_CR31","doi-asserted-by":"crossref","unstructured":"Lin, B., Bouneffouf, D., Cecchi, G.A., Rish, I.: Contextual bandit with adaptive feature extraction. In: 2018 IEEE International Conference on Data Mining Workshops (ICDMW), pp. 937\u2013944. IEEE (2018)","DOI":"10.1109\/ICDMW.2018.00136"},{"key":"2_CR32","unstructured":"Lin, B., Bouneffouf, D., Reinen, J., Rish, I., Cecchi, G.: A story of two streams: reinforcement learning models from human behavior and neuropsychiatry. In: Proceedings of the Nineteenth International Conference on Autonomous Agents and Multi-Agent Systems, AAMAS 2020, pp. 744\u2013752. International Foundation for Autonomous Agents and Multiagent Systems, May 2020"},{"key":"2_CR33","unstructured":"Lin, B., Zhang, X.: Speaker diarization as a fully online learning problem in MiniVox. arXiv preprint arXiv:2006.04376 (2020)"},{"key":"2_CR34","unstructured":"Lin, B., Zhang, X.: VoiceID on the fly: a speaker recognition system that learns from scratch. In: INTERSPEECH (2020)"},{"issue":"2","key":"2_CR35","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1038\/nn.2723","volume":"14","author":"TV Maia","year":"2011","unstructured":"Maia, T.V., Frank, M.J.: From reinforcement learning models to psychiatric and neurological disorders. Nat. Neurosci. 14(2), 154\u2013162 (2011). https:\/\/doi.org\/10.1038\/nn.2723","journal-title":"Nat. Neurosci."},{"key":"2_CR36","doi-asserted-by":"publisher","unstructured":"O\u2019Doherty, J., Dayan, P., Schultz, J., Deichmann, R., Friston, K., Dolan, R.J.: Dissociable roles of ventral and dorsal striatum in instrumental. Science 304, 452\u2013454 (2004). https:\/\/doi.org\/10.1126\/science.1094285. http:\/\/www.sciencemag.org\/content\/304\/5669\/452.full.html. http:\/\/www.sciencemag.org\/content\/suppl\/2004\/04\/13\/304.5669.452.DC1.html. http:\/\/www.sciencemag.org\/content\/304\/5669\/452.full.html#related-urls. http:\/\/www.sciencemag.org\/cgi\/collection\/neuroscience","DOI":"10.1126\/science.1094285"},{"issue":"1","key":"2_CR37","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1080\/13554794.2013.873063","volume":"21","author":"DC Perry","year":"2015","unstructured":"Perry, D.C., Kramer, J.H.: Reward processing in neurodegenerative disease. Neurocase 21(1), 120\u2013133 (2015)","journal-title":"Neurocase"},{"key":"2_CR38","unstructured":"Rummery, G.A., Niranjan, M.: On-line Q-learning using connectionist systems, vol. 37. University of Cambridge, Department of Engineering Cambridge, England (1994)"},{"key":"2_CR39","doi-asserted-by":"publisher","unstructured":"Schultz, W., Dayan, P., Montague, P.R.: A neural substrate of prediction and reward. Science 275(5306), 1593\u20131599 (1997). https:\/\/doi.org\/10.1126\/science.275.5306.1593. http:\/\/www.sciencemag.org\/cgi\/doi\/10.1126\/science.275.5306.1593","DOI":"10.1126\/science.275.5306.1593"},{"key":"2_CR40","doi-asserted-by":"publisher","unstructured":"Seymour, B., Singer, T., Dolan, R.: The neurobiology of punishment. Nat. Rev. Neurosci. 8(4), 300\u2013311 (2007). https:\/\/doi.org\/10.1038\/nrn2119. http:\/\/www.nature.com\/articles\/nrn2119","DOI":"10.1038\/nrn2119"},{"issue":"1","key":"2_CR41","first-page":"340","volume":"3","author":"H Steingroever","year":"2015","unstructured":"Steingroever, H., et al.: Data from 617 healthy participants performing the iowa gambling task: a \u201cMany Labs\u201d collaboration. J. Open Psychol. Data 3(1), 340\u2013353 (2015)","journal-title":"J. Open Psychol. Data"},{"key":"2_CR42","volume-title":"Introduction to Reinforcement Learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Introduction to Reinforcement Learning, 1st edn. MIT Press, Cambridge (1998)","edition":"1"},{"key":"2_CR43","unstructured":"Sutton, R.S., Barto, A.G., et al.: Introduction to Reinforcement Learning, vol. 135. MIT press Cambridge (1998)"},{"key":"2_CR44","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1093\/biomet\/25.3-4.285","volume":"25","author":"W Thompson","year":"1933","unstructured":"Thompson, W.: On the likelihood that one unknown probability exceeds another in view of the evidence of two samples. Biometrika 25, 285\u2013294 (1933)","journal-title":"Biometrika"},{"key":"2_CR45","unstructured":"Tversky, A., Kahneman, D.: The framing of decisions and the psychology of choice. Science 211(4481), 453\u2013458 (1981). https:\/\/fenix.tecnico.ulisboa.pt\/downloadFile\/3779576281111\/The framing of decisions and the psychology of choice.pdf"}],"container-title":["Communications in Computer and Information Science","Human Brain and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-16-1288-6_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,25]],"date-time":"2021-04-25T00:53:59Z","timestamp":1619312039000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-16-1288-6_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9789811612879","9789811612886"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-981-16-1288-6_2","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"8 April 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HBAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Human Brain and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 January 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 January 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ijcaihbai2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.qaas.zju.edu.cn\/HBAI2020\/important_dates.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"12","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"11","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"92% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic HBAI 2020 was held in  the year 2021 and transferred into virtual format.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}