{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T08:14:23Z","timestamp":1743149663839,"version":"3.40.3"},"publisher-location":"Cham","reference-count":10,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319701387"},{"type":"electronic","value":"9783319701394"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-70139-4_42","type":"book-chapter","created":{"date-parts":[[2017,10,28]],"date-time":"2017-10-28T01:24:18Z","timestamp":1509153858000},"page":"418-426","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Bayesian Posterior Updating Algorithm in Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Fangzhou","family":"Xiong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xu","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Biao","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Charles","family":"Chiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hong","family":"Qiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,29]]},"reference":[{"key":"42_CR1","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J. Artif. Intell. Res."},{"key":"42_CR2","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT press, Cambridge (1998)"},{"issue":"5\u20136","key":"42_CR3","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1561\/2200000049","volume":"8","author":"M Ghavamzadeh","year":"2015","unstructured":"Ghavamzadeh, M., Mannor, S., Pineau, J., Tamar, A.: Bayesian reinforcement learning: a survey. Found. Trends? Mach. Learn. 8(5\u20136), 359\u2013483 (2015)","journal-title":"Found. Trends? Mach. Learn."},{"key":"42_CR4","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1007\/978-3-642-27645-3_11","volume":"12","author":"N Vlassis","year":"2012","unstructured":"Vlassis, N., Ghavamzadeh, M., Mannor, S., Poupart, P.: Bayesian reinforcement learning. Reinforcement Learning 12, 359\u2013386 (2012)","journal-title":"Reinforcement Learning"},{"key":"42_CR5","unstructured":"Dearden, R., Friedman, N., Russell, S.: Bayesian Q-learning. In: The Association for the Advancement of Artificial Intelligence, pp. 761\u2013768 (1998)"},{"key":"42_CR6","doi-asserted-by":"crossref","unstructured":"Wang, T., Lizotte, D., Bowling, M., Schuurmans, D.: Bayesian sparse sampling for on-line reward optimization. In: Proceedings of the 22nd international conference on Machine learning, pp. 956\u2013963 (2005)","DOI":"10.1145\/1102351.1102472"},{"issue":"Oct","key":"42_CR7","first-page":"213","volume":"3","author":"RI Brafman","year":"2002","unstructured":"Brafman, R.I., Tennenholtz, M.: R-max-a general polynomial time algorithm for near-optimal reinforcement learning. J. Mach. Learn. Res. 3(Oct), 213\u2013231 (2002)","journal-title":"J. Mach. Learn. Res."},{"key":"42_CR8","unstructured":"Chapelle, O., Li, L.: An empirical evaluation of Thompson sampling. In: Advances in neural information processing systems, pp. 2249\u20132257 (2011)"},{"key":"42_CR9","unstructured":"Strens, M.: A Bayesian framework for reinforcement learning. In: International Conference on Machine Learning, pp. 943\u2013950 (2000)"},{"issue":"6","key":"42_CR10","doi-asserted-by":"publisher","first-page":"e0157088","DOI":"10.1371\/journal.pone.0157088","volume":"11","author":"M Castronovo","year":"2016","unstructured":"Castronovo, M., Ernst, D., Cou\u00ebtoux, A., Fonteneau, R.: Benchmarking for Bayesian reinforcement learning. PloS One 11(6), e0157088 (2016)","journal-title":"PloS One"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-70139-4_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T13:00:47Z","timestamp":1710334847000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-70139-4_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319701387","9783319701394"],"references-count":10,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-70139-4_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"29 October 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 November 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.iconip2017.org\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}