{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T11:27:34Z","timestamp":1780054054321,"version":"3.54.0"},"publisher-location":"Berlin, Heidelberg","reference-count":9,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642228865","type":"print"},{"value":"9783642228872","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-22887-2_30","type":"book-chapter","created":{"date-parts":[[2011,7,29]],"date-time":"2011-07-29T04:00:46Z","timestamp":1311912046000},"page":"281-285","source":"Crossref","is-referenced-by-count":2,"title":["Reinforcement Learning and the Bayesian Control Rule"],"prefix":"10.1007","author":[{"given":"Pedro Alejandro","family":"Ortega","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Daniel Alexander","family":"Braun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Simon","family":"Godsill","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Barto, A., Sutton, R., Anderson, C.: Neuron like elements that can solve difficult learning control problems. IEEE Trans. on Systems, Man and Cybernetics 13 (1983)","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"30_CR2","series-title":"Monographs on Statistics and Applied Probability","doi-asserted-by":"crossref","DOI":"10.1007\/978-94-015-3711-7","volume-title":"Bandit problems: Sequential allocation of experiments","author":"D.A. Berry","year":"1985","unstructured":"Berry, D.A., Fristedt, B.: Bandit problems: Sequential allocation of experiments. Monographs on Statistics and Applied Probability. Chapman & Hall, London (1985)"},{"key":"30_CR3","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"30_CR4","volume-title":"Pattern Recognition and Machine Learning","author":"C.M. Bishop","year":"2006","unstructured":"Bishop, C.M.: Pattern Recognition and Machine Learning. Springer, Heidelberg (2006)"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Ghavamzadeh, M., Engel, Y.: Bayesian actor-critic algorithms. In: Proc. of the 24th International Conference on Machine Learning (2007)","DOI":"10.1145\/1273496.1273534"},{"key":"30_CR6","series-title":"Wiley-Interscience Series in Systems and Optimization","volume-title":"Multi-armed bandit allocation indices","author":"J.C. Gittins","year":"1989","unstructured":"Gittins, J.C.: Multi-armed bandit allocation indices. Wiley-Interscience Series in Systems and Optimization. John Wiley & Sons, Ltd., Chichester (1989)"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Ortega, P.A., Braun, D.A.: A bayesian rule for adaptive control based on causal interventions. In: The Third Conference on Artificial General Intelligence, pp. 121\u2013126. Atlantis Press, Paris (2010)","DOI":"10.2991\/agi.2010.39"},{"key":"30_CR8","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1613\/jair.3062","volume":"38","author":"P.A. Ortega","year":"2010","unstructured":"Ortega, P.A., Braun, D.A.: A minimum relative entropy principle for learning and acting. Journal of Artificial Intelligence Research\u00a038, 475\u2013511 (2010)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"30_CR9","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"}],"container-title":["Lecture Notes in Computer Science","Artificial General Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-22887-2_30.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,24]],"date-time":"2020-11-24T03:09:26Z","timestamp":1606187366000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-22887-2_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642228865","9783642228872"],"references-count":9,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-22887-2_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011]]}}}