{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T18:06:44Z","timestamp":1758823604967,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,9]]},"DOI":"10.1109\/allerton.2010.5707072","type":"proceedings-article","created":{"date-parts":[[2011,2,3]],"date-time":"2011-02-03T21:50:52Z","timestamp":1296769852000},"page":"1360-1367","source":"Crossref","is-referenced-by-count":8,"title":["Approximate dynamic programming with correlated Bayesian beliefs"],"prefix":"10.1109","author":[{"given":"Ilya O.","family":"Ryzhov","sequence":"first","affiliation":[]},{"given":"Warren B.","family":"Powell","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"year":"1995","author":"bechhofer","article-title":"Design and Analysis of Experiments for Statistical Selection, Screening and Multiple Comparisons","key":"ref10"},{"year":"1989","author":"gittins","article-title":"Multi-Armed Bandit Allocation Indices","key":"ref11"},{"key":"ref12","article-title":"Markovian decision processes with uncertain transition probabilities or rewards","author":"silver","year":"1963","journal-title":"Technical Report 2 1"},{"key":"ref13","first-page":"1019","article-title":"Local bandit approximation for optimal learning problems","volume":"9","author":"duff","year":"1996","journal-title":"Advances in neural information processing systems"},{"key":"ref14","first-page":"150","article-title":"Model-based Bayesian Exploration","author":"dearden","year":"1999","journal-title":"Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1145\/1143844.1143932"},{"key":"ref16","first-page":"943","article-title":"A Bayesian framework for reinforcement learning","author":"strens","year":"2000","journal-title":"Proceedings of the 17th International Conference on Machine Learning"},{"key":"ref17","first-page":"131","article-title":"Design for an optimal probe","author":"duff","year":"2003","journal-title":"Proceedings of the 20th International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1287\/mnsc.1060.0614"},{"key":"ref19","first-page":"761","article-title":"Bayesian Q-Iearning","author":"dearden","year":"1998","journal-title":"Proceedings of the 14th conference on Uncertainty in Artificial Intelligence"},{"year":"1970","author":"degroot","article-title":"Optimal Statistical Decisions","key":"ref28"},{"year":"1998","author":"sutton","article-title":"Reinforcement Learning","key":"ref4"},{"key":"ref27","article-title":"Information collection on a graph","author":"ryzhov","year":"2010","journal-title":"Operations Research"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1086\/500675"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1002\/9780470182963"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1111\/j.1540-6261.1997.tb02721.x"},{"year":"1996","author":"bertsekas","article-title":"Neuro-Dynamic Programming","key":"ref5"},{"year":"2003","author":"kushner","article-title":"Stochastic Approximation and Recursive Algorithms and Applications","key":"ref8"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/CDC.1997.652501"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","article-title":"Markov Decision Processes","author":"puterman","year":"1994"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4168.001.0001","article-title":"Learning in Embedded Systems","author":"kaelbling","year":"1993"},{"year":"1957","author":"bellman","article-title":"Dynamic Programming","key":"ref1"},{"key":"ref20","first-page":"154","article-title":"Bayes Meets Bellman: The Gaussian Process Approach to Temporal Difference Learning","author":"engel","year":"2003","journal-title":"Proceedings of the 20th International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1287\/ijoc.1080.0314"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1145\/1102351.1102377"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1137\/070693424"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1016\/0378-3758(95)00169-7"},{"year":"2009","author":"ryzhov","article-title":"The knowledge gradient algorithm for a general class of online learning problems","key":"ref26"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/ADPRL.2009.4927537"}],"event":{"name":"2010 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","start":{"date-parts":[[2010,9,29]]},"location":"Monticello, IL, USA","end":{"date-parts":[[2010,10,1]]}},"container-title":["2010 48th Annual Allerton Conference on Communication, Control, and Computing (Allerton)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5701578\/5706874\/05707072.pdf?arnumber=5707072","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,8]],"date-time":"2019-06-08T09:49:43Z","timestamp":1559987383000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5707072\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,9]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/allerton.2010.5707072","relation":{},"subject":[],"published":{"date-parts":[[2010,9]]}}}