{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T02:11:40Z","timestamp":1729649500175,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T00:00:00Z","timestamp":1607904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T00:00:00Z","timestamp":1607904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T00:00:00Z","timestamp":1607904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,12,14]]},"DOI":"10.1109\/cdc42340.2020.9304511","type":"proceedings-article","created":{"date-parts":[[2021,1,13]],"date-time":"2021-01-13T07:27:32Z","timestamp":1610522852000},"page":"1183-1189","source":"Crossref","is-referenced-by-count":3,"title":["Restless Hidden Markov Bandit with Linear Rewards"],"prefix":"10.1109","author":[{"given":"Michal","family":"Yemini","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amir","family":"Leshem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anelia","family":"Somekh-Baruch","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"397","article-title":"Using confidence bounds for exploitation-exploration tradeoffs","volume":"3","author":"auer","year":"2002","journal-title":"Journal of Machine Learning Research (JMLR)"},{"key":"ref11","article-title":"Stochastic linear optimization under bandit feedback","author":"dani","year":"2008","journal-title":"Conference on Learning Theory (COLT)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1100.0446"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472588"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton.2011.6120273"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2014.09.026"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67235-9_19"},{"key":"ref17","article-title":"Whittle index policy for dynamic multi-channel allocation in remote state estimation","author":"wang","year":"2019","journal-title":"IEEE Trans Autom Control"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1080.0371"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.1109\/CDC42340.2020.9304511","article-title":"Restless hidden markov bandit with linear rewards","author":"yemini","year":"2020"},{"key":"ref4","first-page":"49","article-title":"Logarithmic online regret bounds for undiscounted reinforcement learning","volume":"19","author":"auer","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1214\/aoap\/1028903453"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2012.2198613"},{"key":"ref6","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"jaksch","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref5","article-title":"REGAL: a regularization based algorithm for reinforcement learning in weakly communicating MDPs","author":"bartlett","year":"2009","journal-title":"UAI"},{"key":"ref8","first-page":"2998","article-title":"Near optimal exploration-exploitation in non-communicating Markov decision processes","author":"fruit","year":"2018","journal-title":"Proceedings of the 32Nd International Conference on Neural Information Processing Systems"},{"key":"ref7","article-title":"Exploration bonus for regret minimization in undiscounted discrete and continuous Markov decision processes","author":"qian","year":"2018","journal-title":"CoRR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2010.2058091"},{"key":"ref9","article-title":"Regret bounds for reinforcement learning via Markov chain concentration","author":"ortner","year":"2018","journal-title":"CoRR"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1987.1104485"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/78.969499"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799449"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2003.811228"},{"key":"ref24","first-page":"8874","article-title":"Exploration in structured reinforcement learning","author":"ok","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref23","article-title":"Nonparametric gaussian mixture models for the multi-armed contextual bandit","author":"urteaga","year":"2018","journal-title":"CoRR"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1214\/07-AOP384"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619134"}],"event":{"name":"2020 59th IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2020,12,14]]},"location":"Jeju, Korea (South)","end":{"date-parts":[[2020,12,18]]}},"container-title":["2020 59th IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9303728\/9303729\/09304511.pdf?arnumber=9304511","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,11]],"date-time":"2022-12-11T11:15:55Z","timestamp":1670757355000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9304511\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,14]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/cdc42340.2020.9304511","relation":{},"subject":[],"published":{"date-parts":[[2020,12,14]]}}}