{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T09:59:32Z","timestamp":1730195972501,"version":"3.28.0"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,9,26]],"date-time":"2023-09-26T00:00:00Z","timestamp":1695686400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,26]],"date-time":"2023-09-26T00:00:00Z","timestamp":1695686400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100016311","name":"Arm","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016311","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001843","name":"Science and Engineering Research Board","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001843","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,26]]},"DOI":"10.1109\/allerton58177.2023.10313396","type":"proceedings-article","created":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T13:48:25Z","timestamp":1699969705000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["The Reward Biased Method: An Optimism based Approach for Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Akshay","family":"Mete","sequence":"first","affiliation":[{"name":"Texas A &#x0026; M University,ECE Department,College Station,TX,USA"}]},{"given":"Rahul","family":"Singh","sequence":"additional","affiliation":[{"name":"Indian Institute of Science,ECE Department,Bengaluru,India"}]},{"given":"P. R.","family":"Kumar","sequence":"additional","affiliation":[{"name":"Texas A &#x0026; M University,ECE Department,College Station,TX,USA"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Adaptation and learning in automatic systems","volume":"73","author":"Tsypkin","year":"1971"},{"volume-title":"Adaptive control","year":"2013","author":"\u00c5str\u00f6m","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1137\/0323023"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611974263","volume-title":"Stochastic systems: Estimation, identification, and adaptive control","author":"Kumar","year":"2015"},{"volume-title":"Adaptive filtering prediction and control","year":"2014","author":"Goodwin","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-08-042375-3.50010-X"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0011"},{"issue":"4","key":"ref9","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"Jaksch","year":"2010","journal-title":"Journal of Machine Learning Research"},{"issue":"9","key":"ref10","first-page":"1240","article-title":"Dual control theory. 
i","volume":"21","author":"Feldbaum","year":"1960","journal-title":"Avtomatika i Telemekhanika"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1979.1102191"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1985.1103963"},{"key":"ref14","first-page":"6248","article-title":"Exploration through reward biasing: Reward-biased maximum likelihood estimation for stochastic multi-armed bandits","volume-title":"International Conference on Machine Learning","author":"Liu"},{"key":"ref15","first-page":"815","article-title":"Reward Biased Maximum Likelihood Estimation for Reinforcement Learning","volume-title":"Learning for Dynamics and Control","author":"Mete","year":"2021"},{"key":"ref16","article-title":"Augmented rbmle-ucb approach for adaptive control of linear quadratic systems","author":"Mete","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CISS53076.2022.9751189"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"key":"ref19","first-page":"1","article-title":"Regret bounds for the adaptive control of linear quadratic systems","volume-title":"Proceedings of the 24th Annual Conference on Learning Theory","author":"Abbasi-Yadkori"},{"key":"ref20","first-page":"208","article-title":"Contextual bandits with linear payoff functions","volume-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics","author":"Chu"},{"key":"ref21","first-page":"10746","article-title":"Reinforcement learning in feature space: Matrix bandit, kernels, and regret bound","volume-title":"International Conference on Machine Learning","author":"Yang"},{"article-title":"Exploration-exploitation in constrained mdps","year":"2020","author":"Efroni","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16961"},{"article-title":"The statistical complexity of interactive decision making","year":"2021","author":"Foster","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1982.1102878"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.2307\/2332286"},{"key":"ref27","first-page":"127","article-title":"Thompson sampling for contextual bandits with linear payoffs","volume-title":"Proceedings of the 30th International Conference on Machine Learning","author":"Agrawal"},{"key":"ref28","first-page":"861","article-title":"Thompson sampling for learning parameterized Markov decision processes","volume-title":"Conference on Learning Theory","author":"Gopalan"},{"article-title":"Learning unknown Markov decision processes: A thompson sampling approach","year":"2017","author":"Ouyang","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/0321009"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012999366369"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1147\/rd.114.0389"},{"article-title":"Finite time regret bounds for minimum variance control of autoregressive systems with exogenous inputs","year":"2023","author":"Singh","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1017\/9781108571401"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997317499"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.4310\/CIS.2006.v6.n4.a3"},{"key":"ref37","first-page":"1","article-title":"Regret bounds for the 
adaptive control of linear quadratic systems","volume-title":"Proceedings of the 24th Annual Conference on Learning Theory","author":"Abbasi-Yadkori"},{"key":"ref38","first-page":"1246","article-title":"Thompson sampling for linear-quadratic control problems","volume-title":"Artificial Intelligence and Statistics","author":"Abeille","year":"2017"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2020.108950"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2020.108982"},{"key":"ref41","first-page":"5354","article-title":"Reinforcement learning with fast stabilization in linear dynamical systems","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Lale"},{"key":"ref42","first-page":"592","article-title":"On bayesian upper confidence bounds for bandit problems","volume-title":"Artificial intelligence and statistics","author":"Kaufmann","year":"2012"},{"article-title":"Gaussian process optimization in the bandit setting: No regret and experimental design","year":"2009","author":"Srinivas","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2017.1663"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1110.0999"},{"key":"ref46","first-page":"1583","article-title":"Learning to optimize via information-directed sampling","volume":"27","author":"Russo","year":"2014","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref47","first-page":"975","article-title":"Lipschitz bandits: Regret lower bound and optimal algorithms","volume-title":"Conference on Learning Theory","author":"Magureanu"}],"event":{"name":"2023 59th Annual Allerton Conference on Communication, Control, and Computing (Allerton)","start":{"date-parts":[[2023,9,26]]},"location":"Monticello, IL, USA","end":{"date-parts":[[2023,9,29]]}},"container-title":["2023 59th Annual Allerton Conference on Communication, Control, and Computing (Allerton)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10313338\/10313355\/10313396.pdf?arnumber=10313396","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T16:14:55Z","timestamp":1710346495000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10313396\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,26]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/allerton58177.2023.10313396","relation":{},"subject":[],"published":{"date-parts":[[2023,9,26]]}}}