{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:21:31Z","timestamp":1730204491210,"version":"3.28.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/cdc40024.2019.9029975","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T04:43:11Z","timestamp":1584074591000},"page":"1977-1984","source":"Crossref","is-referenced-by-count":1,"title":["On Applications of Bootstrap in Continuous Space Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Mohamad Kazem","family":"Shirani Faradonbeh","sequence":"first","affiliation":[]},{"given":"Ambuj","family":"Tewari","sequence":"additional","affiliation":[]},{"given":"George","family":"Michailidis","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"crossref","first-page":"898","DOI":"10.1109\/TAC.1986.1104138","article-title":"Extended least squares and their applications to adaptive control and prediction in linear systems","volume":"31","author":"lai","year":"1986","journal-title":"IEEE Transactions on Automatic Control"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1985.1103963"},{"journal-title":"The Bootstrap and Edgeworth Expansion","year":"2013","author":"hall","key":"ref33"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/S1474-6670(17)58509-2"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012991202422"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974263"},{"key":"ref36","volume":"1","author":"bertsekas","year":"1995","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177693494"},{"journal-title":"Martingale Limit Theory and its Application","year":"2014","author":"hall","key":"ref34"},{"key":"ref10","first-page":"5610","article-title":"Near optimal finite time identification of arbitrary linear dynamical systems","author":"sarkar","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029975"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997317499"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.4310\/CIS.2006.v6.n4.a3"},{"key":"ref13","first-page":"1","article-title":"Regret bounds for the adaptive&#x00B4; control of linear quadratic systems","author":"abbasi-yadkori","year":"2011","journal-title":"COLT"},{"article-title":"Optimismbased adaptive regulation of linear-quadratic systems","year":"2017","author":"faradonbeh","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"article-title":"Input perturbations for adaptive control and learning","year":"2018","author":"faradonbeh","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2883241"},{"key":"ref18","first-page":"1","article-title":"Improved regret bounds for thompson sampling in linear quadratic control problems","author":"abeille","year":"2018","journal-title":"International Conference on Machine Learning"},{"article-title":"On optimality of adaptive linear-quadratic regulators","year":"2018","author":"faradonbeh","key":"ref19"},{"article-title":"On the sample complexity of the linear quadratic regulator","year":"2017","author":"dean","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.2863925"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2544787"},{"key":"ref3","first-page":"3814","article-title":"Data center cooling using model-predictive control","author":"lazic","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.95.200201"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.jeconom.2004.09.007"},{"key":"ref5","first-page":"222","article-title":"Iterative linear quadratic regulator design for nonlinear biological movement systems","author":"li","year":"2004","journal-title":"ICINCO (1)"},{"key":"ref8","first-page":"375","article-title":"Asymptotic properties of multivariate weighted sums with applications to stochastic regression in linear dynamic systems","author":"lai","year":"1985","journal-title":"Multivariate Analysis"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(86)90004-7"},{"journal-title":"Linear-Quadratic Control An Introduction","year":"1995","author":"dorato","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.07.008"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_6"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2019.8814865"},{"key":"ref45","first-page":"2312","article-title":"Improved algorithms&#x00B4; for linear stochastic bandits","author":"abbasi-yadkori","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"ref22","first-page":"3601","article-title":"Garbage in, reward out: Bootstrapping exploration in multi-armed bandits","author":"kveton","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176344552"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6911(86)90097-6"},{"article-title":"Thompson sampling with the online bootstrap","year":"2014","author":"eckles","key":"ref24"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6911(86)90065-4"},{"article-title":"New insights into bootstrapping for bandits","year":"2018","author":"vaswani","key":"ref23"},{"key":"ref44","first-page":"355","article-title":"Stochastic linear optimization under bandit feedback","author":"dani","year":"2008","journal-title":"Proc of the 21st Annual Conference on Learning Theory"},{"key":"ref26","first-page":"4026","article-title":"Deep exploration via bootstrapped DQN","author":"osband","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/0047-259X(83)90002-7"},{"article-title":"Bootstrapped thompson sampling and deep exploration","year":"2015","author":"osband","key":"ref25"}],"event":{"name":"2019 IEEE 58th Conference on Decision and Control (CDC)","start":{"date-parts":[[2019,12,11]]},"location":"Nice, France","end":{"date-parts":[[2019,12,13]]}},"container-title":["2019 IEEE 58th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8977134\/9028853\/09029975.pdf?arnumber=9029975","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T20:25:47Z","timestamp":1658262347000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9029975\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/cdc40024.2019.9029975","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}