{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T22:19:44Z","timestamp":1775341184280,"version":"3.50.1"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1109\/tac.2022.3145632","type":"journal-article","created":{"date-parts":[[2022,1,25]],"date-time":"2022-01-25T20:28:45Z","timestamp":1643142525000},"page":"737-752","source":"Crossref","is-referenced-by-count":52,"title":["Linear Quadratic Control Using Model-Free Reinforcement Learning"],"prefix":"10.1109","volume":"68","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6665-5881","authenticated-orcid":false,"given":"Farnaz Adib","family":"Yaghmaie","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3270-171X","authenticated-orcid":false,"given":"Fredrik","family":"Gustafsson","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4881-8955","authenticated-orcid":false,"given":"Lennart","family":"Ljung","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref2","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Duan","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"ref4","volume-title":"Nonlinear and Adaptive Control Design","volume":"222","author":"Krstic","year":"1995"},{"key":"ref5","volume-title":"Adaptive Control","author":"strm","year":"1994"},{"key":"ref6","volume-title":"Reinforcement Learning: An Introduction","volume":"1","author":"Sutton","year":"2018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2012.2214134"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MCAS.2009.933854"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.10.038"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2550518"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2016.12.009"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2317301"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2019.108549"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.3413"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.4538"},{"key":"ref17","author":"Bertsekas","year":"2019","journal-title":"Reinforcement Learning and Optimal Control"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1515\/9781400874651"},{"issue":"6","key":"ref19","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"Lagoudakis","year":"2003","journal-title":"J. Mach. Learn. Res."},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/bf00114723"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"key":"ref22","first-page":"605","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","volume":"1","author":"Silver"},{"key":"ref23","volume-title":"System IdentificationTheory for the User","author":"Ljung","year":"1999"},{"key":"ref24","volume-title":"Theory and Practice of Recursive Identification","author":"Ljung","year":"1987"},{"key":"ref25","first-page":"3108","article-title":"Model-free linear quadratic control via reduction to expert prediction","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Abbasi-Yadkori","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-019-09426-y"},{"key":"ref27","first-page":"5005","article-title":"Least-squares temporal difference learning for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tu","year":"2018"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029916"},{"key":"ref29","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fazel","year":"2018"},{"key":"ref30","first-page":"1029","article-title":"Online linear quadratic control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Cohen","year":"2018"},{"key":"ref31","first-page":"3036","article-title":"The gap between model-based and model-free methods on the linear quadratic regulator: An asymptotic viewpoint","volume-title":"Proc. Conf. Learn. Theory","author":"Tu","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2019.2921512"},{"key":"ref33","first-page":"6510","article-title":"Using reinforcement learning for model-free linear quadratic Gaussian control with process and measurement noises","volume-title":"Proc. IEEE Conf. Decis. Control","author":"F","year":"2019"},{"key":"ref34","first-page":"3320","article-title":"Improper learning for non-stochastic control","volume-title":"Proc. Conf. Learn. Theory","author":"Simchowitz","year":"2020"},{"key":"ref35","first-page":"20876","article-title":"Logarithmic regret bound in partially observable linear dynamical systems","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lale","year":"2020"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2010.2043839"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2009.2022097"},{"key":"ref38","volume-title":"Control TheoryMultivariable and Nonlinear Methods","author":"Glad","year":"2000"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1971.1099755"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44851-9_5"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.56021\/9781421407944"},{"key":"ref42","volume-title":"MATLAB System Identification Toolbox (R2018a)","year":"2018"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/10031614\/09691800.pdf?arnumber=9691800","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,13]],"date-time":"2024-01-13T22:21:45Z","timestamp":1705184505000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9691800\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2]]},"references-count":42,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tac.2022.3145632","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"},{"value":"2334-3303","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2]]}}}