{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T17:41:24Z","timestamp":1772300484702,"version":"3.50.1"},"reference-count":123,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/cdc40024.2019.9029916","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T04:43:11Z","timestamp":1584074591000},"page":"3724-3740","source":"Crossref","is-referenced-by-count":54,"title":["From self-tuning regulators to reinforcement learning and back again"],"prefix":"10.1109","author":[{"given":"Nikolai","family":"Matni","sequence":"first","affiliation":[]},{"given":"Alexandre","family":"Proutiere","sequence":"additional","affiliation":[]},{"given":"Anders","family":"Rantzer","sequence":"additional","affiliation":[]},{"given":"Stephen","family":"Tu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/S0196-8858(82)80005-5"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/37.710876"},{"key":"ref33","article-title":"L1 Adaptive Control Theory: Guaranteed Robustness with Fast Adaptation","author":"hovakimyan","year":"2010","journal-title":"SIAM"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(94)00127-5"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1985.1104070"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(84)90009-8"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"645","DOI":"10.1016\/0005-1098(84)90014-1","article-title":"Automatic tuning of simple regulators with specifications on phase and amplitude margins","volume":"20","author":"\u00e5str\u00f6m","year":"1984","journal-title":"Automatica"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1080\/00207179.2013.810345"},{"key":"ref35","doi-asserted-by":"crossref","DOI":"10.1201\/b15752","author":"hou","year":"2013","journal-title":"Model Free Adaptive Control Theory and Applications"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"2173","DOI":"10.1109\/TNN.2011.2176141","article-title":"Data-driven model-free adaptive control for a class of mimo nonlinear discrete-time systems","volume":"22","author":"hou","year":"2011","journal-title":"IEEE Trans on Neural Networks"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/0319052"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1977.1101561"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0005037"},{"key":"ref20","author":"sastry","year":"2011","journal-title":"Adaptive Control Stability Convergence and Robustness"},{"key":"ref22","volume":"2045","author":"bellman","year":"2015","journal-title":"Adaptive Control Processes A Guided Tour"},{"key":"ref21","author":"astolfi","year":"2007","journal-title":"Nonlinear and Adaptive Control with Applications"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1002\/9780470182963","volume":"703","author":"powell","year":"2007","journal-title":"Approximate Dynamic Programming Solving the Curses of Dimensionality"},{"key":"ref23","volume":"5","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2010.5706896"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1016\/0005-1098(73)90073-3","article-title":"On self tuning regulators","volume":"9","author":"\u00e5str\u00f6m","year":"1973","journal-title":"Automatica"},{"key":"ref100","article-title":"Logarithmic online regret bounds for undiscounted reinforcement learning","volume":"19","author":"auer","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref25","first-page":"468","article-title":"Design of self-optimizing control system","volume":"80","author":"kalman","year":"1958","journal-title":"Trans ASME"},{"key":"ref50","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1023\/A:1022192903948","article-title":"Least squares policy evaluation algorithms with linear function approximation","volume":"13","author":"nedi?","year":"2003","journal-title":"Discrete Event Dynamic Systems"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1995.7.5.950"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2002.800750"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6911(93)90057-D"},{"key":"ref57","first-page":"1800","article-title":"Simple random search of static linear policies is competitive for reinforcement learning","author":"mania","year":"2018","journal-title":"NeurIPS"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref55","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Int Conf on ML"},{"key":"ref54","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref53","article-title":"Off-policy actor-critic","author":"degris","year":"2012"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(86)90004-7"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref3","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"JMLR"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1016\/0005-1098(71)90059-8","article-title":"System identification&#x2014;a survey","volume":"7","author":"\u00e5str\u00f6m","year":"1971","journal-title":"Automatica"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1002\/047134608X.W1046"},{"key":"ref49","first-page":"81","article-title":"Learning to act using real-time dynamic programming","volume":"72","author":"barto","year":"1995","journal-title":"AI"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029621"},{"key":"ref9","author":"chen","year":"2012","journal-title":"Identification and Stochastic Adaptive Control"},{"key":"ref46","article-title":"Learning from delayed rewards","author":"watkins","year":"1989","journal-title":"Ph D Dissertation"},{"key":"ref45","volume":"37","author":"rummery","year":"1994","journal-title":"On-line Q-learning using connectionist systems"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/21.229449"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref42","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2010.5706895"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1561\/2200000070"},{"key":"ref72","first-page":"1","article-title":"Regret bounds for the adaptive control of linear quadratic systems","author":"abbasi-yadkori","year":"2011","journal-title":"Proceedings of the 24th Conf on Learning Theory"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/267460.267481"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2006.1636313"},{"key":"ref76","first-page":"3108","article-title":"Model-free linear quadratic control via reduction to expert prediction","author":"abbasi-yadkori","year":"2019","journal-title":"The 22nd Intl Conf on AI and Statistics"},{"key":"ref77","first-page":"4188","article-title":"Regret bounds for robust adaptive control of the linear quadratic regulator","author":"dean","year":"2018","journal-title":"NeurIPS"},{"key":"ref74","first-page":"1","article-title":"Improved regret bounds for thompson sampling in linear quadratic control problems","author":"abeille","year":"2018","journal-title":"Int Conf on ML"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2017.8262873"},{"key":"ref78","article-title":"Learning linear-quadratic regulators efficiently with only $\\sqrt T $ regret","author":"cohen","year":"2019"},{"key":"ref79","article-title":"Certainty equivalent control of LQR is efficient","author":"mania","year":"2019"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/1-84628-095-8_10"},{"key":"ref62","article-title":"On the sample complexity of the linear quadratic regulator","author":"dean","year":"2017"},{"key":"ref61","first-page":"1025","article-title":"Gradient descent learns linear dynamical systems","volume":"19","author":"hardt","year":"2018","journal-title":"Journal of ML Research"},{"key":"ref63","first-page":"439","article-title":"Learning without mixing: Towards a sharp analysis of linear system identification","author":"simchowitz","year":"2018","journal-title":"Conference on Learning Theory"},{"key":"ref64","article-title":"How fast can linear dynamical systems be learned?","author":"sarkar","year":"2018"},{"key":"ref65","article-title":"Non-asymptotic identification of lti systems from a single trajectory","author":"oymak","year":"2018"},{"key":"ref66","article-title":"Finite-time system identification for partially observed lti systems of unknown order","author":"sarkar","year":"2019"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029499"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ALLERTON.2018.8635921"},{"key":"ref2","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"Int Conf on ML"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029815"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref109","article-title":"On the sample complexity of reinforcement learning with a generative model","author":"azar","year":"2012","journal-title":"International Conference on Machine Learning"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1016\/0196-8858(85)90002-8"},{"key":"ref108","article-title":"Fastest convergence for Q-learning","author":"devraj","year":"0"},{"key":"ref94","article-title":"Explore first, exploit next: The true shape of regret in bandit problems","author":"garivier","year":"2018","journal-title":"Mathematics of Operations Research"},{"key":"ref107","article-title":"Convergent temporal-difference learning with arbitrary smooth function approximation","author":"maei","year":"2009","journal-title":"Proceedings of NIPS"},{"key":"ref93","article-title":"Unimodal bandits: Regret lower bounds and optimal algorithms","author":"combes","year":"2014","journal-title":"Proceedings of the 31st International Conference on Machine Learning"},{"key":"ref106","article-title":"Model-based reinforcement learning and the Eluder dimension","volume":"27","author":"osband","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref92","article-title":"Unimodal bandits without smoothness","volume":"abs 1406 7447","author":"combes","year":"2014","journal-title":"CoRR"},{"key":"ref105","article-title":"Improved regret bounds for undiscounted continuous reinforcement learning","author":"lakshmanan","year":"2015","journal-title":"32nd International Conference on Machine Learning"},{"key":"ref91","first-page":"1","article-title":"On the complexity of best-arm identification in multi-armed bandit models","volume":"17","author":"kaufmann","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref104","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24412-4_15"},{"key":"ref103","article-title":"Online regret bounds for undiscounted continuous reinforcement learning","volume":"25","author":"ortner","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref102","article-title":"Posterior sampling for reinforcement learning: worst-case regret bounds","volume":"31","author":"agrawal","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref112","first-page":"5186","article-title":"Near-optimal time and sample complexities for solving Markov decision processes with a generative model","volume":"31","author":"sidford","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2014.09.029"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1287\/moor.22.1.222"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012994275440"},{"key":"ref96","first-page":"8874","article-title":"Exploration in structured reinforcement learning","author":"ok","year":"2018","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref97","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"22","author":"auer","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref10","author":"goodwin","year":"1977","journal-title":"Dynamic System Identification Experiment Design and Data Analysis"},{"key":"ref11","author":"box","year":"2015","journal-title":"Time Series Analysis Forecasting and Control"},{"key":"ref12","first-page":"267","article-title":"Information theory and an extension of the maximum likelihood principle","author":"akaike","year":"1973","journal-title":"2nd Intl Symposium on Information Theory 1973"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(95)00106-3"},{"key":"ref14","author":"van overschee","year":"2012","journal-title":"Subspace identification for linear systems Theory&#x2014;Implementation&#x2014;Applications"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-514060-7.50014-2"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/9.284879"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"689","DOI":"10.1080\/00207178008922882","article-title":"Why use adaptive techniques for steering large tankers?","volume":"32","author":"\u00e5str\u00f6m","year":"1980","journal-title":"Intl Journ Of Control"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.1994.735224"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2017.8264168"},{"key":"ref17","author":"\u00e5str\u00f6m","year":"2013","journal-title":"Adaptive Control"},{"key":"ref81","article-title":"A tour of reinforcement learning: The view from continuous control","author":"recht","year":"2018","journal-title":"Annu Rev Control Robotics Autonomous Syst"},{"key":"ref18","author":"goodwin","year":"2014","journal-title":"Adaptive Filtering Prediction and Control"},{"key":"ref84","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume":"80","author":"fazel","year":"2018","journal-title":"Proceedings of the 35th Intl Conf on ML"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1100.0446"},{"key":"ref19","author":"narendra","year":"2012","journal-title":"Stable Adaptive Systems"},{"key":"ref83","first-page":"5012","article-title":"Least-squares temporal difference learning for the linear quadratic regulator","author":"tu","year":"2018","journal-title":"Int Conf on ML"},{"key":"ref114","first-page":"4863","article-title":"Is Q-learning provably efficient?","volume":"31","author":"jin","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref113","article-title":"Speedy Q-learning","author":"azar","year":"2011","journal-title":"Proceedings of NIPS"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2019.03.006"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2018.8431891"},{"key":"ref115","article-title":"A system level approach to controller synthesis","author":"wang","year":"2016"},{"key":"ref120","first-page":"1107","article-title":"Least-Squares Policy Iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"Journal of ML Research"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2009.01.016"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3618-5_2"},{"key":"ref122","article-title":"Proximal Policy Optimization Algorithms","author":"schulman","year":"2017"},{"key":"ref123","article-title":"Sample Complexity Bounds for the Linear Quadratic Regulator","author":"tu","year":"2019","journal-title":"Ph D Dissertation"},{"key":"ref85","article-title":"Derivative-Free Methods for Policy Optimization: Guarantees for Linear Quadratic Systems","author":"malik","year":"2019","journal-title":"AISTATS"},{"key":"ref86","article-title":"The gap between model-based and model-free methods on the linear quadratic regulator: An asymptotic viewpoint","author":"tu","year":"2018"},{"key":"ref87","first-page":"5713","article-title":"Unifying PAC and regret: Uniform PAC bounds for episodic reinforcement learning","author":"dann","year":"2017","journal-title":"NeurIPS"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"}],"event":{"name":"2019 IEEE 58th Conference on Decision and Control (CDC)","location":"Nice, France","start":{"date-parts":[[2019,12,11]]},"end":{"date-parts":[[2019,12,13]]}},"container-title":["2019 IEEE 58th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8977134\/9028853\/09029916.pdf?arnumber=9029916","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T20:20:53Z","timestamp":1658262053000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9029916\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":123,"URL":"https:\/\/doi.org\/10.1109\/cdc40024.2019.9029916","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}