{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T21:05:13Z","timestamp":1768338313697,"version":"3.49.0"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science and Technology Major Project of China","award":["2022ZD0116700"],"award-info":[{"award-number":["2022ZD0116700"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62033006"],"award-info":[{"award-number":["62033006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325305"],"award-info":[{"award-number":["62325305"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1109\/tac.2024.3455508","type":"journal-article","created":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T18:28:47Z","timestamp":1725647327000},"page":"1455-1466","source":"Crossref","is-referenced-by-count":10,"title":["Convergence and Sample Complexity of Policy Gradient Methods for Stabilizing Linear Systems"],"prefix":"10.1109","volume":"70","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3142-2903","authenticated-orcid":false,"given":"Feiran","family":"Zhao","sequence":"first","affiliation":[{"name":"Department of Automation and BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6708-9628","authenticated-orcid":false,"given":"Xingyun","family":"Fu","sequence":"additional","affiliation":[{"name":"Department of Automation and BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4355-5340","authenticated-orcid":false,"given":"Keyou","family":"You","sequence":"additional","affiliation":[{"name":"Department of Automation and BNRist, Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"issue":"1","key":"ref6","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref7","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fazel","year":"2018"},{"key":"ref8","article-title":"LQR through the lens of first order methods: Discrete-time case","author":"Bu","year":"2019"},{"key":"ref9","first-page":"2916","article-title":"Derivative-free methods for policy optimization: Guarantees for linear quadratic systems","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Malik","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2020.3006256"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3087455"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1137\/20M1347942"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2023.3234176"},{"key":"ref14","first-page":"11598","article-title":"Policy optimization provably converges to nash equilibria in zero-sum linear quadratic games","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2019"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-023-01938-4"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2023.3275732"},{"key":"ref17","first-page":"287","article-title":"Learning the globally optimal distributed LQ regulator","volume-title":"Proc. Conf. Learn. Dyn. Control","author":"Furieri","year":"2020"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3128592"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1137\/20M1329858"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3037046"},{"key":"ref21","first-page":"3036","article-title":"The gap between model-based and model-free methods on the linear quadratic regulator: An asymptotic viewpoint","volume-title":"Proc. 32nd Conf. Learn. Theory","volume":"99","author":"Tu","year":"2019"},{"key":"ref22","first-page":"8353","article-title":"Provably global convergence of actor-critic: A case for linear quadratic regulator with ergodic cost","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Yang","year":"2019"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10383470"},{"key":"ref24","article-title":"Data-enabled policy optimization for direct adaptive learning of the LQR","author":"Zhao","year":"2024"},{"key":"ref25","first-page":"16989","article-title":"On the sample complexity of stabilizing lti systems on a single trajectory","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","volume":"35","author":"Hu","year":"2022"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2023.111130"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9304202"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2021.3072007"},{"key":"ref29","first-page":"29274","article-title":"Stabilizing dynamical systems via policy gradient methods","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","volume":"34","author":"Perdomo","year":"2021"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147961"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2021.3102008"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1977.1101470"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-019-09426-y"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2020.3042924"},{"key":"ref35","first-page":"1","article-title":"Regret bounds for the adaptive control of linear quadratic systems","volume-title":"Proc. 24th Annu. Conf. Learn. Theory","volume":"19","author":"Abbasi-Yadkori","year":"2011"},{"key":"ref36","first-page":"2636","article-title":"Efficient reinforcement learning for high dimensional linear quadratic systems","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","volume":"25","author":"Ibrahimi","year":"2012"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3131148"},{"key":"ref38","first-page":"1114","article-title":"Black-box control for linear dynamical systems","volume-title":"Proc. Conf. Learn. Theory","author":"Chen","year":"2021"},{"key":"ref39","article-title":"Explore more and improve regret in linear quadratic regulators","author":"Lale","year":"2020"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2883241"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2004.09.003"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2959924"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.109548"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.2966717"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3047577"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2021.104985"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2021.09.005"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.110153"},{"key":"ref49","first-page":"269","article-title":"Discount factor as a regularizer in reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amit","year":"2020"},{"key":"ref50","article-title":"How to discount deep reinforcement learning: Towards new dynamic strategies","author":"Franois-Lavet","year":"2015"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9\/10906769\/10669082.pdf?arnumber=10669082","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T19:03:32Z","timestamp":1740769412000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10669082\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":50,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tac.2024.3455508","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"},{"value":"2334-3303","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3]]}}}