{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T05:18:52Z","timestamp":1740719932972,"version":"3.38.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,16]]},"DOI":"10.1109\/cdc56724.2024.10885957","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T18:43:32Z","timestamp":1740595412000},"page":"6016-6021","source":"Crossref","is-referenced-by-count":0,"title":["On the Convergence of Policy Gradient for Designing a Linear Quadratic Regulator by Leveraging a Proxy System"],"prefix":"10.1109","author":[{"given":"Lintao","family":"Ye","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology,School of Artificial Intelligence and Automation,Wuhan,China,430074"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aritra","family":"Mitra","sequence":"additional","affiliation":[{"name":"North Carolina State University,Department of Electrical and Computer Engineering,Raleigh,NC,USA,27695"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vijay","family":"Gupta","sequence":"additional","affiliation":[{"name":"Purdue University,Elmore Family School of Electrical and Computer Engineering,West Lafayette,IN,USA,47906"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"295","article-title":"Reinforcement learning applied to linear quadratic regulation","volume-title":"Proc. Advances in Neural Information Processing Systems","author":"Bradtke"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2012.2214134"},{"key":"ref3","article-title":"Certainty equivalence is efficient for linear quadratic control","volume-title":"Proc. Advances in Neural Information Processing Systems","volume":"32","author":"Mania"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1137\/20M1347942"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CDC51059.2022.9992773"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3215940"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2023.3310345"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020021"},{"key":"ref9","article-title":"Provably global convergence of actor-critic: A case for linear quadratic regulator with ergodic cost","volume-title":"Proc. Advances in neural information processing systems","volume":"32","author":"Yang"},{"key":"ref10","first-page":"5005","article-title":"Least-squares temporal difference learning for the linear quadratic regulator","volume-title":"Proc. International Conference on Machine Learning","author":"Tu"},{"key":"ref11","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. International conference on machine learning","author":"Fazel"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2021.0014"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3037046"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s40305-024-00546-z"},{"key":"ref15","first-page":"2916","article-title":"Derivative-free methods for policy optimization: Guarantees for linear quadratic systems","volume-title":"Proc. International Conference on Artificial Intelligence and Statistics","author":"Malik"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1137\/20M1382386"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CDC51059.2022.9992404"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2023.3330792"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2025.3543574"},{"key":"ref20","volume-title":"Dynamic Programming and Optimal Control, 3rd ed","volume":"II","author":"Bertsekas","year":"2011"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1137\/20M1329858"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2009.5400233"},{"key":"ref23","first-page":"1304","article-title":"Online policy gradient for model free learning of linear quadratic regulators with \u221aT regret","volume-title":"Proc. International Conference on Machine Learning","author":"Cassel"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46128-1_50"},{"key":"ref25","first-page":"1328","article-title":"Logarithmic regret for learning linear quadratic regulators efficiently","volume-title":"Proc. International Conference on Machine Learning","author":"Cassel"},{"key":"ref26","article-title":"Model-free learning, with heterogeneous dynamical systems: A federated LQR approach","author":"Wang","year":"2023","journal-title":"arXiv preprint:2308.11743"},{"key":"ref27","first-page":"1114","article-title":"Black-box control for linear dynamical systems","volume-title":"Proc. Conference on Learning Theory","author":"Chen"}],"event":{"name":"2024 IEEE 63rd Conference on Decision and Control (CDC)","start":{"date-parts":[[2024,12,16]]},"location":"Milan, Italy","end":{"date-parts":[[2024,12,19]]}},"container-title":["2024 IEEE 63rd Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10885784\/10885785\/10885957.pdf?arnumber=10885957","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T08:04:56Z","timestamp":1740643496000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10885957\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,16]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/cdc56724.2024.10885957","relation":{},"subject":[],"published":{"date-parts":[[2024,12,16]]}}}