{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T14:58:22Z","timestamp":1777388302905,"version":"3.51.4"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["52202487"],"award-info":[{"award-number":["52202487"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"State Key Laboratory of Automotive Safety and Energy, China","award":["KFY2212"],"award-info":[{"award-number":["KFY2212"]}]},{"name":"Singapore Ministry of Education Tier 1 Academic Research Fund","award":["A0009030-00-00"],"award-info":[{"award-number":["A0009030-00-00"]}]},{"name":"Singapore Ministry of Education Tier 1 Academic Research Fund","award":["22-5460-A0001"],"award-info":[{"award-number":["22-5460-A0001"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/tcyb.2023.3323316","type":"journal-article","created":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T18:00:36Z","timestamp":1698343236000},"page":"3588-3601","source":"Crossref","is-referenced-by-count":9,"title":["Optimization Landscape of Policy Gradient Methods for Discrete-Time Static Output Feedback"],"prefix":"10.1109","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3697-1576","authenticated-orcid":false,"given":"Jingliang","family":"Duan","sequence":"first","affiliation":[{"name":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3718-5593","authenticated-orcid":false,"given":"Jie","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1050-1285","authenticated-orcid":false,"given":"Xuyang","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, National University of Singapore, Queenstown, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0656-1901","authenticated-orcid":false,"given":"Kai","family":"Zhao","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, National University of Singapore, Queenstown, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1078-887X","authenticated-orcid":false,"given":"Lin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, National University of Singapore, Queenstown, Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-7784-8"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3255264"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3163816"},{"key":"ref5","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Represent. (ICLR)","author":"Lillicrap"},{"key":"ref6","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn. (ICML)","author":"Haarnoja"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3082568"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10118-9"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3137524"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/19M1288012"},{"key":"ref11","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fazel"},{"key":"ref12","article-title":"LQR through the lens of first order methods: Discrete-time case","author":"Bu","year":"2019","journal-title":"arXiv:1907.08921"},{"key":"ref13","article-title":"Global optimality guarantees for policy gradient methods","author":"Bhandari","year":"2019","journal-title":"arXiv:1906.01786"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029985"},{"key":"ref15","first-page":"2916","article-title":"Derivative-free methods for policy optimization: Guarantees for linear quadratic systems","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Malik"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1137\/20M1382386"},{"key":"ref17","first-page":"559","article-title":"Sample complexity of linear quadratic Gaussian (LQG) control for output feedback systems","volume-title":"Proc. Learn. Dyn. Control","author":"Zheng"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9483417"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3176439"},{"key":"ref20","first-page":"287","article-title":"Learning the globally optimal distributed LQ regulator","volume-title":"Proc. Learn. Dyn. Control","author":"Furieri"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1137\/20m1347942"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2017.2655501"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3115785"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3192871"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3222351"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(96)00141-0"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/20M1329858"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/19M123765X"},{"key":"ref29","article-title":"On topological and metrical properties of stabilizing feedback gains: The MIMO case","author":"Bu","year":"2019","journal-title":"arXiv:1904.02737"},{"key":"ref30","first-page":"1531","article-title":"A natural policy gradient","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"14","author":"Kakade"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICNN.1997.614194"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2884649"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-9467-7"},{"issue":"4","key":"ref34","first-page":"643","article-title":"Gradient methods for minimizing functionals","volume":"3","author":"Polyak","year":"1963","journal-title":"Zhurnal vychislitelnoi matematiki i matematicheskoi fiziki"},{"issue":"4","key":"ref35","first-page":"5","article-title":"Introductory lectures on convex programming volume I: Basic course","volume":"3","author":"Nesterov","year":"1998","journal-title":"Lecture Notes"},{"issue":"87","key":"ref36","first-page":"2","article-title":"A topological property of real analytic subsets","volume":"117","author":"Lojasiewicz","year":"1963","journal-title":"Coll. du CNRS, Les \u00c9qu. aux d\u00e9riv\u00e9es partielles"},{"key":"ref37","first-page":"599","article-title":"Analysis of the optimization landscape of linear quadratic Gaussian (LQG) control","volume-title":"Proc. Learn. Dyn. Control","author":"Tang"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2023.3275732"},{"key":"ref39","first-page":"1724","article-title":"How to escape saddle points efficiently","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jin"},{"key":"ref40","first-page":"797","article-title":"Escaping from saddle points\u2014Online stochastic gradient for tensor decomposition","volume-title":"Proc. Conf. Learn. Theory","author":"Ge"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1137\/17M1114296"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718768"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-015-9296-2"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CoDIT49905.2020.9263912"},{"key":"ref45","volume-title":"Applied Optimal Control & Estimation: Digital Design & Implementation","author":"Lewis","year":"1992"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3103148"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/10542154\/10297124.pdf?arnumber=10297124","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,31]],"date-time":"2024-05-31T04:32:45Z","timestamp":1717129965000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10297124\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":46,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2023.3323316","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}