{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T21:51:52Z","timestamp":1768341112973,"version":"3.49.0"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Swedish Research Council Distinguished Professor","award":["2017-01078"],"award-info":[{"award-number":["2017-01078"]}]},{"name":"Knut and Alice Wallenberg Foundation Wallenberg Scholar Grant"},{"name":"Swedish Strategic Research Foundation SUCCESS","award":["FUS21-0026"],"award-info":[{"award-number":["FUS21-0026"]}]},{"DOI":"10.13039\/100000181","name":"Air Force Office of Scientific Research","doi-asserted-by":"publisher","award":["#FA9550-19-1-0169"],"award-info":[{"award-number":["#FA9550-19-1-0169"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF","award":["CNS-1932011"],"award-info":[{"award-number":["CNS-1932011"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1109\/tac.2025.3575649","type":"journal-article","created":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T14:05:18Z","timestamp":1748873118000},"page":"7477-7492","source":"Crossref","is-referenced-by-count":2,"title":["Policy Evaluation in Distributional LQR"],"prefix":"10.1109","volume":"70","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6464-492X","authenticated-orcid":false,"given":"Zifan","family":"Wang","sequence":"first","affiliation":[{"name":"Division of Decision and Control Systems, School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology, Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2338-5487","authenticated-orcid":false,"given":"Yulong","family":"Gao","sequence":"additional","affiliation":[{"name":"Department of Electrical and Electronic Engineering, Imperial College London, London, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1146-2473","authenticated-orcid":false,"given":"Siyi","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computation, Information and Technology, Technical University of Munich, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1748-8228","authenticated-orcid":false,"given":"Michael M.","family":"Zavlanos","sequence":"additional","affiliation":[{"name":"Department of Mechanical Engineering and Materials Science, Duke University, Durham, NC, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5627-9093","authenticated-orcid":false,"given":"Alessandro","family":"Abate","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Oxford, Oxford, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9940-5929","authenticated-orcid":false,"given":"Karl H.","family":"Johansson","sequence":"additional","affiliation":[{"name":"Division of Decision and Control Systems, School of Electrical Engineering and Computer Science, KTH Royal Institute of Technology, Stockholm, Sweden"}]}],"member":"263","reference":[{"key":"ref1","first-page":"449","article-title":"A distributional perspective on reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bellemare","year":"2017"},{"key":"ref2","article-title":"Distributed distributional deterministic policy gradients","author":"Barth-Maron","year":"2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"ref4","first-page":"676","article-title":"Sample-based distributional policy gradient","volume-title":"Proc. Learn. Dyn. Control Conf.","author":"Singh","year":"2022"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636847"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-94662-3_8"},{"issue":"221","key":"ref7","first-page":"1","article-title":"Bridging distributional and risk-sensitive reinforcement learning with provable regret bounds","volume":"25","author":"Liang","year":"2024","journal-title":"J. Mach. Learn. Res."},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2019.2919467"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.geits.2022.100062"},{"key":"ref10","first-page":"29","article-title":"An analysis of categorical distributional reinforcement learning","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Rowland","year":"2018"},{"issue":"163","key":"ref11","first-page":"1","article-title":"An analysis of quantile temporal-difference learning","volume":"25","author":"Rowland","year":"2024","journal-title":"J. Mach. Learn. Res."},{"key":"ref12","first-page":"958","article-title":"Improving robustness via risk averse distributional reinforcement learning","volume-title":"Proc. Learn. Dyn. Control Conf.","author":"Singh","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-019-09426-y"},{"key":"ref14","first-page":"5005","article-title":"Least-squares temporal difference learning for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tu","year":"2018"},{"key":"ref15","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fazel","year":"2018"},{"key":"ref16","first-page":"2916","article-title":"Derivative-free methods for policy optimization: Guarantees for linear quadratic systems","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Malik","year":"2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3128592"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3145632"},{"key":"ref19","first-page":"559","article-title":"Sample complexity of linear quadratic Gaussian (LQG) control for output feedback systems","volume-title":"Proc. Learn. Dyn. Control Conf.","author":"Zheng","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2015.2444134"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2024.112095"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1137\/22M1494105"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3131149"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3195381"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2021.3086842"},{"key":"ref26","article-title":"Risk-aware stability of discrete-time systems","author":"Chapman","year":"2022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3142131"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/22M1494105"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2024.3394348"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3030884"},{"key":"ref31","first-page":"18613","article-title":"Distributionally robust linear quadratic control","volume":"36","author":"Ta\u015fkesen","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2025.3575649"},{"key":"ref33","volume-title":"Introduction to Stochastic Control Theory","author":"\u00c5str\u00f6m","year":"2012"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/14207.001.0001","volume-title":"Distributional Reinforcement Learning","author":"Bellemare","year":"2023"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/9.59806"},{"key":"ref36","volume-title":"Dynamic Programming and Optimal Control: Volume II","author":"Bertsekas","year":"2012"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9304202"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/9\/11218261\/11020758-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9\/11218261\/11020758.pdf?arnumber=11020758","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T17:51:23Z","timestamp":1761760283000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11020758\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":38,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tac.2025.3575649","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"},{"value":"2334-3303","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11]]}}}