{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T10:36:44Z","timestamp":1760524604708,"version":"3.37.3"},"reference-count":24,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Wallenberg AI, Autonomous Systems and Software Program"},{"name":"ZENITH, Excellence Center"},{"name":"Sensor informatics and Decision-making for the Digital Transformation"},{"name":"Wallenberg AI, Autonomous Systems and Software Program"},{"DOI":"10.13039\/501100004063","name":"Knut och Alice Wallenbergs Stiftelse","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004063","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["ECCS-2227311"],"award-info":[{"award-number":["ECCS-2227311"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Vinnova Competence Center 
LINK-SIC"},{"DOI":"10.13039\/501100004359","name":"Vetenskapsr\u00e5det","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004359","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/tac.2024.3385680","type":"journal-article","created":{"date-parts":[[2024,4,5]],"date-time":"2024-04-05T18:57:10Z","timestamp":1712343430000},"page":"6397-6404","source":"Crossref","is-referenced-by-count":4,"title":["Reinforcement Learning for Partially Observable Linear Gaussian Systems Using Batch Dynamics of Noisy Observations"],"prefix":"10.1109","volume":"69","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6665-5881","authenticated-orcid":false,"given":"Farnaz Adib","family":"Yaghmaie","sequence":"first","affiliation":[{"name":"Faculty of Electrical Engineering, Link\u00f6ping University, Link\u00f6ping, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0800-5140","authenticated-orcid":false,"given":"Hamidreza","family":"Modares","sequence":"additional","affiliation":[{"name":"College of Engineering, Michigan State University, East Lansing, MI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3270-171X","authenticated-orcid":false,"given":"Fredrik","family":"Gustafsson","sequence":"additional","affiliation":[{"name":"Faculty of Electrical Engineering, Link\u00f6ping University, Link\u00f6ping, Sweden"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"article-title":"A crash course on reinforcement learning","year":"2021","author":"Yaghmaie","key":"ref2"},{"volume-title":"Reinforcement Learn. 
and Optimal Control","year":"2019","author":"Bertsekas","key":"ref3"},{"key":"ref4","volume-title":"Reinforcement Learning: An Introduction","volume":"1","author":"Sutton","year":"2018"},{"key":"ref5","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fazel","year":"2018"},{"key":"ref6","article-title":"Online optimal tracking of linear systems with adversarial disturbances","author":"Yaghmaie","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"volume-title":"System Identification - Theory for the User","year":"1999","author":"Ljung","key":"ref7"},{"key":"ref8","article-title":"Online reference tracking for linear systems with unknown dynamics and unknown disturbances","author":"Niknejad","year":"2023","journal-title":"Trans. Mach. Learn. Res."},{"volume-title":"Adaptive Control","year":"1994","author":"\u00c5str\u00f6m","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2022.3145632"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029904"},{"key":"ref12","first-page":"29","article-title":"Deep recurrent Q-learning for partially observable MDPs","volume-title":"Proc. AAAI Fall Symp. - Tech. Rep.","volume":"FS-15-06","author":"Hausknecht","year":"2015"},{"key":"ref13","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","author":"Foerster","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"article-title":"On improving deep reinforcement learning for POMDPs","year":"2018","author":"Zhu","key":"ref14"},{"key":"ref15","article-title":"Making non-stochastic control (almost) as easy as stochastic","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Simchowitz","year":"2020"},{"key":"ref16","first-page":"20876","article-title":"Logarithmic regret bound in partially observable linear dynamical systems","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lale","year":"2020"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2010.2043839"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1201\/9781315221656"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1201\/9781315274737"},{"volume-title":"Dynamic Programming and Optimal Control, Vol. I","year":"2012","author":"Bertsekas","key":"ref20"},{"key":"ref21","first-page":"3108","article-title":"Model-free linear quadratic control via reduction to expert prediction","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Abbasi-Yadkori","year":"2019"},{"key":"ref22","first-page":"5005","article-title":"Least-squares temporal difference learning for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Tu","year":"2018"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1971.1099755"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1137\/20M1382386"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/9\/10654305\/10493115-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/10654305\/10493115.pdf?arnumber=10493115","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T03:28:55Z","timestamp":1736134135000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10493115\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":24,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tac.2024.3385680","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"type":"print","value":"0018-9286"},{"type":"electronic","value":"1558-2523"},{"type":"electronic","value":"2334-3303"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}