{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T01:39:51Z","timestamp":1768354791993,"version":"3.49.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1109\/cdc57313.2025.11312642","type":"proceedings-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T18:19:56Z","timestamp":1768241996000},"page":"4174-4181","source":"Crossref","is-referenced-by-count":1,"title":["Policy Gradient for LQR with Domain Randomization"],"prefix":"10.1109","author":[{"given":"Tesshu","family":"Fujinami","sequence":"first","affiliation":[{"name":"University of Pennsylvania,Department of Electrical and Systems Engineering"}]},{"given":"Bruce D.","family":"Lee","sequence":"additional","affiliation":[{"name":"University of Pennsylvania,Department of Electrical and Systems Engineering"}]},{"given":"Nikolai","family":"Matni","sequence":"additional","affiliation":[{"name":"University of Pennsylvania,Department of Electrical and Systems Engineering"}]},{"given":"George J.","family":"Pappas","sequence":"additional","affiliation":[{"name":"University of Pennsylvania,Department of Electrical and Systems Engineering"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref3","article-title":"Solving rubik\u2019s cube with a robot hand","author":"Akkaya","year":"2019"},{"key":"ref4","first-page":"1162","article-title":"Active domain randomization","volume-title":"Conference on Robot Learning","author":"Mehta"},{"key":"ref5","article-title":"Domain randomization is sample efficient for linear quadratic control","author":"Fujinami","year":"2025"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2019.2942989"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2006.875041"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/9.62270"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(93)90187-X"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(01)00122-4"},{"key":"ref11","article-title":"Understanding domain randomization for sim-to-real transfer","author":"Chen","year":"2021"},{"key":"ref12","article-title":"Pac reinforcement learning without real-world feedback","author":"Zhong","year":"2019"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2022.799893"},{"key":"ref14","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"International conference on machine learning","author":"Fazel"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1137\/20m1347942"},{"key":"ref16","article-title":"Global convergence using policy gradient methods for model-free markovian jump linear quadratic control","author":"Rathod","year":"2021"},{"key":"ref17","article-title":"Globally convergent policy search over dynamic filters for output estimation","author":"Umenberger","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2019.12.176"},{"key":"ref20","article-title":"Model-free learning with heterogeneous dynamical systems: A federated lqr approach","author":"Wang","year":"2023"},{"key":"ref21","first-page":"29274","article-title":"Stabilizing dynamical systems via policy gradient methods","volume":"34","author":"Perdomo","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.1109\/CDC57313.2025.11312642","article-title":"Policy gradient for lqr with domain randomization","author":"Fujinami","year":"2025"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1017\/9781108231596"},{"key":"ref24","article-title":"Blue River Controls: A toolkit for Reinforcement Learning Control Systems on Hardware","volume-title":"Accepted at the Workshop on Deep Reinforcement Learning at the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019)","author":"Polzounov"},{"key":"ref25","first-page":"8937","article-title":"Naive exploration is optimal for online lqr","volume-title":"International Conference on Machine Learning","author":"Simchowitz"}],"event":{"name":"2025 IEEE 64th Conference on Decision and Control (CDC)","location":"Rio de Janeiro, Brazil","start":{"date-parts":[[2025,12,9]]},"end":{"date-parts":[[2025,12,12]]}},"container-title":["2025 IEEE 64th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11311984\/11311968\/11312642.pdf?arnumber=11312642","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T08:44:00Z","timestamp":1768293840000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11312642\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/cdc57313.2025.11312642","relation":{},"subject":[],"published":{"date-parts":[[2025,12,9]]}}}