{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T14:39:24Z","timestamp":1771511964599,"version":"3.50.1"},"reference-count":15,"publisher":"Society for Industrial & Applied Mathematics (SIAM)","issue":"2","funder":[{"DOI":"10.13039\/100000879","name":"Alfred P. Sloan Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000879","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100008047","name":"Carnegie Mellon University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008047","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000181","name":"Air Force Office of Scientific Research","doi-asserted-by":"publisher","award":["FA9550-22-1-0198"],"award-info":[{"award-number":["FA9550-22-1-0198"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF-11-1-0304"],"award-info":[{"award-number":["W911NF-11-1-0304"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-19-1-2404"],"award-info":[{"award-number":["N00014-19-1-2404"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-19-1-2404"],"award-info":[{"award-number":["N00014-19-1-2404"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-22-1-2354"],"award-info":[{"award-number":["N00014-22-1-2354"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2106778"],"award-info":[{"award-number":["CCF-2106778"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DMS-2134080"],"award-info":[{"award-number":["DMS-2134080"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1901199"],"award-info":[{"award-number":["CCF-1901199"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2007911"],"award-info":[{"award-number":["CCF-2007911"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2148212"],"award-info":[{"award-number":["CNS-2148212"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2106778"],"award-info":[{"award-number":["CCF-2106778"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DMS-2134080"],"award-info":[{"award-number":["DMS-2134080"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1901199"],"award-info":[{"award-number":["CCF-1901199"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2007911"],"award-info":[{"award-number":["CCF-2007911"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2148212"],"award-info":[{"award-number":["CNS-2148212"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2221009"],"award-info":[{"award-number":["CCF-2221009"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1907661"],"award-info":[{"award-number":["CCF-1907661"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2218713"],"award-info":[{"award-number":["IIS-2218713"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2218773"],"award-info":[{"award-number":["IIS-2218773"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF 2002272"],"award-info":[{"award-number":["CCF 2002272"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS 2107304"],"award-info":[{"award-number":["IIS 2107304"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["SIAM J. Optim."],"published-print":{"date-parts":[[2023,6,30]]},"DOI":"10.1137\/21m1456789","type":"journal-article","created":{"date-parts":[[2023,6,23]],"date-time":"2023-06-23T11:05:25Z","timestamp":1687518325000},"page":"1061-1091","source":"Crossref","is-referenced-by-count":19,"title":["Policy Mirror Descent for Regularized Reinforcement Learning: A Generalized Framework with Linear Convergence"],"prefix":"10.1137","volume":"33","author":[{"given":"Wenhao","family":"Zhan","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, Princeton University, Princeton, NJ 08544 USA."}]},{"given":"Shicong","family":"Cen","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA 15213 USA."}]},{"given":"Baihe","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, Berkeley, CA 94720 USA."}]},{"given":"Yuxin","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Statistics and Data Science, Wharton School, University of Pennsylvania, Philadelphia, PA 19104 USA."}]},{"given":"Jason D.","family":"Lee","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Princeton University, Princeton, NJ 08544 USA."}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6766-5459","authenticated-orcid":true,"given":"Yuejie","family":"Chi","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, PA 15213 USA."}]}],"member":"351","published-online":{"date-parts":[[2023,6,22]]},"reference":[{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6377(02)00231-6"},{"key":"ref8","volume-title":"Dynamic Programming and Optimal Control","author":"Bertsekas D. P.","year":"2017","edition":"4"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2021.2151"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012995281742"},{"key":"ref27","doi-asserted-by":"crossref","unstructured":"G. Lan , Policy mirror descent for reinforcement learning: Linear convergence, new sampling complexity, and generalized problem classes, Math. Program., 198 (2023), pp. 1059\u20131106.","DOI":"10.1007\/s10107-022-01816-5"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-008-0261-6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-017-1173-0"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2800085"},{"key":"ref33","author":"Li G.","journal-title":"Math. Program."},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref43","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman M. L.","year":"2014"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/BF01016429"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1080\/09540099108946587"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1137\/20M1347942"}],"container-title":["SIAM Journal on Optimization"],"original-title":[],"language":"en","deposited":{"date-parts":[[2023,6,30]],"date-time":"2023-06-30T15:17:39Z","timestamp":1688138259000},"score":1,"resource":{"primary":{"URL":"https:\/\/epubs.siam.org\/doi\/10.1137\/21M1456789"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,22]]},"references-count":15,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,6,30]]}},"alternative-id":["10.1137\/21M1456789"],"URL":"https:\/\/doi.org\/10.1137\/21m1456789","relation":{},"ISSN":["1052-6234","1095-7189"],"issn-type":[{"value":"1052-6234","type":"print"},{"value":"1095-7189","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,22]]}}}