{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T21:51:20Z","timestamp":1768341080624,"version":"3.49.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T00:00:00Z","timestamp":1702425600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T00:00:00Z","timestamp":1702425600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000183","name":"ARO","doi-asserted-by":"publisher","award":["W911NF2010055"],"award-info":[{"award-number":["W911NF2010055"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["EPCN 1935389"],"award-info":[{"award-number":["EPCN 1935389"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,13]]},"DOI":"10.1109\/cdc49753.2023.10384289","type":"proceedings-article","created":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T13:38:36Z","timestamp":1705671516000},"page":"776-781","source":"Crossref","is-referenced-by-count":4,"title":["Convex Q Learning in a Stochastic Environment"],"prefix":"10.1109","author":[{"given":"Fan","family":"Lu","sequence":"first","affiliation":[{"name":"University of Florida,Department of Electrical and Computer Engineering,Gainesville,FL,32611"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sean P.","family":"Meyn","sequence":"additional","affiliation":[{"name":"University of Florida,Department of Electrical and Computer Engineering,Gainesville,FL,32611"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref2","first-page":"3610","article-title":"Logistic Q-learning","volume-title":"Proc. of The Intl. Conference on Artificial Intelligence and Statistics","volume":"130","author":"Bas Serrano"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1040.0094"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1060.0208"},{"key":"ref5","first-page":"2232","article-title":"Zap Q-learning","volume-title":"Proc. of the Intl. Conference on Neural Information Processing Systems","author":"Devraj","year":"2017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3133184"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2019.8815153"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2022.105392"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2743163"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9483244"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CDC51059.2022.9992867"},{"key":"ref12","author":"Lu","year":"2023","journal-title":"Convex Q-learning in a stochastic environment: Extended version"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.6.3.259"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2009.5399753"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1017\/9781009051873"},{"key":"ref16","author":"Meyn","year":"2023","journal-title":"Stability of Q-learning through design and optimism"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804410"},{"key":"ref18","first-page":"1101","article-title":"Efficient global planning in large MDPs via stochastic primal-dual optimization","volume-title":"International Conference on Algorithmic Learning Theory","author":"Neu","year":"2023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553478"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2018.2851375"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/0022-247X(85)90317-8"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114724"},{"key":"ref24","volume-title":"Learning from Delayed Rewards","author":"Watkins","year":"1989"}],"event":{"name":"2023 62nd IEEE Conference on Decision and Control (CDC)","location":"Singapore, Singapore","start":{"date-parts":[[2023,12,13]]},"end":{"date-parts":[[2023,12,15]]}},"container-title":["2023 62nd IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10383192\/10383193\/10384289.pdf?arnumber=10384289","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T11:26:46Z","timestamp":1706009206000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10384289\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,13]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/cdc49753.2023.10384289","relation":{},"subject":[],"published":{"date-parts":[[2023,12,13]]}}}