{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T12:27:06Z","timestamp":1730204826936,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,6]],"date-time":"2022-12-06T00:00:00Z","timestamp":1670284800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,6]]},"DOI":"10.1109\/cdc51059.2022.9992867","type":"proceedings-article","created":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T19:26:56Z","timestamp":1673378816000},"page":"4065-4071","source":"Crossref","is-referenced-by-count":2,"title":["Convex Analytic Theory for Convex Q-Learning"],"prefix":"10.1109","author":[{"given":"Fan","family":"Lu","sequence":"first","affiliation":[{"name":"University of Florida,Department of Electrical and Computer Engineering,Gainesville,FL,32611"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Prashant G.","family":"Mehta","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign (UIUC),Mechanical Science and Engineering,Coordinated Science Laboratory and the Department"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sean P.","family":"Meyn","sequence":"additional","affiliation":[{"name":"University of Florida,Department of Electrical and Computer Engineering,Gainesville,FL,32611"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gergely","family":"Neu","sequence":"additional","affiliation":[{"name":"Universitat Pompeu Fabra (Barcelona, Spain),Department of Information and Communication Technologies"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref2","first-page":"3610","article-title":"Logistic Q-learning","volume-title":"Proc. of The Intl. Conference on Artificial Intelligence and Statistics","volume":"130","author":"Bas Serrano"},{"volume-title":"Neuro-Dynamic Programming. Atena Scientific","year":"1996","author":"Bertsekas","key":"ref3"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1007\/978-1-4615-0805-2_11"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1287\/moor.1060.0208"},{"key":"ref6","first-page":"996","article-title":"Reinforcement learning with function approximation converges to a region","volume-title":"Proc. of the 13th Intl. Conference on Neural Information Processing Systems","author":"Gordon"},{"key":"ref7","article-title":"Approaching quartic convergence rates for quasi-stochastic approximation with application to gradient-free optimization","author":"Lauand","year":"2022","journal-title":"Neurips (to appear"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/MCS.2023.3291884"},{"key":"ref19","article-title":"Random features for large-scale kernel machines","volume":"20","author":"Rahimi","year":"2007","journal-title":"Advances in neural information processing system"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.23919\/ACC.2019.8815275"},{"key":"ref10","first-page":"arXiv:1912.02270","article-title":"A unified switching system perspective and ODE analysis of Q-learning algorithms","author":"Lee","year":"2019"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.23919\/ACC50511.2021.9483244"},{"volume-title":"Linear and nonlinear programming","year":"2003","author":"Luenberger","key":"ref12"},{"key":"ref13","first-page":"719","article-title":"Toward off-policy learning control with function approximation","volume-title":"Proc. ICML","author":"Maei"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1287\/mnsc.6.3.259"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/CDC.2009.5399753"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1145\/1390156.1390240"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1017\/9781009051873"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/5.58338"},{"key":"ref20","article-title":"Weighted sums of random kitchen sinks: Replacing minimization with randomization in learning","volume":"21","author":"Rahimi","year":"2008","journal-title":"Advances in neural information processing system"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1016\/0022-247X(85)90317-8"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref23","first-page":"1609","article-title":"A convergent O(n) algorithm for off-policy temporal-difference learning with linear function approximation","volume-title":"Proc. of the Intl. Conference on Neural Information Processing Systems","author":"Sutton"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1007\/BF00114724"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1016\/j.sysconle.2008.10.004"}],"event":{"name":"2022 IEEE 61st Conference on Decision and Control (CDC)","start":{"date-parts":[[2022,12,6]]},"location":"Cancun, Mexico","end":{"date-parts":[[2022,12,9]]}},"container-title":["2022 IEEE 61st Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9992315\/9992317\/09992867.pdf?arnumber=9992867","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T11:52:08Z","timestamp":1706788328000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9992867\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,6]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/cdc51059.2022.9992867","relation":{},"subject":[],"published":{"date-parts":[[2022,12,6]]}}}