{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T10:49:26Z","timestamp":1725706166221},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/cdc40024.2019.9029247","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T00:43:11Z","timestamp":1584060191000},"page":"5244-5251","source":"Crossref","is-referenced-by-count":5,"title":["Quasi-Stochastic Approximation and Off-Policy Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Andrey","family":"Bernstein","sequence":"first","affiliation":[]},{"given":"Yue","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Marcello","family":"Colombino","sequence":"additional","affiliation":[]},{"given":"Emiliano","family":"Dall'Anese","sequence":"additional","affiliation":[]},{"given":"Prashant","family":"Mehta","sequence":"additional","affiliation":[]},{"given":"Sean","family":"Meyn","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref32","article-title":"Optimal rate of convergence for quasi-stochastic approximation","author":"bernstein","year":"2019","journal-title":"(in preparation)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.1994.735224"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref35","article-title":"The proximal augmented Lagrangian method for nonsmooth composite optimization","author":"dhingra","year":"2018","journal-title":"IEEE Trans Automat Control"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1287\/opre.51.6.850.24925"},{"key":"ref10","first-page":"3598","article-title":"Q-learning and Pontryagin&#x2019;s minimum principle","author":"mehta","year":"2009","journal-title":"Proc of the IEEE Conf on Dec and Control"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1214\/105051604000000116"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-4087-0_2"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1002\/0471669784"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1080\/02331889008802246"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1515\/mcma-2011-0018"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2011.5991485"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729392"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/9.119632"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/858481.858486"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176346589"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-75894-2"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/0330046"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref6"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesv\u00e1ri","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-2696-8"},{"key":"ref8","article-title":"Fastest convergence for Q-learning","author":"devraj","year":"2017","journal-title":"ArXiv e-prints"},{"key":"ref7","article-title":"Feature selection for neuro-dynamic programming","author":"huang","year":"2011","journal-title":"Reinforcement Learning and Approximate Dynamic Programming for Feedback Control"},{"journal-title":"Stochastic Approximation A Dynamical Systems View-point","year":"2008","author":"borkar","key":"ref2"},{"key":"ref9","article-title":"Zap Q-learning","author":"devraj","year":"2017","journal-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref20","first-page":"385","article-title":"Online convex optimization in the bandit setting: Gradient descent without a gradient","author":"flaxman","year":"2005","journal-title":"Proceedings of the sixteenth annual ACM-SIAM symposium on Discrete algorithms"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1561\/2200000024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.04.016"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00183-1"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2839563"},{"key":"ref26","article-title":"Efficient estimators from a slowly convergent Robbins-Monro processes","author":"ruppert","year":"1988","journal-title":"Cornell University School of Operations Research and Industrial Engineering Ithaca NY Tech Rep Tech Rept No 781"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/9.895589"}],"event":{"name":"2019 IEEE 58th Conference on Decision and Control (CDC)","start":{"date-parts":[[2019,12,11]]},"location":"Nice, France","end":{"date-parts":[[2019,12,13]]}},"container-title":["2019 IEEE 58th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8977134\/9028853\/09029247.pdf?arnumber=9029247","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,28]],"date-time":"2023-09-28T11:01:55Z","timestamp":1695898915000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9029247\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/cdc40024.2019.9029247","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}