{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T21:36:28Z","timestamp":1729632988488,"version":"3.28.0"},"reference-count":41,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,12]]},"DOI":"10.1109\/cdc.2010.5717930","type":"proceedings-article","created":{"date-parts":[[2011,2,22]],"date-time":"2011-02-22T17:37:42Z","timestamp":1298396262000},"page":"1409-1416","source":"Crossref","is-referenced-by-count":10,"title":["Q-learning and enhanced policy iteration in discounted dynamic programming"],"prefix":"10.1109","author":[{"given":"Dimitri P.","family":"Bertsekas","sequence":"first","affiliation":[]},{"given":"Huizhen","family":"Yu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Analysis of Some Incremental Variants of Policy Iteration: First Steps Toward Understanding Actor-Critic Learning Systems","author":"williams","year":"1993","journal-title":"Report NU-CCS-93&#x2013;11"},{"article-title":"Learning from Delayed Rewards","year":"0","author":"watkins","key":"ref38"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114724"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1986.1104412"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"article-title":"Reinforcement Learning","year":"1998","author":"sutton","key":"ref30"},{"key":"ref37","article-title":"On Regression-Based Stopping Times","author":"van roy","year":"2009","journal-title":"Discrete Event Dynamic Systems"},{"key":"ref36","first-page":"59","article-title":"On the Convergence of Optimistic Policy Iteration","volume":"3","author":"tsitsiklis","year":"2002","journal-title":"J of Machine Learning Research"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993306"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/9.793723"},{"journal-title":"Approximate Dynamic Programming","year":"2010","author":"bertsekas","key":"ref10"},{"key":"ref40","article-title":"A Least Squares Q-Learning Algorithm for Optimal Stopping Problems","author":"yu","year":"0","journal-title":"Lab for Information and Decision Systems Report 2731"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2007.09.009"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012995282784"},{"article-title":"Stochastic Approximation: A Dynamical Systems Viewpoint","year":"2008","author":"borkar","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017936530646"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref16","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","article-title":"Simulation-Based Algorithms for Markov Decision Processes","author":"chang","year":"2007"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-69082-7"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10626-006-8134-8"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"article-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref4"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","article-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"puterman","year":"1994"},{"article-title":"Parallel and Distributed Computation: Numerical Methods","year":"1997","author":"bertsekas","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1982.1102980"},{"key":"ref29","article-title":"A Convergent O(n) Algorithm for Off-Policy Temporal-Difference Learning with Linear Function Approximation","author":"sutton","year":"2008","journal-title":"Proc NIPS"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2010.5717930"},{"key":"ref8","article-title":"Dynamic Programming and Optimal Control","volume":"i","author":"bertsekas","year":"2005"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF02591967"},{"key":"ref2","article-title":"Temporal Differences-Based Policy Iteration and Applications in Neuro-Dynamic Programming","author":"bertsekas","year":"1996","journal-title":"Lab for Info and Decision Systems Report LIDS-P-2349"},{"key":"ref9","article-title":"Dynamic Programming and Optimal Control","volume":"ii","author":"bertsekas","year":"2007"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012998346621"},{"article-title":"Simulation-Based Optimization Parametric Optimization Techniques and Reinforcement Learning","year":"2003","author":"gosavi","key":"ref20"},{"key":"ref22","article-title":"Reinforcement Learning Algorithm for Partially Observable Markov Decision Problems","author":"jaakkola","year":"1995","journal-title":"Proc NIPS"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"ref24","article-title":"Convergent Temporal-Difference Learning with Arbitrary Smooth Function Approximation","author":"maei","year":"2009","journal-title":"Proc NIPS"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927528"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5732-z"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"article-title":"Control Techniques for Complex Networks","year":"2007","author":"meyn","key":"ref25"}],"event":{"name":"2010 49th IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2010,12,15]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2010,12,17]]}},"container-title":["49th IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5707200\/5716927\/05717930.pdf?arnumber=5717930","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,19]],"date-time":"2017-06-19T20:05:10Z","timestamp":1497902710000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5717930\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/cdc.2010.5717930","relation":{},"subject":[],"published":{"date-parts":[[2010,12]]}}}