{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T08:18:05Z","timestamp":1729671485001,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,12]]},"DOI":"10.1109\/cdc.2010.5717644","type":"proceedings-article","created":{"date-parts":[[2011,2,22]],"date-time":"2011-02-22T17:37:42Z","timestamp":1298396262000},"page":"3034-3039","source":"Crossref","is-referenced-by-count":2,"title":["Pathologies of temporal difference methods in approximate dynamic programming"],"prefix":"10.1109","author":[{"given":"Dimitri P.","family":"Bertsekas","sequence":"first","affiliation":[]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1287\/moor.1060.0188"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1007\/BF00114724"},{"key":"ref31","first-page":"23","article-title":"Improvements on Learning Tetris with Cross-Entropy","volume":"32","author":"thiery","year":"2009","journal-title":"Int Comput Game Assoc J"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1007\/BF00992697"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1287\/moor.1100.0441"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1007\/BF00114723"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","author":"chang","year":"2007","journal-title":"Simulation-Based Algorithms for Markov Decision Processes"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1007\/978-0-387-69082-7"},{"key":"ref13","article-title":"Approximate Dynamic Programming via a Smoothed Approximate Linear Program","author":"desai","year":"2009","journal-title":"submitted"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1007\/1-84628-095-8_6"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"key":"ref16","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-3766-0","author":"gosavi","year":"2003","journal-title":"Simulation-based Optimization Parametric Optimization Techniques and Reinforcement Learning"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/ADPRL.2007.368208"},{"key":"ref18","article-title":"A Natural Policy Gradient","volume":"14","author":"kakade","year":"2002","journal-title":"Advances in neural information processing systems"},{"key":"ref19","first-page":"1107","article-title":"Least-Squares Policy Iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"J of Machine Learning Research"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1007\/BF00115009"},{"year":"1996","author":"bertsekas","journal-title":"Neuro-Dynamic Programming","key":"ref4"},{"year":"1998","author":"sutton","journal-title":"Reinforcement Learning","key":"ref27"},{"key":"ref3","article-title":"Temporal Differences-Based Policy Iteration and Applications in Neuro-Dynamic Programming","author":"bertsekas","year":"1996","journal-title":"LIDS-P-2349"},{"key":"ref6","volume":"i and ii","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1162\/neco.2006.18.12.2936"},{"year":"1996","author":"bertsekas","article-title":"Lecture at NSF Workshop on Reinforcement Learning","key":"ref5"},{"key":"ref8","article-title":"Approximate Policy Iteration: A Survey and Some New Methods","author":"bertsekas","year":"2010","journal-title":"Report LIDS-P-2833 MIT to appear in Journal of Control Theory and Applications"},{"year":"2010","author":"bertsekas","journal-title":"Approximate Dynamic Programming","key":"ref7"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.1007\/978-93-86279-38-5","author":"borkar","year":"2008","journal-title":"Stochastic Approximation A Dynamical Systems Viewpoint"},{"key":"ref1","article-title":"Online Least-Squares Policy Iteration for Reinforcement Learning Control","author":"busoniu","year":"2009","journal-title":"unpublished report"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1017\/CBO9780511804410"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov decision processes discrete stochastic dynamic programming"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1002\/9780470182963"},{"key":"ref24","article-title":"Reinforcement Learning with Soft State Aggregation","author":"singh","year":"1995","journal-title":"Advances in Neural Information Processing Systems 7"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1147\/rd.116.0601"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1147\/rd.33.0210"}],"event":{"name":"2010 49th IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2010,12,15]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2010,12,17]]}},"container-title":["49th IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5707200\/5716927\/05717644.pdf?arnumber=5717644","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,11,19]],"date-time":"2021-11-19T09:25:16Z","timestamp":1637313916000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5717644\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/cdc.2010.5717644","relation":{},"subject":[],"published":{"date-parts":[[2010,12]]}}}