{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T14:17:32Z","timestamp":1761488252127,"version":"3.28.0"},"reference-count":39,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009,3]]},"DOI":"10.1109\/adprl.2009.4927527","type":"proceedings-article","created":{"date-parts":[[2009,5,19]],"date-time":"2009-05-19T19:50:44Z","timestamp":1242762644000},"page":"66-73","source":"Crossref","is-referenced-by-count":9,"title":["A convergent recursive least squares approximate policy iteration algorithm for multi-dimensional Markov decision process with continuous state and action spaces"],"prefix":"10.1109","author":[{"given":"Jun","family":"Ma","sequence":"first","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Warren B.","family":"Powell","sequence":"additional","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.1994.735224"},{"key":"35","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1987.272506"},{"key":"17","first-page":"1040","article-title":"reinforcement learning with function approximation converges to a region","volume":"13","author":"gordon","year":"2001","journal-title":"Advances in neural information processing systems"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-82336-7"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"33","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"15","first-page":"1627","article-title":"a convergent form of approximate policy iteration","author":"perkins","year":"2003","journal-title":"Advances in neural information processing systems"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-3267-7"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"journal-title":"Local Polynomial Modelling and Its Applications","year":"1996","author":"fan","key":"39"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"14","doi-asserted-by":"crossref","DOI":"10.23919\/ECC.2007.7068926","article-title":"convergence of q-learning with linear function approximation","author":"melo","year":"2007","journal-title":"Proceedings of the European Control Conference 2007"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"37"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"journal-title":"Numerical Methods in Economics","year":"1998","author":"judd","key":"38"},{"key":"12","first-page":"748","article-title":"convergence of reinforcement learning with general function approximators","author":"papavassiliou","year":"1999","journal-title":"Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence"},{"key":"21","article-title":"greedy adaptive critics for lqr problems: convergence proofs","author":"landelius","year":"1997","journal-title":"Neural Computation"},{"key":"20","first-page":"295","article-title":"reinforcement learning applied to linear quadratic regulation","author":"bradtke","year":"1993","journal-title":"Advances in neural information processing systems"},{"journal-title":"Rewarding Excursions Extending Reinforcement Learning to Complex Domains","year":"2007","author":"szita","key":"22"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/9.650016"},{"key":"24","first-page":"880","article-title":"finite time bounds for sampling based fitted value iteration","author":"munos","year":"2005","journal-title":"Proceedings of the 22nd International Conference on Machine Learning"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368207"},{"key":"26","article-title":"fitted q-iteration in continuous action-space mdps","author":"antos","year":"2007","journal-title":"Proceedings of Neural Information Processing Systems Conference (NIPS)"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5038-2"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1995.7.2.270"},{"journal-title":"Stochastic Optimal Control The Discrete-Time Case","year":"1978","author":"bertsekas","key":"3"},{"journal-title":"Convergence Proofs for Least Squares Policy Iteration Algorithm of High-Dimensional Infinite Horizon Markov Decision Process Problems","year":"2008","author":"ma","key":"2"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114724"},{"key":"1","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes"},{"key":"30","first-page":"369","article-title":"generalization in reinforcement learning: safely approximating the value function","author":"boyan","year":"1995","journal-title":"Advances in neural information processing systems"},{"key":"7","first-page":"77","article-title":"approximate solutions of a discounted markovian decision process","volume":"98","author":"reetz","year":"1977","journal-title":"Bonner Mathematische Schriften"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.2307\/2003635"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"32"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.2.1.83"},{"key":"31","doi-asserted-by":"crossref","DOI":"10.1109\/CDC.1997.652501","article-title":"a neuro-dynamic programming approach to retailer inventory management","volume":"4","author":"van roy","year":"1997","journal-title":"Decision and Control 1997 Proceedings of the 36th IEEE Conference on"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1162\/jmlr.2003.4.6.1107"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1016\/0022-247X(85)90317-8"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1287\/moor.3.3.231"}],"event":{"name":"2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2009,3,30]]},"location":"Nashville, TN, USA","end":{"date-parts":[[2009,4,2]]}},"container-title":["2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4910084\/4927513\/04927527.pdf?arnumber=4927527","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T06:51:41Z","timestamp":1558335101000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4927527\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,3]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/adprl.2009.4927527","relation":{},"subject":[],"published":{"date-parts":[[2009,3]]}}}