{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T13:07:31Z","timestamp":1771074451603,"version":"3.50.1"},"reference-count":82,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,4]]},"DOI":"10.1109\/adprl.2011.5967353","type":"proceedings-article","created":{"date-parts":[[2011,8,4]],"date-time":"2011-08-04T01:40:00Z","timestamp":1312422000000},"page":"1-8","source":"Crossref","is-referenced-by-count":42,"title":["Approximate reinforcement learning: An overview"],"prefix":"10.1109","author":[{"given":"Lucian","family":"Busoniu","sequence":"first","affiliation":[]},{"given":"Damien","family":"Ernst","sequence":"additional","affiliation":[]},{"given":"Bart","family":"De Schutter","sequence":"additional","affiliation":[]},{"given":"Robert","family":"Babuska","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-75225-7_15"},{"key":"ref72","author":"otterlo","year":"2009","journal-title":"The Logic of Adaptive Behavior"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553476"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.925890"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"ref74","first-page":"201","article-title":"Online optimization in X-armed bandits","author":"bubeck","year":"2009","journal-title":"Advances in Neural Information Processing Systems 21"},{"key":"ref39","first-page":"486","article-title":"Online leastsquares policy iteration for reinforcement learning control","year":"2010","journal-title":"Proceedings2010 American Control Conference (ACC-10)"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1613\/jair.2085","article-title":"Solving factored MDPs with hybrid state and action variables","volume":"27","author":"kveton","year":"2006","journal-title":"Journal of Artificial Intelligence Research"},{"key":"ref38","first-page":"33","article-title":"Learning RoboCup-keepaway with kernels","volume":"1","author":"jung","year":"0","journal-title":"Gaussian Processes in Practice ser JMLR Workshop and Conference Proceedings"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-69082-7"},{"key":"ref79","article-title":"Approximate dynamic programming","author":"bertsekas","year":"2010","journal-title":"update of Chapter 6 in volume 2 of the book Dynamic Programming and Optimal Control"},{"key":"ref33","first-page":"21","article-title":"Least-squares A policy iteration: Bias-variance trade-off in control problems","author":"thiery","year":"0","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref31","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref30","first-page":"441","article-title":"Regularized policy iteration","author":"farahmand","year":"2009","journal-title":"Advances in Neural Information Processing Systems 21"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-008-5069-3"},{"key":"ref36","first-page":"21","article-title":"Reinforcement learning as classification: Leveraging modern classifiers","author":"lagoudakis","year":"0","journal-title":"Proc of the 20th International Conference on Machine Learning (ICML"},{"key":"ref35","author":"bertsekas","year":"2010","journal-title":"Approximate policy iteration A survey and some new methods"},{"key":"ref34","article-title":"Temporal differences-based policy iteration and applications in neuro-dynamic programming","author":"bertsekas","year":"1996","journal-title":"Massachusetts Institute of Technology Cambridge US Tech Rep LIDS-P-2349"},{"key":"ref60","first-page":"877","article-title":"Evolutionary function approximation for reinforcement learning","volume":"7","author":"whiteson","year":"2006","journal-title":"Journal of Machine Learning Research"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927533"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143901"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102377"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/11871842_23"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553504"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390240"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5732-z"},{"key":"ref66","first-page":"2169","article-title":"Proto-value functions: A Laplacian framework for learning representation and control in Markov decision processes","volume":"8","author":"mahadevan","year":"2007","journal-title":"Journal of Machine Learning Research"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5038-2"},{"key":"ref67","first-page":"14","article-title":"Regularization and feature selection in leastsquares temporal difference learning","author":"kolter","year":"0","journal-title":"Proceedings of the 26th Annual International Conference on Machine Learning (ICML '09)"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390251"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesvari","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","first-page":"4","article-title":"Interpolation-based Qvlearning","author":"szepesvari","year":"0","journal-title":"Proceedings of the 21st International conference on machine learning (ICML'04)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref21","first-page":"14","article-title":"Reinforcement learning: An overview","author":"glorennec","year":"0","journal-title":"Proceedings European Symposium on Intelligent Techniques (ESIT-OO)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017928328829"},{"key":"ref23","first-page":"1177","article-title":"Fitted Q-iteration by advantage weighted regression","author":"neumann","year":"2009","journal-title":"Advances in Neural Information Processing Systems 21"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2009.5160611"},{"key":"ref25","first-page":"815","article-title":"Finite time bounds for fitted value iteration","volume":"9","author":"munos","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref50","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in Neural Information Processing Systems 12"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114724"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2010.2050586"},{"key":"ref57","first-page":"21","article-title":"The cross-entropy method for fast policy search","author":"mannor","year":"0","journal-title":"Proc of the 20th International Conference on Machine Learning (ICML"},{"key":"ref56","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","author":"chang","year":"2007","journal-title":"Simulation-Based Algorithms for Markov Decision Processes"},{"key":"ref55","first-page":"18","article-title":"Efficient nonlinear control through neuroevolution","volume":"4212","author":"gomez","year":"0","journal-title":"Proceedings 17th European Conference on Machine Learning (ECML-06)"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"ref52","first-page":"1531","article-title":"A natural policy gradient","author":"kakade","year":"2001","journal-title":"Advances in Neural Information Processing Systems 14"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2008.926699"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2006.377527"},{"key":"ref40","first-page":"1038","article-title":"Generalization in reinforcement learning: Successful examples using sparse coarse coding","author":"sutton","year":"1996","journal-title":"Advances in Neural Information Processing Systems 8"},{"key":"ref12","author":"bertsekas","year":"1978","journal-title":"Stochastic Optimal Control The Discrete Time Case"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: A survey","volume":"4","author":"kaelbling","year":"1996","journal-title":"Journal of Artificial Intelligence Research"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref15","article-title":"On-line Q-Iearning using connectionist systems","author":"rummery","year":"1994","journal-title":"Technical Report CUED\/F-INFENG\/TR 310 Cambridge University Department of Engineering"},{"key":"ref82","doi-asserted-by":"crossref","first-page":"619","DOI":"10.1016\/S1574-0021(96)01016-7","article-title":"Numerical dynamic programming in economics","volume":"1","author":"rust","year":"1996","journal-title":"Handbook of Computational Economics"},{"key":"ref16","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.3166\/ejc.11.310-334"},{"key":"ref17","first-page":"3","article-title":"Neural fitted Q-iteration - first experiences with a data efficient neural reinforcement learning method","volume":"3720","author":"riedmiller","year":"0","journal-title":"Proceedings of the 16th European Conference on Machine Learning (ECML'05)"},{"key":"ref18","first-page":"9","article-title":"Fitted Q-iteration in continuous action-space MDPs","author":"antos","year":"2008","journal-title":"Advances in Neural Information Processing Systems 20"},{"key":"ref19","first-page":"361","article-title":"Reinforcement learning with soft state aggregation","author":"singh","year":"1995","journal-title":"Advances in Neural Information Processing Systems 7"},{"key":"ref80","first-page":"25","article-title":"Reinforce-ment learning in a nutshell","author":"heidrich-meisner","year":"0","journal-title":"Proceedings of 15th European Symposium on Artificial Neural Networks (ESANN)"},{"key":"ref4","author":"bertsekas","year":"1996","journal-title":"Neuro-Dynamic Programming"},{"key":"ref3","author":"sigaud","year":"2010","journal-title":"Markov Decision Processes in Artificial Intelligence"},{"key":"ref6","volume":"2","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/9780470544785"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref7","article-title":"Reinforcement Learning and Dynamic Programming Using Function Approximator, ser","author":"busoniu","year":"2010","journal-title":"Automation and Control Engineering"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref9","first-page":"1","article-title":"An application of reinforcement learning to aerobatic helicopter flight","author":"abbeel","year":"2007","journal-title":"Advances in Neural Information Processing Systems 19"},{"key":"ref46","first-page":"21","article-title":"Should one compute the Temporal Difference fix point or minimize the Bellman Residual? the unified oblique projection view","author":"scherrer","year":"0","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"key":"ref45","first-page":"21","article-title":"Convergence of least squares temporal difference methods under general conditions","author":"yu","year":"0","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022145020786"},{"key":"ref47","first-page":"21","article-title":"Finite-sample analysis of LSTD","author":"lazaric","year":"0","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref41","first-page":"10","article-title":"Online exploration in least-squares policy iteration","volume":"2","author":"li","year":"0","journal-title":"Proceedings 8th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS-09)"},{"key":"ref44","first-page":"21","article-title":"Analysis of a classification-based policy iteration algorithm","author":"lazaric","year":"0","journal-title":"Proceedings 27th International Conference on Machine Learning (ICML-1 0)"},{"key":"ref43","first-page":"21","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"0","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"}],"event":{"name":"2011 Ieee Symposium On Adaptive Dynamic Programming And Reinforcement Learning","location":"Paris, France","start":{"date-parts":[[2011,4,11]]},"end":{"date-parts":[[2011,4,15]]}},"container-title":["2011 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5958170\/5967347\/05967353.pdf?arnumber=5967353","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,9]],"date-time":"2024-04-09T14:55:00Z","timestamp":1712674500000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5967353\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,4]]},"references-count":82,"URL":"https:\/\/doi.org\/10.1109\/adprl.2011.5967353","relation":{},"subject":[],"published":{"date-parts":[[2011,4]]}}}