{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T08:54:33Z","timestamp":1729673673786,"version":"3.28.0"},"reference-count":17,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009,3]]},"DOI":"10.1109\/adprl.2009.4927539","type":"proceedings-article","created":{"date-parts":[[2009,5,19]],"date-time":"2009-05-19T15:50:44Z","timestamp":1242748244000},"page":"153-160","source":"Crossref","is-referenced-by-count":6,"title":["Policy search with cross-entropy optimization of basis functions"],"prefix":"10.1109","author":[{"given":"Lucian","family":"Busoniu","sequence":"first","affiliation":[]},{"given":"Damien","family":"Ernst","sequence":"additional","affiliation":[]},{"given":"Bart","family":"De Schutter","sequence":"additional","affiliation":[]},{"given":"Robert","family":"Babuska","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"17","first-page":"463","article-title":"learning to drive a bicycle using reinforcement learning and shaping","author":"randl\u00f8v","year":"1998","journal-title":"Proceedings 15th International Conference on Machine Learning (ICML-98)"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1016\/j.orl.2006.11.005"},{"key":"16","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1007\/978-3-540-77949-0_3","article-title":"continuous-state reinforcement learning with fuzzy approximation","volume":"4865","author":"busoniu","year":"2008","journal-title":"Adaptive Agents and Multi-Agent Systems III ser Lecture Notes in Computer Science"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-4321-0"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"11","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","author":"chang","year":"2007","journal-title":"Simulation-Based Algorithms for Markov Decision Processes"},{"key":"12","first-page":"877","article-title":"evolutionary function approximation for reinforcement learning","volume":"7","author":"whiteson","year":"2006","journal-title":"Journal of Machine Learning Research"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017992615625"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"2"},{"key":"1","volume":"2","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"key":"10","first-page":"512","article-title":"the cross-entropy method for fast policy search","author":"mannor","year":"2003","journal-title":"Proceedings 20th International Conference on Machine Learning (ICML-03) Washington"},{"key":"7","first-page":"1057","article-title":"policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"6","first-page":"2169","article-title":"proto-value functions: a laplacian framework for learning representation and control in markov decision processes","volume":"8","author":"mahadevan","year":"2007","journal-title":"Journal of Machine Learning Research"},{"key":"5","article-title":"tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1162\/jmlr.2003.4.6.1107"},{"key":"9","first-page":"771","article-title":"policy gradient in continuous time","volume":"7","author":"munos","year":"2006","journal-title":"Journal of Machine Learning Research"},{"key":"8","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1023\/A:1022145020786","article-title":"approximate gradient methods in policy-space optimization of markov reward processes","volume":"13","author":"marbach","year":"2003","journal-title":"Discrete Event Dynamic Systems Theory and Applications"}],"event":{"name":"2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2009,3,30]]},"location":"Nashville, TN, USA","end":{"date-parts":[[2009,4,2]]}},"container-title":["2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4910084\/4927513\/04927539.pdf?arnumber=4927539","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,18]],"date-time":"2017-06-18T16:02:34Z","timestamp":1497801754000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4927539\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,3]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/adprl.2009.4927539","relation":{},"subject":[],"published":{"date-parts":[[2009,3]]}}}