{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:26:35Z","timestamp":1765545995639,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1109\/adprl.2014.7010612","type":"proceedings-article","created":{"date-parts":[[2015,1,20]],"date-time":"2015-01-20T02:48:03Z","timestamp":1421722083000},"page":"1-8","source":"Crossref","is-referenced-by-count":6,"title":["Convergent reinforcement learning control with neural networks and continuous action search"],"prefix":"10.1109","author":[{"given":"Minwoo","family":"Lee","sequence":"first","affiliation":[]},{"given":"Charles W.","family":"Anderson","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref11","first-page":"845","author":"liu","year":"2012","journal-title":"Regularized Off-policy Td-learning"},{"journal-title":"Gradient Temporal-difference Learning Algorithms","year":"2011","author":"maei","key":"ref12"},{"key":"ref13","first-page":"1204","author":"maei","year":"2009","journal-title":"Convergent Temporal-difference Learning with Arbitrary Smooth Function Approximation"},{"key":"ref14","first-page":"719","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"2010","journal-title":"Proceedings of the 27th International Conference on Machine Learning"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017988514716"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(05)80056-5"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1177\/105971239700600201"},{"key":"ref18","first-page":"537","article-title":"Scaling reinforcement learning toward robocup soccer","volume":"1","author":"stone","year":"2001","journal-title":"International Conference on Machine Learning"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1177\/105971230501300301"},{"journal-title":"Rl-competition","year":"2009","author":"community","key":"ref4"},{"key":"ref3","first-page":"833","article-title":"Reinforcement learning in continuous action spaces through sequential monte carlo methods","volume":"20","author":"bonarini","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(94)90047-7"},{"key":"ref5","article-title":"Off-policy actor-critic","author":"degris","year":"2012","journal-title":"arXiv preprint arXiv 1205 4839"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1111\/j.1096-3642.1985.tb01178.x"},{"key":"ref7","first-page":"347","article-title":"Learning to control an octopus arm with gaussian process temporal difference methods","volume":"18","author":"engel","year":"2006","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0921-8890(97)00043-2"},{"key":"ref9","first-page":"2169","author":"kolter","year":"2011","journal-title":"The Fixed Points of Off-policy Td"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref20"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1023\/A:1007609817671","article-title":"On the convergence of temporal-difference learning with linear function approximation","volume":"42","author":"tadi?","year":"2001","journal-title":"Machine Learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1152\/jn.00684.2004"}],"event":{"name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2014,12,9]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2014,12,12]]}},"container-title":["2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7000183\/7010603\/07010612.pdf?arnumber=7010612","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,23]],"date-time":"2017-06-23T03:55:02Z","timestamp":1498190102000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7010612\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/adprl.2014.7010612","relation":{},"subject":[],"published":{"date-parts":[[2014,12]]}}}