{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T00:36:07Z","timestamp":1729643767078,"version":"3.28.0"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1109\/adprl.2014.7010631","type":"proceedings-article","created":{"date-parts":[[2015,1,19]],"date-time":"2015-01-19T21:48:03Z","timestamp":1421704083000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["A data-based online reinforcement learning algorithm with high-efficient exploration"],"prefix":"10.1109","author":[{"given":"Yuanheng","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-003-0369-9"},{"article-title":"On-line Q-learning using connectionist systems","year":"1994","author":"rummery","key":"ref12"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"1136","DOI":"10.23919\/ACC.1989.4790360","article-title":"neural network control of unknown nonlinear systems","author":"li","year":"1989","journal-title":"1989 American Control Conference ACC"},{"key":"ref14","article-title":"The role of exploration in learning control","author":"thrun","year":"1992","journal-title":"Handbook for Intelligent Control Neural Fuzzy and Adaptive Approaches"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2012.09.034"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s00500-013-1110-y"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2011.2122257"},{"key":"ref18","first-page":"3471","article-title":"The application of ADHDP(λ) method to coordinated multiple ramps metering","volume":"5","author":"bai","year":"2009","journal-title":"International Journal of Innovative Computing"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017984413808"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-012-1249-y"},{"key":"ref4","first-page":"1","article-title":"Adaptive neural control using reinforcement learning for a class of robot manipulator","author":"tang","year":"2013","journal-title":"Neural Comput Appl"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.926614"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2011.01.045"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-013-1361-7"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-012-1243-4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-011-0707-2"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2011.2166384"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2013.06.037"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821091","author":"busoniu","year":"2010","journal-title":"Reinforcement Learning and Dynamic Programming Using Function Approximators"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2012.2203336"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref1"},{"key":"ref20","first-page":"213","article-title":"R-max a general polynomial time algorithm for near-optimal reinforcement learning","volume":"3","author":"brafman","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102459"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017992615625"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-010-5186-7"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cta.2011.0783"},{"key":"ref25","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"J Mach Learn Res"}],"event":{"name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2014,12,9]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2014,12,12]]}},"container-title":["2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7000183\/7010603\/07010631.pdf?arnumber=7010631","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,19]],"date-time":"2019-08-19T17:42:39Z","timestamp":1566236559000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7010631\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/adprl.2014.7010631","relation":{},"subject":[],"published":{"date-parts":[[2014,12]]}}}