{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T19:57:14Z","timestamp":1760385434730,"version":"3.37.3"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2016,4,1]],"date-time":"2016-04-01T00:00:00Z","timestamp":1459468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61473024"],"award-info":[{"award-number":["61473024"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2016,4]]},"DOI":"10.1109\/tnnls.2015.2424233","type":"journal-article","created":{"date-parts":[[2015,5,1]],"date-time":"2015-05-01T18:40:25Z","timestamp":1430505625000},"page":"771-782","source":"Crossref","is-referenced-by-count":19,"title":["Kernel-Based Least Squares Temporal Difference With Gradient Correction"],"prefix":"10.1109","volume":"27","author":[{"given":"Tianheng","family":"Song","sequence":"first","affiliation":[]},{"given":"Dazi","family":"Li","sequence":"additional","affiliation":[]},{"given":"Liulin","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Kotaro","family":"Hirasawa","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"424","article-title":"Sparse reinforcement learning via convex optimization","author":"qin","year":"2014","journal-title":"Proc 31th Int Conf Mach Learn"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102377"},{"key":"ref31","first-page":"154","article-title":"Bayes meets Bellman: The Gaussian process approach to temporal difference learning","author":"engel","year":"2003","journal-title":"Proc 20th Int Conf Mach Learn"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2283842"},{"key":"ref10","first-page":"1204","article-title":"Convergent temporal-difference learning with arbitrary smooth function approximation","author":"maei","year":"2009","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref11","first-page":"719","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"2010","journal-title":"Proc 27th Int Conf Mach Learn"},{"key":"ref12","first-page":"845","article-title":"Regularized off-policy TD-learning","author":"liu","year":"2012","journal-title":"Proc 25th Annu Conf Neural Inf Process Syst (NIPS)"},{"key":"ref13","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-01551-9","author":"szepesv\u00e1ri","year":"2010","journal-title":"Algorithms for Reinforcement Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2270561"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2012.2200500"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2012.2229293"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"338","DOI":"10.1109\/ADPRL.2007.368208","article-title":"Kernelizing LSPE( $\\lambda $ )","author":"jung","year":"2007","journal-title":"Proc IEEE Symp Approx Dyn Program Reinforcement Learn"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899161"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2011.2178446"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1950","DOI":"10.1109\/TNET.2012.2187923","article-title":"An information theoretic approach of designing sparse kernel adaptive filters","volume":"20","author":"liu","year":"2009","journal-title":"IEEE Trans Neural Netw"},{"key":"ref3","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2013.2258936"},{"key":"ref5","first-page":"1609","article-title":"A convergent O(n) temporal-difference algorithm for off-policy learning with linear function approximation","author":"sutton","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017936530646"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref2","first-page":"441","article-title":"iLSTD: Eligibility traces and convergence analysis","volume":"19","author":"geramifard","year":"2007","journal-title":"Proc Adv Neural Inf Process Syst"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553504"},{"key":"ref20","volume":"2","author":"bertsekas","year":"1995","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2004.830985"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-009-5128-4"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.2.213"},{"journal-title":"Learning With Kernels Support Vector Machines Regularization Optimization and Beyond","year":"2002","author":"scholkopf","key":"ref23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2008.2009895"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1162\/089976602317250933"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/7434085\/7100931.pdf?arnumber=7100931","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,10]],"date-time":"2023-08-10T00:28:35Z","timestamp":1691627315000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7100931\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,4]]},"references-count":33,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2015.2424233","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2016,4]]}}}