{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T03:46:06Z","timestamp":1776138366009,"version":"3.50.1"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China 973 Program","doi-asserted-by":"crossref","award":["2012CB720003"],"award-info":[{"award-number":["2012CB720003"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61121003"],"award-info":[{"award-number":["61121003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"General Research Fund project from Science and Technology on Aircraft Control Laboratory of Beihang University","award":["9140C480301130C48001"],"award-info":[{"award-number":["9140C480301130C48001"]}]},{"name":"NPRP from the Qatar National Research Fund (a member of Qatar Foundation)","award":["# NPRP 4-1162-1-181"],"award-info":[{"award-number":["# NPRP 4-1162-1-181"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2015,1]]},"DOI":"10.1109\/tcyb.2014.2319577","type":"journal-article","created":{"date-parts":[[2014,5,9]],"date-time":"2014-05-09T18:08:02Z","timestamp":1399658882000},"page":"65-76","source":"Crossref","is-referenced-by-count":343,"title":["Off-Policy Reinforcement Learning for &lt;inline-formula&gt; &lt;tex-math notation=\"LaTeX\"&gt;$ H_\\infty $ &lt;\/tex-math&gt;&lt;\/inline-formula&gt; Control Design"],"prefix":"10.1109","volume":"45","author":[{"given":"Biao","family":"Luo","sequence":"first","affiliation":[]},{"given":"Huai-Ning","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Tingwen","family":"Huang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2012.2227253"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022664528457"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2008.11.006"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.1760"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2006.884959"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1080\/002071798221542","article-title":"Successive Galerkin approximation algorithms for nonlinear optimal and robust control","volume":"71","author":"beard","year":"1998","journal-title":"Int J Control"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"2159","DOI":"10.1016\/S0005-1098(97)00128-3","article-title":"Galerkin approximations of the generalized Hamilton-Jacobi-Bellman equation","volume":"33","author":"beard","year":"1997","journal-title":"Automatica"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1979.4310171"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2008.2006113"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2012.11.021"},{"key":"ref60","first-page":"417","article-title":"Off-policy temporal-difference learning with function approximation","author":"precup","year":"2001","journal-title":"Proc 18th ICML"},{"key":"ref62","author":"green","year":"1995","journal-title":"Linear Robust Control"},{"key":"ref61","first-page":"719","article-title":"Toward off-policy learning control with function approximation","author":"maei","year":"2010","journal-title":"Proceedings of the 27th ICML"},{"key":"ref63","author":"stevens","year":"2003","journal-title":"Aircraft Control and Simulation"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/9.376058"},{"key":"ref64","first-page":"311","article-title":"An $L_{2}$ disturbance attenuation solution to the nonlinear benchmark problem","volume":"8","author":"escobar","year":"1999","journal-title":"Int J Robust Nonlinear Control"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/9.256331"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/9.159566"},{"key":"ref2","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","article-title":"Reinforcement learning: A survey","volume":"4","author":"kaelbling","year":"1996","journal-title":"J Artif Intell Res"},{"key":"ref20","article-title":"Data-based approximate policy iteration for nonlinear continuous-time optimal control design","author":"luo","year":"2013"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"621","DOI":"10.1109\/TNNLS.2013.2281663","article-title":"Policy iteration adaptive dynamic programming algorithm for discrete-time nonlinear systems","volume":"25","author":"liu","year":"2014","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2013.09.043"},{"key":"ref24","author":"zhou","year":"1996","journal-title":"Robust and Optimal Control"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1021\/ie4031743"},{"key":"ref26","author":"ba?ar","year":"2008","journal-title":"H-Optimal Control and Related Minimax Design Problems A Dynamic Game Approach"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.1007\/3-540-76074-1","author":"schaft","year":"1996","journal-title":"$ L_ 2 $ -Gain and Passivity in Nonlinear Control"},{"key":"ref50","article-title":"Efficient exploration in reinforcement learning","author":"thrun","year":"1992"},{"key":"ref51","author":"courant","year":"2004","journal-title":"Methods of Mathematical Physics"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2011.03.005"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.900227"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"779","DOI":"10.1016\/j.automatica.2004.11.034","article-title":"Nearly optimal control laws for nonlinear systems with saturating actuators using a neural network HJB approach","volume":"41","author":"abu-khalaf","year":"2005","journal-title":"Automatica"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1968.1098829"},{"key":"ref55","author":"slotine","year":"1991","journal-title":"Applied nonlinear control"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1002\/0471781819"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/0021-9991(78)90004-9"},{"key":"ref52","author":"finlayson","year":"1972","journal-title":"The Method of Weighted Residuals and Variational Principles With Applications in Fluid Mechanics Heat and Mass Transfer"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2009.2027233"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2011.2168538"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2000204"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jprocont.2012.04.011"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1049\/PBCE081E"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"1136","DOI":"10.23919\/ACC.1989.4790360","article-title":"neural network control of unknown nonlinear systems","author":"li","year":"1989","journal-title":"1989 American Control Conference ACC"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1538","DOI":"10.1109\/TSMCB.2012.2194781","article-title":"Approximate optimal control design for nonlinear one-dimensional parabolic PDE systems using empirical eigenfunctions and neural network","volume":"42","author":"luo","year":"2012","journal-title":"IEEE Trans Syst Man Cybern B Cybern"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1021\/ie300897m"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"779","DOI":"10.1109\/TSMCB.2012.2216523","article-title":"Finite-approximation-error-based optimal control approach for discrete-time nonlinear systems","volume":"43","author":"liu","year":"2013","journal-title":"IEEE Trans Cybern"},{"key":"ref18","article-title":"A novel iterative $ \\theta $ -adaptive dynamic programming for discrete-time nonlinear systems","author":"wei","year":"2013","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"418","DOI":"10.1109\/TNNLS.2013.2280013","article-title":"Decentralized stabilization for a class of continuous-time nonlinear interconnected systems using online learning optimal control approach","volume":"25","author":"liu","year":"2014","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1002\/9780470182963"},{"key":"ref3","author":"bertsekas","year":"2005","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899229"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2002.801727"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2008.08.017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.920269"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2012.2203336"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.03.008"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2012.08.012"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.2514\/3.21495"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"1884","DOI":"10.1109\/TNNLS.2012.2217349","article-title":"Neural network based online simultaneous policy update algorithm for solving the HJI equation in nonlinear $ H_\\infty $ control","volume":"23","author":"wu","year":"2012","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/s11768-011-0166-4"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2010.10.033"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1002\/acs.2348"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1002\/rnc.2814"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2010.02.018"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/6983641\/06813673.pdf?arnumber=6813673","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:27:39Z","timestamp":1642004859000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6813673\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,1]]},"references-count":64,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2014.2319577","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"value":"2168-2267","type":"print"},{"value":"2168-2275","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,1]]}}}