{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T02:17:20Z","timestamp":1775182640850,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,2,24]],"date-time":"2017-02-24T00:00:00Z","timestamp":1487894400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61273136"],"award-info":[{"award-number":["61273136"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61573353"],"award-info":[{"award-number":["61573353"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61533017"],"award-info":[{"award-number":["61533017"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1007\/s10462-017-9548-4","type":"journal-article","created":{"date-parts":[[2017,2,23]],"date-time":"2017-02-23T23:54:06Z","timestamp":1487894046000},"page":"531-547","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":78,"title":["Comprehensive comparison of online ADP algorithms for continuous-time optimal control"],"prefix":"10.1007","volume":"49","author":[{"given":"Yuanheng","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,24]]},"reference":[{"issue":"5","key":"9548_CR1","doi-asserted-by":"crossref","first-page":"779","DOI":"10.1016\/j.automatica.2004.11.034","volume":"41","author":"M Abu-Khalaf","year":"2005","unstructured":"Abu-Khalaf M, Lewis FL (2005) Nearly optimal control laws for nonlinear systems with saturating actuators using a neural network HJB approach. Automatica 41(5):779\u2013791","journal-title":"Automatica"},{"issue":"4","key":"9548_CR2","doi-asserted-by":"crossref","first-page":"943","DOI":"10.1109\/TSMCB.2008.926614","volume":"38","author":"A Al-Tamimi","year":"2008","unstructured":"Al-Tamimi A, Lewis FL, Abu-Khalaf M (2008) Discrete-time nonlinear HJB solution using approximate dynamic programming: convergence proof. IEEE Trans Syst Man Cybern Part B Cybern 38(4):943\u2013949","journal-title":"IEEE Trans Syst Man Cybern Part B Cybern"},{"key":"9548_CR3","volume-title":"Optimal control and viscosity solutions of Hamilton\u2013Jacobi\u2013Bellman equations","author":"M Bardi","year":"2008","unstructured":"Bardi M, Capuzzo-Dolcetta I (2008) Optimal control and viscosity solutions of Hamilton\u2013Jacobi\u2013Bellman equations. Springer, NewYork"},{"issue":"5","key":"9548_CR4","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1080\/002071798221542","volume":"71","author":"R Beard","year":"1998","unstructured":"Beard R, McLain T et al (1998) Successive Galerkin approximation algorithms for nonlinear optimal and robust control. Int J Control 71(5):717\u2013743","journal-title":"Int J Control"},{"issue":"12","key":"9548_CR5","doi-asserted-by":"crossref","first-page":"2159","DOI":"10.1016\/S0005-1098(97)00128-3","volume":"33","author":"RW Beard","year":"1997","unstructured":"Beard RW, Saridis GN, Wen JT (1997) Galerkin approximations of the generalized Hamilton\u2013Jacobi\u2013Bellman equation. Automatica 33(12):2159\u20132177","journal-title":"Automatica"},{"issue":"1","key":"9548_CR6","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1016\/j.automatica.2012.09.019","volume":"49","author":"S Bhasin","year":"2013","unstructured":"Bhasin S, Kamalapurkar R, Johnson M, Vamvoudakis KG, Lewis FL, Dixon WE (2013) A novel actor\u2013critic-identifier architecture for approximate optimal control of uncertain nonlinear systems. Automatica 49(1):82\u201392","journal-title":"Automatica"},{"key":"9548_CR7","volume-title":"Neural networks for optimization and signal processing","author":"A Cochocki","year":"1993","unstructured":"Cochocki A, Unbehauen R (1993) Neural networks for optimization and signal processing, 1st edn. Wiley, NewYork, NY","edition":"1"},{"issue":"6","key":"9548_CR8","doi-asserted-by":"crossref","first-page":"1083","DOI":"10.1016\/0005-1098(92)90053-I","volume":"28","author":"K Hunt","year":"1992","unstructured":"Hunt K, Sbarbaro D, Zbikowski R, Gawthrop P (1992) Neural networks for control systemsa survey. Automatica 28(6):1083\u20131112","journal-title":"Automatica"},{"issue":"5","key":"9548_CR9","doi-asserted-by":"crossref","first-page":"882","DOI":"10.1109\/TNNLS.2013.2294968","volume":"25","author":"Y Jiang","year":"2014","unstructured":"Jiang Y, Jiang ZP (2014) Robust adaptive dynamic programming and feedback stabilization of nonlinear systems. IEEE Trans Neural Netw Learn Syst 25(5):882\u2013893","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"11","key":"9548_CR10","doi-asserted-by":"crossref","first-page":"2917","DOI":"10.1109\/TAC.2015.2414811","volume":"60","author":"Y Jiang","year":"2015","unstructured":"Jiang Y, Jiang ZP (2015) Global adaptive dynamic programming for continuous-time nonlinear systems. IEEE Trans Autom Control 60(11):2917\u20132929","journal-title":"IEEE Trans Autom Control"},{"key":"9548_CR11","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Intell Res 4:237\u2013285","journal-title":"J Artif Intell Res"},{"issue":"3","key":"9548_CR12","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1109\/MCAS.2009.933854","volume":"9","author":"FL Lewis","year":"2009","unstructured":"Lewis FL, Vrabie D (2009) Reinforcement learning and adaptive dynamic programming for feedback control. IEEE Circuits Syst Mag 9(3):32\u201350","journal-title":"IEEE Circuits Syst Mag"},{"issue":"10","key":"9548_CR13","doi-asserted-by":"crossref","first-page":"1513","DOI":"10.1109\/TNNLS.2013.2276571","volume":"24","author":"H Modares","year":"2013","unstructured":"Modares H, Lewis FL, Naghibi-Sistani MB (2013) Adaptive optimal control of unknown constrained-input systems using policy iteration and neural networks. IEEE Trans Neural Netw Learn Syst 24(10):1513\u20131525","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"9548_CR14","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1016\/j.automatica.2013.09.043","volume":"50","author":"H Modares","year":"2014","unstructured":"Modares H, Lewis FL, Naghibi-Sistani MB (2014) Integral reinforcement learning and experience replay for adaptive optimal control of partially-unknown constrained-input continuous-time systems. Automatica 50(1):193\u2013202","journal-title":"Automatica"},{"issue":"2","key":"9548_CR15","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1109\/TSMCC.2002.801727","volume":"32","author":"JJ Murray","year":"2002","unstructured":"Murray JJ, Cox CJ, Lendaris GG, Saeks R (2002) Adaptive dynamic programming. IEEE Trans Syst Man Cybern Part C Appl Rev 32(2):140\u2013153","journal-title":"IEEE Trans Syst Man Cybern Part C Appl Rev"},{"issue":"3","key":"9548_CR16","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1023\/A:1015008417172","volume":"17","author":"C Ribeiro","year":"2002","unstructured":"Ribeiro C (2002) Reinforcement learning agents. Artif Intell Rev 17(3):223\u2013250","journal-title":"Artif Intell Rev"},{"issue":"4","key":"9548_CR17","doi-asserted-by":"crossref","first-page":"851","DOI":"10.1109\/TNNLS.2015.2399020","volume":"26","author":"R Song","year":"2015","unstructured":"Song R, Lewis F, Wei Q, Zhang HG, Jiang ZP, Levine D (2015) Multiple actor\u2013critic structures for continuous-time optimal control using input\u2013output data. IEEE Trans Neural Netw Learn Syst 26(4):851\u2013865","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9548_CR18","volume-title":"Aircraft control and simulation","author":"BL Stevens","year":"2003","unstructured":"Stevens BL, Lewis FL (2003) Aircraft control and simulation. Wiley, Hoboken"},{"key":"9548_CR19","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"9548_CR20","doi-asserted-by":"crossref","unstructured":"Vamvoudakis K, Vrabie D, Lewis F (2011) Online adaptive learning of optimal control solutions using integral reinforcement learning. In: IEEE symposium on adaptive dynamic programming and reinforcement learning (ADPRL), pp 250\u2013257","DOI":"10.1109\/ADPRL.2011.5967359"},{"issue":"5","key":"9548_CR21","doi-asserted-by":"crossref","first-page":"878","DOI":"10.1016\/j.automatica.2010.02.018","volume":"46","author":"KG Vamvoudakis","year":"2010","unstructured":"Vamvoudakis KG, Lewis FL (2010) Online actor\u2013critic algorithm to solve the continuous-time infinite horizon optimal control problem. Automatica 46(5):878\u2013888","journal-title":"Automatica"},{"issue":"17","key":"9548_CR22","doi-asserted-by":"crossref","first-page":"2686","DOI":"10.1002\/rnc.3018","volume":"24","author":"KG Vamvoudakis","year":"2014","unstructured":"Vamvoudakis KG, Vrabie D, Lewis FL (2014) Online adaptive algorithm for optimal control with integral reinforcement learning. Int J Robust Nonlinear Control 24(17):2686\u20132710","journal-title":"Int J Robust Nonlinear Control"},{"issue":"3","key":"9548_CR23","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/j.neunet.2009.03.008","volume":"22","author":"D Vrabie","year":"2009","unstructured":"Vrabie D, Lewis F (2009) Neural network approach to continuous-time direct adaptive optimal control for partially unknown nonlinear systems. Neural Netw 22(3):237\u2013246","journal-title":"Neural Netw"},{"issue":"8","key":"9548_CR24","doi-asserted-by":"crossref","first-page":"1825","DOI":"10.1016\/j.automatica.2012.05.049","volume":"48","author":"D Wang","year":"2012","unstructured":"Wang D, Liu D, Wei Q, Zhao D, Jin N (2012) Optimal control of unknown nonaffine nonlinear discrete-time systems based on adaptive dynamic programming. Automatica 48(8):1825\u20131832","journal-title":"Automatica"},{"issue":"2","key":"9548_CR25","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/MCI.2009.932261","volume":"4","author":"FY Wang","year":"2009","unstructured":"Wang FY, Zhang H, Liu D (2009) Adaptive dynamic programming: an introduction. IEEE Comput Intell Mag 4(2):39\u201347","journal-title":"IEEE Comput Intell Mag"},{"issue":"6","key":"9548_CR26","first-page":"25","volume":"22","author":"PJ Werbos","year":"1977","unstructured":"Werbos PJ (1977) Advanced forecasting methods for global crisis warning and models of intelligence. Gen Syst Yearb 22(6):25\u201338","journal-title":"Gen Syst Yearb"},{"key":"9548_CR27","volume-title":"Adaptive dynamic programming for control: algorithms and stability","author":"H Zhang","year":"2012","unstructured":"Zhang H, Liu D, Luo Y, Wang D (2012) Adaptive dynamic programming for control: algorithms and stability. Springer, NewYork"},{"issue":"1","key":"9548_CR28","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1109\/TSMCB.2012.2203336","volume":"43","author":"H Zhang","year":"2013","unstructured":"Zhang H, Cui L, Luo Y (2013) Near-optimal control for nonzero-sum differential games of continuous-time nonlinear systems using single-network ADP. IEEE Trans Cybern 43(1):206\u2013216","journal-title":"IEEE Trans Cybern"},{"issue":"2","key":"9548_CR29","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1109\/TNNLS.2014.2371046","volume":"26","author":"D Zhao","year":"2015","unstructured":"Zhao D, Zhu Y (2015) MEC\u2014a near-optimal online reinforcement learning algorithm for continuous deterministic systems. IEEE Trans Neural Netw Learn Syst 26(2):346\u2013356","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"3","key":"9548_CR30","doi-asserted-by":"crossref","first-page":"854","DOI":"10.1109\/TCYB.2015.2488680","volume":"46","author":"D Zhao","year":"2016","unstructured":"Zhao D, Zhang Q, Wang D, Zhu Y (2016) Experience replay for optimal control of nonzero-sum game systems with unknown dynamics. IEEE Trans Cybern 46(3):854\u2013865","journal-title":"IEEE Trans Cybern"},{"key":"9548_CR31","unstructured":"Zhu Y, Zhao D, He H, Ji J (2016a) Event-triggered optimal control for partially-unknown constrained-input systems via adaptive dynamic programming. IEEE Trans Ind Electron PP(99):1"},{"issue":"12","key":"9548_CR32","doi-asserted-by":"crossref","first-page":"1339","DOI":"10.1049\/iet-cta.2015.0769","volume":"10","author":"Y Zhu","year":"2016","unstructured":"Zhu Y, Zhao D, Li X (2016b) Using reinforcement learning techniques to solve continuous-time non-linear optimal tracking problem without system dynamics. IET Control Theory Appl 10(12):1339\u20131347","journal-title":"IET Control Theory Appl"},{"issue":"3","key":"9548_CR33","doi-asserted-by":"crossref","first-page":"714","DOI":"10.1109\/TNNLS.2016.2561300","volume":"28","author":"Y Zhu","year":"2017","unstructured":"Zhu Y, Zhao D, Li X (2017a) Iterative adaptive dynamic programming for solving unknown nonlinear zero-sum game based on online data. IEEE Trans Neural Netw Learn Syst 28(3):714\u2013725","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9548_CR34","unstructured":"Zhu Y, Zhao D, Yang X, Zhang Q (2017b) Policy iteration for $${H}_\\infty $$ H \u221e optimal control of polynomial nonlinear systems via sum of squares programming. IEEE Trans Cybern PP(99):1\u201310"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10462-017-9548-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-017-9548-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-017-9548-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,18]],"date-time":"2019-09-18T21:28:19Z","timestamp":1568842099000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10462-017-9548-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,24]]},"references-count":34,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2018,4]]}},"alternative-id":["9548"],"URL":"https:\/\/doi.org\/10.1007\/s10462-017-9548-4","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"value":"0269-2821","type":"print"},{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,2,24]]}}}