{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:47:06Z","timestamp":1773931626723,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2019,4,2]],"date-time":"2019-04-02T00:00:00Z","timestamp":1554163200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2019,5]]},"DOI":"10.1007\/s11432-018-9602-1","type":"journal-article","created":{"date-parts":[[2019,4,8]],"date-time":"2019-04-08T02:03:40Z","timestamp":1554689020000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Policy iteration based Q-learning for linear nonzero-sum quadratic differential games"],"prefix":"10.1007","volume":"62","author":[{"given":"Xinxing","family":"Li","sequence":"first","affiliation":[]},{"given":"Zhihong","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Li","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Wenzhong","family":"Zha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,2]]},"reference":[{"key":"9602_CR1","volume-title":"Dynamic Noncooperative Game Theory (Classics in Applied Mathematics)","author":"T Basar","year":"1999","unstructured":"Basar T, Olsder G J. Dynamic Noncooperative Game Theory (Classics in Applied Mathematics). 2nd ed. Philadelphia: SIAM, 1999","edition":"2nd ed"},{"key":"9602_CR2","doi-asserted-by":"publisher","first-page":"2778","DOI":"10.1109\/TAC.2012.2194335","volume":"57","author":"P Falugi","year":"2012","unstructured":"Falugi P, Kountouriotis P A, Vinter R B. Differential games controllers that confine a system to a safe region in the state space, with applications to surge tank control. IEEE Trans Automat Contr, 2012, 57: 2778\u20132788","journal-title":"IEEE Trans Automat Contr"},{"key":"9602_CR3","doi-asserted-by":"publisher","first-page":"1409","DOI":"10.1109\/TCYB.2016.2546381","volume":"47","author":"W Z Zha","year":"2017","unstructured":"Zha W Z, Chen J, Peng Z H, et al. Construction of barrier in a fishing game with point capture. IEEE Trans Cybern, 2017, 47: 1409\u20131422","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR4","first-page":"042306","volume":"57","author":"F H Lin","year":"2014","unstructured":"Lin F H, Liu Q, Zhou X W, et al. Towards green for relay in InterPlaNetary Internet based on differential game model. Sci China Inf Sci, 2014, 57: 042306","journal-title":"Sci China Inf Sci"},{"key":"9602_CR5","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1109\/TCYB.2014.2319577","volume":"45","author":"B Luo","year":"2015","unstructured":"Luo B, Wu H N, Huang T. Off-policy reinforcement learning for H\n                           \u221e control design. IEEE Trans Cybern, 2015, 45: 65\u201376","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR6","doi-asserted-by":"publisher","first-page":"717","DOI":"10.1080\/002071798221542","volume":"71","author":"R W Bea","year":"1998","unstructured":"Bea R W. Successive Galerkin approximation algorithms for nonlinear optimal and robust control. 
Int J Control, 1998, 71: 717\u2013743","journal-title":"Int J Control"},{"key":"9602_CR7","doi-asserted-by":"publisher","first-page":"1243","DOI":"10.1109\/TNN.2008.2000204","volume":"19","author":"M Abu-Khalaf","year":"2008","unstructured":"Abu-Khalaf M, Lewis F L, Huang J. Neurodynamic programming and zero-sum games for constrained control systems. IEEE Trans Neural Netw, 2008, 19: 1243\u20131252","journal-title":"IEEE Trans Neural Netw"},{"key":"9602_CR8","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1109\/9.481532","volume":"41","author":"G Freiling","year":"1996","unstructured":"Freiling G, Jank G, Abou-Kandil H. On global existence of solutions to coupled matrix Riccati equations in closed-loop Nash games. IEEE Trans Automat Contr, 1996, 41: 264\u2013269","journal-title":"IEEE Trans Automat Contr"},{"key":"9602_CR9","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1007\/978-1-4612-4274-1_17","volume-title":"New Trends in Dynamic Games and Applications","author":"T Y Li","year":"1995","unstructured":"Li T Y, Gajic Z. Lyapunov iterations for solving coupled algebraic riccati equations of nash differential games and algebraic riccati equations of zero-sum game. In: New Trends in Dynamic Games and Applications. Boston: Birkh\u00e4user, 1995. 333\u20133"},{"key":"9602_CR10","doi-asserted-by":"crossref","unstructured":"Possieri C, Sassano M. An algebraic geometry approach for the computation of all linear feedback Nash equilibria in LQ differential games. In: Proceedings of the 54th IEEE Conference on Decision and Control, Osaka, 2015. 5197\u20133","DOI":"10.1109\/CDC.2015.7403032"},{"key":"9602_CR11","volume-title":"LQ Dynamic Optimization and Differential Games","author":"J C Engwerda","year":"2005","unstructured":"Engwerda J C. LQ Dynamic Optimization and Differential Games. New York: Wiley, 2005"},{"key":"9602_CR12","doi-asserted-by":"publisher","first-page":"950","DOI":"10.1109\/TAC.2014.2362334","volume":"60","author":"T Mylvaganam","year":"2015","unstructured":"Mylvaganam T, Sassano M, Astolfi A. Constructive \u03b1-Nash equilibria for nonzero-sum differential games. IEEE Trans Automat Contr, 2015, 60: 950\u2013965","journal-title":"IEEE Trans Automat Contr"},{"key":"9602_CR13","volume-title":"Reinforcement Learning: an Introduction","author":"R S Sutton","year":"1998","unstructured":"Sutton R S, Barto A G. Reinforcement Learning: an Introduction. Cambridge: MIT Press, 1998"},{"key":"9602_CR14","volume-title":"Handbook of Intelligent Control","author":"P J Werbos","year":"1992","unstructured":"Werbos P J. Approximate dynamic programming for real-time control and neural modeling. In: Handbook of Intelligent Control. New York: Van Nostrand, 1992"},{"key":"9602_CR15","volume-title":"Neuro-Dynamic Programming","author":"D P Bertsekas","year":"1996","unstructured":"Bertsekas D P, Tsitsiklis J N. Neuro-Dynamic Programming. Belmont: Athena Scientific, 1996"},{"key":"9602_CR16","first-page":"131","volume":"11","author":"P J Werbos","year":"1968","unstructured":"Werbos P J. The elements of intelligence. Cybernetica, 1968, 11: 131","journal-title":"Cybernetica"},{"key":"9602_CR17","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1162\/089976600300015961","volume":"12","author":"K Doya","year":"2000","unstructured":"Doya K. Reinforcement learning in continuous time and space. 
Neural Computation, 2000, 12: 219\u2013245","journal-title":"Neural Computation"},{"key":"9602_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCYB.2016.2519449","volume":"47","author":"Q L Wei","year":"2016","unstructured":"Wei Q L, Lewis F L, Sun Q Y, et al. Discrete-time deterministic Q-learning: a novel convergence analysis. IEEE Trans Cyber, 2016, 47: 1\u201314","journal-title":"IEEE Trans Cyber"},{"key":"9602_CR19","doi-asserted-by":"publisher","first-page":"058201","DOI":"10.1007\/s11432-016-9022-1","volume":"60","author":"D Wang","year":"2017","unstructured":"Wang D, Mu C X. Developing nonlinear adaptive optimal regulators through an improved neural learning mechanism. Sci China Inf Sci, 2017, 60: 058201","journal-title":"Sci China Inf Sci"},{"key":"9602_CR20","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1016\/j.automatica.2008.08.017","volume":"45","author":"D Vrabie","year":"2009","unstructured":"Vrabie D, Pastravanu O, Abu-Khalaf M, et al. Adaptive optimal control for continuous-time linear systems based on policy iteration. Automatica, 2009, 45: 477\u2013484","journal-title":"Automatica"},{"key":"9602_CR21","doi-asserted-by":"publisher","first-page":"2699","DOI":"10.1016\/j.automatica.2012.06.096","volume":"48","author":"Y Jiang","year":"2012","unstructured":"Jiang Y, Jiang Z P. Computational adaptive optimal control for continuous-time linear systems with completely unknown dynamics. Automatica, 2012, 48: 2699\u20132704","journal-title":"Automatica"},{"key":"9602_CR22","doi-asserted-by":"publisher","first-page":"3281","DOI":"10.1016\/j.automatica.2014.10.056","volume":"50","author":"B Luo","year":"2014","unstructured":"Luo B, Wu H N, Huang T W, et al. Data-based approximate policy iteration for affine nonlinear continuous-time optimal control design. Automatica, 2014, 50: 3281\u20133290","journal-title":"Automatica"},{"key":"9602_CR23","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1016\/j.automatica.2010.10.033","volume":"47","author":"H G Zhang","year":"2011","unstructured":"Zhang H G, Wei Q L, Liu D R. An iterative adaptive dynamic programming method for solving a class of nonlinear zero-sum differential games. Automatica, 2011, 47: 207\u2013214","journal-title":"Automatica"},{"key":"9602_CR24","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/s11768-011-0166-4","volume":"9","author":"D Vrabie","year":"2011","unstructured":"Vrabie D, Lewis F L. Adaptive dynamic programming for online solution of a zero-sum differential game. J Control Theor Appl, 2011, 9: 353\u2013360","journal-title":"J Control Theor Appl"},{"key":"9602_CR25","doi-asserted-by":"publisher","first-page":"714","DOI":"10.1109\/TNNLS.2016.2561300","volume":"28","author":"Y H Zhu","year":"2017","unstructured":"Zhu Y H, Zhao D B, Li X G. Iterative adaptive dynamic programming for solving unknown nonlinear zero-sum game based on online data. IEEE Trans Neural Netw Learn Syst, 2017, 28: 714\u2013725","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9602_CR26","doi-asserted-by":"publisher","first-page":"2550","DOI":"10.1109\/TNNLS.2015.2441749","volume":"26","author":"H Modares","year":"2015","unstructured":"Modares H, Lewis F L, Jiang Z P. H\n                           \u221e tracking control of completely unknown continuous-time systems via off-policy reinforcement learning. 
IEEE Trans Neural Netw Learn Syst, 2015, 26: 2550\u20132562","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9602_CR27","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1016\/j.automatica.2016.12.009","volume":"78","author":"B Kiumarsi","year":"2017","unstructured":"Kiumarsi B, Lewis F L, Jiang Z P. H\n                           \u221e control of linear discrete-time systems: off-policy reinforcement learning. Automatica, 2017, 78: 144\u2013152","journal-title":"Automatica"},{"key":"9602_CR28","doi-asserted-by":"publisher","first-page":"1598","DOI":"10.1016\/j.automatica.2012.05.074","volume":"48","author":"K G Vamvoudakis","year":"2012","unstructured":"Vamvoudakis K G, Lewis F L, Hudas G R. Multi-agent differential graphical games: Online adaptive learning solution for synchronization with optimality. Automatica, 2012, 48: 1598\u20131611","journal-title":"Automatica"},{"key":"9602_CR29","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1109\/TSMCB.2012.2203336","volume":"43","author":"H G Zhang","year":"2013","unstructured":"Zhang H G, Cui L L, Luo Y H. Near-optimal control for nonzero-sum differential games of continuous-time nonlinear systems using single-network ADP. IEEE Trans Cybern, 2013, 43: 206\u2013216","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR30","doi-asserted-by":"publisher","first-page":"3331","DOI":"10.1109\/TCYB.2016.2611613","volume":"47","author":"H G Zhang","year":"2017","unstructured":"Zhang H G, Jiang H, Luo C M, et al. Discrete-time nonzero-sum games for multiplayer using policy-iteration-based adaptive dynamic programming algorithms. IEEE Trans Cybern, 2017, 47: 3331\u20133340","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR31","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1016\/j.automatica.2015.08.017","volume":"61","author":"K G Vamvoudakis","year":"2015","unstructured":"Vamvoudakis K G. Non-zero sum Nash Q-learning for unknown deterministic continuous-time linear systems. Automatica, 2015, 61: 274\u2013281","journal-title":"Automatica"},{"key":"9602_CR32","doi-asserted-by":"publisher","first-page":"854","DOI":"10.1109\/TCYB.2015.2488680","volume":"46","author":"D B Zhao","year":"2016","unstructured":"Zhao D B, Zhang Q C, Wang D, et al. Experience replay for optimal control of nonzero-sum game systems with unknown dynamics. IEEE Trans Cybern, 2016, 46: 854\u2013865","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR33","doi-asserted-by":"publisher","first-page":"1645","DOI":"10.1109\/TNNLS.2014.2350835","volume":"26","author":"M Johnson","year":"2015","unstructured":"Johnson M, Kamalapurkar R, Bhasin S, et al. Approximate N-player nonzero-sum game solution for an uncertain continuous nonlinear system. IEEE Trans Neural Netw Learn Syst, 2015, 26: 1645\u20131658","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9602_CR34","doi-asserted-by":"publisher","first-page":"1015","DOI":"10.1109\/TSMC.2013.2295351","volume":"44","author":"D R Liu","year":"2014","unstructured":"Liu D R, Li H L, Wang D. Online synchronous approximate optimal learning algorithm for multi-player non-zero-sum games with unknown dynamics. IEEE Trans Syst Man Cybern Syst, 2014, 44: 1015\u20131027","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"9602_CR35","doi-asserted-by":"publisher","first-page":"704","DOI":"10.1109\/TNNLS.2016.2582849","volume":"28","author":"R Z Song","year":"2017","unstructured":"Song R Z, Lewis F L, Wei Q L. 
Off-policy integral reinforcement learning method to solve nonlinear continuous-time multiplayer nonzero-sum games. IEEE Trans Neural Netw Learn Syst, 2017, 28: 704\u2013713","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9602_CR36","doi-asserted-by":"crossref","unstructured":"Vrabie D, Lewis F L. Integral reinforcement learning for online computation of feedback Nash strategies of nonzero-sum differential games. In: Proceedings of the 49th IEEE Conference on Decision and Control, Atlanta, 2010: 3066\u20133071","DOI":"10.1109\/CDC.2010.5718152"},{"key":"9602_CR37","first-page":"33","volume":"37","author":"K G Vamvoudakis","year":"2017","unstructured":"Vamvoudakis K G, Modares H, Kiumarsi B, et al. Game theory-based control system algorithms with real-time reinforcement learning: how to solve multiplayer games online. IEEE Control Syst, 2017, 37: 33\u201352","journal-title":"IEEE Control Syst"},{"key":"9602_CR38","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1137\/0305004","volume":"5","author":"R J Leake","year":"1967","unstructured":"Leake R J, Liu R W. Construction of suboptimal control sequences. SIAM J Control, 1967, 5: 54\u201363","journal-title":"SIAM J Control"},{"key":"9602_CR39","doi-asserted-by":"publisher","first-page":"1556","DOI":"10.1016\/j.automatica.2011.03.005","volume":"47","author":"K G Vamvoudakis","year":"2011","unstructured":"Vamvoudakis K G, Lewis F L. Multi-player non-zero-sum games: online adaptive learning solution of coupled Hamilton-Jacobi equations. Automatica, 2011, 47: 1556\u20131569","journal-title":"Automatica"},{"key":"9602_CR40","first-page":"279","volume":"8","author":"C Watkins","year":"1992","unstructured":"Watkins C, Dayan P. Q-Learning. Mach Learn, 1992, 8: 279\u2013292","journal-title":"Mach Learn"},{"key":"9602_CR41","unstructured":"Bradtke S J, Ydstie B E, Barto A G. Adaptive linear quadratic control using policy iteration. In: Proceedings of American Control Conference, Baltimore, 1994. 3475\u20133"},{"key":"9602_CR42","doi-asserted-by":"publisher","first-page":"2279","DOI":"10.1007\/s11432-011-4332-6","volume":"54","author":"C L Chen","year":"2011","unstructured":"Chen C L, Dong D Y, Li H X, et al. Hybrid MDP based integrated hierarchical Q-learning. Sci China Inf Sci, 2011, 54: 2279\u20132294","journal-title":"Sci China Inf Sci"},{"key":"9602_CR43","doi-asserted-by":"publisher","first-page":"122203","DOI":"10.1007\/s11432-015-5462-z","volume":"58","author":"Q L Wei","year":"2015","unstructured":"Wei Q L, Liu D R. A novel policy iteration based deterministic Q-learning for discrete-time nonlinear systems. Sci China Inf Sci, 2015, 58: 122203","journal-title":"Sci China Inf Sci"},{"key":"9602_CR44","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1109\/TCYB.2014.2322116","volume":"45","author":"M Palanisamy","year":"2015","unstructured":"Palanisamy M, Modares H, Lewis F L, et al. Continuous-time Q-learning for infinite-horizon discounted cost linear quadratic regulator problems. IEEE Trans Cybern, 2015, 45: 165\u2013176","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR45","doi-asserted-by":"publisher","first-page":"1207","DOI":"10.1109\/TSMC.2016.2563982","volume":"47","author":"P F Yan","year":"2017","unstructured":"Yan P F, Wang D, Li H L, et al. Error bound analysis of Q-function for discounted optimal control problems with policy iteration. 
IEEE Trans Syst Man Cybern Syst, 2017, 47: 1207\u20131216","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"9602_CR46","doi-asserted-by":"publisher","first-page":"3341","DOI":"10.1109\/TCYB.2016.2623859","volume":"47","author":"B Luo","year":"2017","unstructured":"Luo B, Liu D R, Wu H N, et al. Policy gradient adaptive dynamic programming for data-based optimal control. IEEE Trans Cybern, 2017, 47: 3341\u20133354","journal-title":"IEEE Trans Cybern"},{"key":"9602_CR47","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.sysconle.2016.12.003","volume":"100","author":"K G Vamvoudakis","year":"2017","unstructured":"Vamvoudakis K G. Q-learning for continuous-time linear systems: a model-free infinite horizon optimal control approach. Syst Control Lett, 2017, 100: 14\u201320","journal-title":"Syst Control Lett"},{"key":"9602_CR48","doi-asserted-by":"publisher","first-page":"1018","DOI":"10.1109\/TAC.2017.2734840","volume":"63","author":"K G Vamvoudakis","year":"2018","unstructured":"Vamvoudakis K G, Hespanha J P. Cooperative Q-learning for rejection of persistent adversarial inputs in networked linear quadratic systems. IEEE Trans Automat Contr, 2018, 63: 1018\u20131031","journal-title":"IEEE Trans Automat Contr"},{"key":"9602_CR49","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.automatica.2018.05.027","volume":"95","author":"S A A Rizvi","year":"2018","unstructured":"Rizvi S A A, Lin Z L. Output feedback Q-learning for discrete-time linear zero-sum games with application to the H-infinity control. Automatica, 2018, 95: 213\u2013221","journal-title":"Automatica"},{"key":"9602_CR50","doi-asserted-by":"publisher","first-page":"4092","DOI":"10.1109\/TIE.2017.2760245","volume":"65","author":"J N Li","year":"2018","unstructured":"Li J N, Chai T Y, Lewis F L, et al. Off-policy Q-learning: set-point design for optimizing dual-rate rougher flotation operational processes. IEEE Trans Ind Electron, 2018, 65: 4092\u20134102","journal-title":"IEEE Trans Ind Electron"},{"key":"9602_CR51","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1109\/TAC.1968.1098829","volume":"13","author":"D Kleinman","year":"1968","unstructured":"Kleinman D. On an iterative technique for Riccati equation computations. 
IEEE Trans Automat Contr, 1968, 13: 114\u2013115","journal-title":"IEEE Trans Automat Contr"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-018-9602-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11432-018-9602-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-018-9602-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,1]],"date-time":"2020-04-01T00:01:14Z","timestamp":1585699274000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11432-018-9602-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,2]]},"references-count":51,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2019,5]]}},"alternative-id":["9602"],"URL":"https:\/\/doi.org\/10.1007\/s11432-018-9602-1","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,4,2]]},"assertion":[{"value":"4 July 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 September 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 April 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"52204"}}
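
# A minimal sketch of how a Crossref work record like the one above can be
# retrieved and parsed. The endpoint shape (https://api.crossref.org/works/{DOI})
# is the public Crossref REST API, whose "message" payload matches the object
# above; the Python "requests" dependency and the printed fields are
# illustrative choices, not part of the record itself.
import requests

doi = "10.1007/s11432-018-9602-1"
resp = requests.get(f"https://api.crossref.org/works/{doi}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # corresponds to the "message" object above

# Pull a few fields out of the record for display.
title = work["title"][0]
authors = ", ".join(f'{a["given"]} {a["family"]}' for a in work["author"])
print(f"{title} ({work['DOI']})")
print(f"Authors: {authors}")
print(f"References: {work['references-count']}, cited by: {work['is-referenced-by-count']}")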