{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T13:22:34Z","timestamp":1743081754097,"version":"3.40.3"},"publisher-location":"Cham","reference-count":16,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319700861"},{"type":"electronic","value":"9783319700878"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-70087-8_84","type":"book-chapter","created":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T21:10:19Z","timestamp":1508793019000},"page":"822-830","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Off-Policy Reinforcement Learning for Partially Unknown Nonzero-Sum Games"],"prefix":"10.1007","author":[{"given":"Qichao","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Sibo","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,24]]},"reference":[{"key":"84_CR1","volume-title":"Differential Games","author":"A Friedman","year":"2013","unstructured":"Friedman, A.: Differential Games. Courier Corporation, Mineola (2013)"},{"key":"84_CR2","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1109\/TSMC.2016.2531680","volume":"47","author":"Q Zhang","year":"2016","unstructured":"Zhang, Q., Zhao, D., Zhu, Y.: Event-triggered $$H_\\infty $$ control for continuous-time nonlinear system via concurrent learning. IEEE Trans. Syst. Man Cybern. Syst. 47, 1071\u20131081 (2016). doi:10.1109\/TSMC.2016.2531680","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"7","key":"84_CR3","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1109\/TSMC.2016.2531680","volume":"47","author":"Q Zhang","year":"2017","unstructured":"Zhang, Q., Zhao, D., Zhu, Y.: Event-triggered H8 control for continuous-time nonlinear system via concurrent learning. IEEE Trans. Syst. Man Cybern. Syst. 47(7), 1071\u20131081 (2017)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"3","key":"84_CR4","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1007\/BF00929443","volume":"3","author":"AW Starr","year":"1969","unstructured":"Starr, A.W., Ho, Y.C.: Nonzero-sum differential games. J. Optim. Theor. Appl. 3(3), 184\u2013206 (1969)","journal-title":"J. Optim. Theor. Appl."},{"key":"84_CR5","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1016\/j.neucom.2017.01.076","volume":"238","author":"Q Zhang","year":"2017","unstructured":"Zhang, Q., Zhao, D., Zhu, Y.: Data-driven adaptive dynamic programming for continuous-time fully cooperative games with partially constrained inputs. Neurocomputing 238, 377\u2013386 (2017)","journal-title":"Neurocomputing"},{"key":"84_CR6","doi-asserted-by":"publisher","first-page":"286","DOI":"10.2307\/1969529","volume":"54","author":"J Nash","year":"1951","unstructured":"Nash, J.: Non-cooperative games. Ann. Math. 54, 286\u2013295 (1951)","journal-title":"Ann. Math."},{"issue":"3","key":"84_CR7","doi-asserted-by":"publisher","first-page":"854","DOI":"10.1109\/TCYB.2015.2488680","volume":"46","author":"D Zhao","year":"2016","unstructured":"Zhao, D., Zhang, Q., Wang, D., et al.: Experience replay for optimal control of nonzero-sum game systems with unknown dynamics. IEEE Trans. Cybern. 46(3), 854\u2013865 (2016)","journal-title":"IEEE Trans. Cybern."},{"issue":"5","key":"84_CR8","doi-asserted-by":"publisher","first-page":"4101","DOI":"10.1109\/TIE.2016.2597763","volume":"64","author":"Y Zhu","year":"2017","unstructured":"Zhu, Y., Zhao, D., He, H., et al.: Event-triggered optimal control for partially unknown constrained-input systems via adaptive dynamic programming. IEEE Trans. Ind. Electron. 64(5), 4101\u20134109 (2017)","journal-title":"IEEE Trans. Ind. Electron."},{"issue":"8","key":"84_CR9","doi-asserted-by":"publisher","first-page":"1556","DOI":"10.1016\/j.automatica.2011.03.005","volume":"47","author":"KG Vamvoudakis","year":"2011","unstructured":"Vamvoudakis, K.G., Lewis, F.L.: Multi-player non-zero-sum games: online adaptive learning solution of coupled Hamilton-Jacobi equations. Automatica 47(8), 1556\u20131569 (2011)","journal-title":"Automatica"},{"issue":"3","key":"84_CR10","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1016\/j.neunet.2009.03.008","volume":"22","author":"D Vrabie","year":"2009","unstructured":"Vrabie, D., Lewis, F.L.: Neural network approach to continuous-time direct adaptive optimal control for partially unknown nonlinear systems. Neural Netw. 22(3), 237\u2013246 (2009)","journal-title":"Neural Netw."},{"issue":"3","key":"84_CR11","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1109\/JAS.2014.7004681","volume":"1","author":"R Kamalapurkar","year":"2014","unstructured":"Kamalapurkar, R., Klotz, J.R., Dixon, W.E.: Concurrent learning-based approximate feedback-nash equilibrium solution of N-player nonzero-sum differential games. IEEE\/CAA J. Automatica Sin. 1(3), 239\u2013247 (2014)","journal-title":"IEEE\/CAA J. Automatica Sin."},{"issue":"10","key":"84_CR12","first-page":"693","volume":"59","author":"Y Jiang","year":"2012","unstructured":"Jiang, Y., Jiang, Z.: Robust adaptive dynamic programming for large-scale systems with an application to multimachine power systems. IEEE Trans. Circ. Syst. II Express Briefs 59(10), 693\u2013697 (2012)","journal-title":"IEEE Trans. Circ. Syst. II Express Briefs"},{"issue":"11","key":"84_CR13","doi-asserted-by":"publisher","first-page":"1544","DOI":"10.1109\/TSMC.2015.2492941","volume":"46","author":"D Wang","year":"2016","unstructured":"Wang, D., Liu, D., Zhang, Q., Zhao, D.: Data-based adaptive critic designs for nonlinear robust optimal control with uncertain dynamics. IEEE Trans. Syst. Man Cybern. Syst. 46(11), 1544\u20131555 (2016)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"6","key":"84_CR14","doi-asserted-by":"publisher","first-page":"1460","DOI":"10.1109\/TCYB.2016.2548941","volume":"47","author":"C Mu","year":"2017","unstructured":"Mu, C., Ni, Z., Sun, C., He, H.: Data-driven tracking control with adaptive dynamic programming for a class of continuous-time nonlinear systems. IEEE Trans. Cybern. 47(6), 1460\u20131470 (2017)","journal-title":"IEEE Trans. Cybern."},{"issue":"3","key":"84_CR15","doi-asserted-by":"publisher","first-page":"704","DOI":"10.1109\/TNNLS.2016.2582849","volume":"28","author":"R Song","year":"2017","unstructured":"Song, R., Lewis, F.L., Wei, Q.: Off-policy integral reinforcement learning method to solve nonlinear continuous-time multiplayer nonzero-sum games. IEEE Trans. Neural Netw. Learn. Syst. 28(3), 704\u2013713 (2017)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"1","key":"84_CR16","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1109\/TCYB.2014.2319577","volume":"45","author":"B Luo","year":"2015","unstructured":"Luo, B., Wu, H.N., Huang, T.: Off-policy reinforcement learning for $$H_\\infty $$ control design. IEEE Trans. Cybern. 45(1), 65\u201376 (2015)","journal-title":"IEEE Trans. Cybern."}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-70087-8_84","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T15:25:42Z","timestamp":1710343542000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-70087-8_84"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319700861","9783319700878"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-70087-8_84","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"24 October 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 November 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.iconip2017.org\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}