{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T16:02:01Z","timestamp":1740153721888,"version":"3.37.3"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2015,8,25]],"date-time":"2015-08-25T00:00:00Z","timestamp":1440460800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61273136"],"award-info":[{"award-number":["61273136"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100011381","name":"State Key Laboratory of Robotics and System","doi-asserted-by":"crossref","award":["SKLRS-2015-ZD-04"],"award-info":[{"award-number":["SKLRS-2015-ZD-04"]}],"id":[{"id":"10.13039\/501100011381","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cogn Comput"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1007\/s12559-015-9350-z","type":"journal-article","created":{"date-parts":[[2015,8,24]],"date-time":"2015-08-24T07:41:10Z","timestamp":1440402070000},"page":"763-771","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Convergence Proof of Approximate Policy Iteration for Undiscounted Optimal Control of Discrete-Time Systems"],"prefix":"10.1007","volume":"7","author":[{"given":"Yuanheng","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Haibo","family":"He","sequence":"additional","affiliation":[]},{"given":"Junhong","family":"Ji","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,8,25]]},"reference":[{"issue":"5","key":"9350_CR1","doi-asserted-by":"crossref","first-page":"779","DOI":"10.1016\/j.automatica.2004.11.034","volume":"41","author":"M Abu-Khalaf","year":"2005","unstructured":"Abu-Khalaf M, Lewis FL. Nearly optimal control laws for nonlinear systems with saturating actuators using a neural network HJB approach. Automatica. 2005;41(5):779\u201391.","journal-title":"Automatica"},{"issue":"12","key":"9350_CR2","doi-asserted-by":"crossref","first-page":"1989","DOI":"10.1109\/TAC.2006.884959","volume":"51","author":"M Abu-Khalaf","year":"2006","unstructured":"Abu-Khalaf M, Lewis F, Huang J. Policy iterations on the Hamilton\u2013Jacobi\u2013Isaacs equation for $$\\text{ H }_{\\infty }$$ H \u221e state feedback control with input saturation. IEEE Trans Autom Control. 2006;51(12):1989\u201395.","journal-title":"IEEE Trans Autom Control"},{"issue":"1","key":"9350_CR3","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1109\/TSMCB.2006.880135","volume":"37","author":"A Al-Tamimi","year":"2007","unstructured":"Al-Tamimi A, Abu-Khalaf M, Lewis F. Adaptive critic designs for discrete-time zero-sum games with application to $$\\text{ H }_{\\infty }$$ H \u221e control. IEEE Trans Syst Man Cybern B. 2007;37(1):240\u20137.","journal-title":"IEEE Trans Syst Man Cybern B"},{"issue":"4","key":"9350_CR4","doi-asserted-by":"crossref","first-page":"943","DOI":"10.1109\/TSMCB.2008.926614","volume":"38","author":"A Al-Tamimi","year":"2008","unstructured":"Al-Tamimi A, Lewis F, Abu-Khalaf M. Discrete-time nonlinear HJB solution using approximate dynamic programming: Convergence proof. IEEE Trans Syst Man Cybern B. 2008;38(4):943\u20139.","journal-title":"IEEE Trans Syst Man Cybern B"},{"key":"9350_CR5","doi-asserted-by":"crossref","unstructured":"Barty K, Girardeau P, Roy JS, Strugarek C. Q-learning with continuous state spaces and finite decision set. In: Proceedings of the 2007 IEEE international symposium on approximate dynamic programming and reinforcement learning (ADPRL 2007); 2007. pp. 346\u2013351.","DOI":"10.1109\/ADPRL.2007.368209"},{"key":"9350_CR6","volume-title":"Neuro-dynamic programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas DP, Tsitsiklis JN. Neuro-dynamic programming. Belmont, MA: Athena Scientific; 1996."},{"issue":"2","key":"9350_CR7","doi-asserted-by":"crossref","first-page":"264","DOI":"10.1007\/s12559-012-9191-y","volume":"5","author":"M Boaro","year":"2013","unstructured":"Boaro M, Fuselli D, Angelis F, Liu D, Wei Q, Piazza F. Adaptive dynamic programming algorithm for renewable energy scheduling and battery management. Cogn Comput. 2013;5(2):264\u201377.","journal-title":"Cogn Comput"},{"key":"9350_CR8","doi-asserted-by":"crossref","unstructured":"Busoniu L, Ernst D, De Schutter B, Babuska R. Fuzzy approximation for convergent model-based reinforcement learning. In: Proceedings of the 2007 IEEE international conference on Fuzzy systems (FUZZ-IEEE-07), London, UK; 2007. pp. 968\u2013973.","DOI":"10.1109\/FUZZY.2007.4295497"},{"key":"9350_CR9","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821091","volume-title":"Reinforcement learning and dynamic programming using function approximators","author":"L Busoniu","year":"2010","unstructured":"Busoniu L, Babuska R, De Schutter B, Ernst D. Reinforcement learning and dynamic programming using function approximators. New York: CRC Press; 2010."},{"issue":"4","key":"9350_CR10","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1007\/s12559-012-9143-6","volume":"4","author":"F Chen","year":"2012","unstructured":"Chen F, Jiang B, Tao G. Fault self-repairing flight control of a small helicopter via fuzzy feedforward and quantum control techniques. Cogn Comput. 2012;4(4):543\u20138.","journal-title":"Cogn Comput"},{"issue":"4","key":"9350_CR11","doi-asserted-by":"crossref","first-page":"578","DOI":"10.1016\/j.fss.2009.05.003","volume":"161","author":"V Derhami","year":"2010","unstructured":"Derhami V, Majd VJ, Nili Ahmadabadi M. Exploration and exploitation balance management in fuzzy reinforcement learning. Fuzzy Sets Syst. 2010;161(4):578\u201395.","journal-title":"Fuzzy Sets Syst"},{"issue":"12","key":"9350_CR12","doi-asserted-by":"crossref","first-page":"2733","DOI":"10.1109\/TCYB.2014.2314612","volume":"44","author":"A Heydari","year":"2014","unstructured":"Heydari A. Revisiting approximate dynamic programming and its convergence. IEEE Trans Cybern. 2014;44(12):2733\u201343.","journal-title":"IEEE Trans Cybern"},{"key":"9350_CR13","volume-title":"Dynamic programming and Markov processes","author":"R Howard","year":"1960","unstructured":"Howard R. Dynamic programming and Markov processes. Cambridge, MA: MIT Press; 1960."},{"issue":"2","key":"9350_CR14","doi-asserted-by":"crossref","first-page":"200","DOI":"10.1007\/s12559-013-9203-6","volume":"5","author":"G Hui","year":"2013","unstructured":"Hui G, Huang B, Wang Y, Meng X. Quantized control design for coupled dynamic networks with communication constraints. Cogn Comput. 2013;5(2):200\u20136.","journal-title":"Cogn Comput"},{"issue":"3","key":"9350_CR15","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1007\/s12559-009-9020-0","volume":"1","author":"E Ikonen","year":"2009","unstructured":"Ikonen E, Najim K. Multiple model-based control using finite controlled markov chains. Cogn Comput. 2009;1(3):234\u201343.","journal-title":"Cogn Comput"},{"issue":"2","key":"9350_CR16","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1007\/s12559-012-9186-8","volume":"5","author":"Z Jia","year":"2013","unstructured":"Jia Z, Song Y, Cai W. Bio-inspired approach for smooth motion control of wheeled mobile robots. Cogn Comput. 2013;5(2):252\u201363.","journal-title":"Cogn Comput"},{"issue":"3","key":"9350_CR17","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1109\/MCAS.2009.933854","volume":"9","author":"F Lewis","year":"2009","unstructured":"Lewis F, Vrabie D. Reinforcement learning and adaptive dynamic programming for feedback control. IEEE Circuits Syst Mag. 2009;9(3):32\u201350.","journal-title":"IEEE Circuits Syst Mag"},{"issue":"2","key":"9350_CR18","doi-asserted-by":"crossref","first-page":"779","DOI":"10.1109\/TSMCB.2012.2216523","volume":"43","author":"D Liu","year":"2013","unstructured":"Liu D, Wei Q. Finite-approximation-error-based optimal control approach for discrete-time nonlinear systems. IEEE Trans Cybern. 2013;43(2):779\u201389.","journal-title":"IEEE Trans Cybern"},{"issue":"3","key":"9350_CR19","doi-asserted-by":"crossref","first-page":"621","DOI":"10.1109\/TNNLS.2013.2281663","volume":"25","author":"D Liu","year":"2014","unstructured":"Liu D, Wei Q. Policy iteration adaptive dynamic programming algorithm for discrete-time nonlinear systems. IEEE Trans Neural Netw Learn Syst. 2014;25(3):621\u201334.","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"4","key":"9350_CR20","doi-asserted-by":"crossref","first-page":"445","DOI":"10.1007\/s12559-014-9313-9","volume":"7","author":"F Meng","year":"2015","unstructured":"Meng F, Chen X. Correlation coefficients of hesitant fuzzy sets and their application based on fuzzy measures. Cogn Comput. 2015;7(4):445\u201363.","journal-title":"Cogn Comput"},{"key":"9350_CR21","unstructured":"Munos R. Error bounds for approximate policy iteration. In: Proceedings of the 20th international conference on machine learning, Washington, Columbia; 2003. pp. 560\u2013576."},{"issue":"3","key":"9350_CR22","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1007\/s12559-009-9021-z","volume":"1","author":"D Muse","year":"2009","unstructured":"Muse D, Wermter S. Actor-critic learning for platform-independent robot navigation. Cogn Comput. 2009;1(3):203\u201320.","journal-title":"Cogn Comput"},{"issue":"1\u20132","key":"9350_CR23","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1023\/A:1022192903948","volume":"13","author":"A Nedi\u0107","year":"2003","unstructured":"Nedi\u0107 A, Bertsekas DP. Least squares policy evaluation algorithms with linear function approximation. Discrete Event Dyn Syst. 2003;13(1\u20132):79\u2013110.","journal-title":"Discrete Event Dyn Syst"},{"issue":"4","key":"9350_CR24","doi-asserted-by":"crossref","first-page":"515","DOI":"10.1007\/s12559-011-9117-0","volume":"4","author":"R Samar","year":"2012","unstructured":"Samar R, Kamal W. Optimal path computation for autonomous aerial vehicles. Cogn Comput. 2012;4(4):515\u201325.","journal-title":"Cogn Comput"},{"issue":"3","key":"9350_CR25","doi-asserted-by":"crossref","first-page":"496","DOI":"10.1007\/s12559-014-9258-z","volume":"6","author":"Y Song","year":"2014","unstructured":"Song Y, Li Q, Kang Y. Conjugate unscented fastslam for autonomous mobile robots in large-scale environments. Cogn Comput. 2014;6(3):496\u2013509.","journal-title":"Cogn Comput"},{"key":"9350_CR26","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG. Reinforcement learning: an introduction. Cambridge, MA: MIT Press; 1998."},{"key":"9350_CR27","doi-asserted-by":"crossref","unstructured":"Vieira D, Adeodato P, Goncalves P. A temporal difference GNG-based algorithm that can learn to control in reinforcement learning environments. In: Proceedings of the 12th international conference on machine learning and applications (ICMLA 2013), 2013; vol\u00a01, pp. 329\u2013332.","DOI":"10.1109\/ICMLA.2013.67"},{"issue":"2","key":"9350_CR28","doi-asserted-by":"crossref","first-page":"627","DOI":"10.1109\/TASE.2013.2296206","volume":"11","author":"D Wang","year":"2014","unstructured":"Wang D, Liu D, Li H. Policy iteration algorithm for online design of robust control for a class of continuous-time nonlinear systems. IEEE Trans Autom Sci Eng. 2014;11(2):627\u201332.","journal-title":"IEEE Trans Autom Sci Eng"},{"issue":"10","key":"9350_CR29","first-page":"1","volume":"56","author":"Y Wang","year":"2013","unstructured":"Wang Y, Feng G. On finite-time stability and stabilization of nonlinear port-controlled Hamiltonian systems. Sci China Inf Sci. 2013;56(10):1\u201314.","journal-title":"Sci China Inf Sci"},{"issue":"4","key":"9350_CR30","doi-asserted-by":"crossref","first-page":"1176","DOI":"10.1109\/TASE.2013.2280974","volume":"11","author":"Q Wei","year":"2014","unstructured":"Wei Q, Liu D. A novel iterative $$\\theta$$ \u03b8 -adaptive dynamic programming for discrete-time nonlinear systems. IEEE Trans Autom Sci Eng. 2014;11(4):1176\u201390.","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"9350_CR31","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-4757-2","volume-title":"Adaptive dynamic programming for control: algorithms and stability","author":"H Zhang","year":"2013","unstructured":"Zhang H, Liu D, Luo Y, Wang D. Adaptive dynamic programming for control: algorithms and stability. London: Springer; 2013."},{"issue":"2","key":"9350_CR32","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1109\/TNNLS.2014.2371046","volume":"26","author":"D Zhao","year":"2015","unstructured":"Zhao D, Zhu Y. MEC-a near-optimal online reinforcement learning algorithm for continuous deterministic systems. IEEE Trans Neural Netw Learn Syst. 2015;26(2):346\u201356.","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"9350_CR33","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s11432-013-4851-4","volume":"57","author":"Y Zhao","year":"2014","unstructured":"Zhao Y, Cheng D. On controllability and stabilizability of probabilistic Boolean control networks. Sci China Inf Sci. 2014;57(1):1\u201314.","journal-title":"Sci China Inf Sci"},{"key":"9350_CR34","doi-asserted-by":"crossref","first-page":"124","DOI":"10.1016\/j.neucom.2013.11.055","volume":"149","author":"Y Zhu","year":"2015","unstructured":"Zhu Y, Zhao D, Liu D. Convergence analysis and application of fuzzy-HDP for nonlinear discrete-time HJB systems. Neurocomputing. 2015;149:124\u201331.","journal-title":"Neurocomputing"}],"container-title":["Cognitive Computation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-015-9350-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12559-015-9350-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-015-9350-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,15]],"date-time":"2020-05-15T19:03:41Z","timestamp":1589569421000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12559-015-9350-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,8,25]]},"references-count":34,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["9350"],"URL":"https:\/\/doi.org\/10.1007\/s12559-015-9350-z","relation":{},"ISSN":["1866-9956","1866-9964"],"issn-type":[{"type":"print","value":"1866-9956"},{"type":"electronic","value":"1866-9964"}],"subject":[],"published":{"date-parts":[[2015,8,25]]}}}