{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T14:55:16Z","timestamp":1761663316354},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2014,10,30]],"date-time":"2014-10-30T00:00:00Z","timestamp":1414627200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2015,5]]},"DOI":"10.1007\/s00521-014-1738-2","type":"journal-article","created":{"date-parts":[[2014,10,29]],"date-time":"2014-10-29T07:35:09Z","timestamp":1414568109000},"page":"775-787","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["A data-based online reinforcement learning algorithm satisfying probably approximately correct principle"],"prefix":"10.1007","volume":"26","author":[{"given":"Yuanheng","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,10,30]]},"reference":[{"key":"1738_CR1","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"1738_CR2","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821091","volume-title":"Reinforcement learning and dynamic programming using function approximators","author":"L Busoniu","year":"2010","unstructured":"Busoniu L, Babuska R, De Schutter B, Ernst D (2010) Reinforcement learning and dynamic programming using function approximators. CRC Press, New York"},{"issue":"7","key":"1738_CR3","doi-asserted-by":"crossref","first-page":"8477","DOI":"10.1016\/j.eswa.2011.01.045","volume":"38","author":"AH Tan","year":"2011","unstructured":"Tan AH, Ong YS, Tapanuj A (2011) A hybrid agent architecture integrating desire, intention and reinforcement learning. Expert Syst Appl 38(7):8477\u20138487","journal-title":"Expert Syst Appl"},{"key":"1738_CR4","doi-asserted-by":"crossref","unstructured":"Tang L, Liu Y-J, Tong S (2014) Adaptive neural control using reinforcement learning for a class of robot manipulator. Neural Comput Appl 25(1):135\u2013141","DOI":"10.1007\/s00521-013-1455-2"},{"issue":"2","key":"1738_CR5","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1007\/s00521-011-0707-2","volume":"22","author":"D Wang","year":"2013","unstructured":"Wang D, Liu D, Zhao D, Huang Y, Zhang D (2013) A neural-network-based iterative GDHP approach for solving a class of nonlinear optimal control problems with control constraints. Neural Comput Appl 22(2):219\u2013227","journal-title":"Neural Comput Appl"},{"issue":"6","key":"1738_CR6","doi-asserted-by":"crossref","first-page":"1355","DOI":"10.1007\/s00521-013-1361-7","volume":"24","author":"Q Wei","year":"2014","unstructured":"Wei Q, Liu D (2014) Stable iterative adaptive dynamic programming algorithm with approximation errors for discrete-time nonlinear systems. Neural Comput Appl 24(6):1355\u20131367","journal-title":"Neural Comput Appl"},{"key":"1738_CR7","doi-asserted-by":"crossref","first-page":"222","DOI":"10.1016\/j.neucom.2013.06.037","volume":"134","author":"B Wang","year":"2014","unstructured":"Wang B, Zhao D, Alippi C, Liu D (2014) Dual heuristic dynamic programming for nonlinear discrete-time uncertain systems with state delay. Neurocomputing 134:222\u2013229","journal-title":"Neurocomputing"},{"key":"1738_CR8","unstructured":"Watkins C (1989) Learning from delayed rewards. PhD thesis, Cambridge University, Cambridge"},{"issue":"2","key":"1738_CR9","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1007\/s00521-003-0369-9","volume":"12","author":"S Hagen ten","year":"2003","unstructured":"ten Hagen S, Kr\u00f6se B (2003) Neural Q-learning. Neural Comput Appl 12(2):81\u201388","journal-title":"Neural Comput Appl"},{"key":"1738_CR10","unstructured":"Rummery GA, Niranjan M (1994) On-line Q-learning using connectionist systems. Tech. Rep. TR 166, Cambridge University Engineering Department, Cambridge, England"},{"issue":"3","key":"1738_CR11","doi-asserted-by":"crossref","first-page":"628","DOI":"10.1109\/TASE.2012.2198057","volume":"9","author":"D Liu","year":"2012","unstructured":"Liu D, Wang D, Zhao D, Wei Q, Jin N (2012) Neural-network-based optimal control for a class of unknown discrete-time nonlinear systems using globalized dual heuristic programming. IEEE Trans Autom Sci Eng 9(3):628\u2013634","journal-title":"IEEE Trans Autom Sci Eng"},{"key":"1738_CR12","unstructured":"Thrun SB (1992) The role of exploration in learning control. In: White D, Sofge D (eds) Handbook for intelligent control: neural, fuzzy and adaptive approaches. Van Nostrand Reinhold, Florence, Kentucky 41022"},{"key":"1738_CR13","doi-asserted-by":"crossref","first-page":"57","DOI":"10.1016\/j.neucom.2012.09.034","volume":"125","author":"D Zhao","year":"2014","unstructured":"Zhao D, Hu Z, Xia Z, Alippi C, Wang D (2014) Full range adaptive cruise control based on supervised adaptive dynamic programming. Neurocomputing 125:57\u201367","journal-title":"Neurocomputing"},{"issue":"11","key":"1738_CR14","doi-asserted-by":"crossref","first-page":"2089","DOI":"10.1007\/s00500-013-1110-y","volume":"17","author":"D Zhao","year":"2013","unstructured":"Zhao D, Wang B, Liu D (2013) A supervised actor-critic approach for adaptive cruise control. Soft Comput 17(11):2089\u20132099","journal-title":"Soft Comput"},{"issue":"4","key":"1738_CR15","doi-asserted-by":"crossref","first-page":"990","DOI":"10.1109\/TITS.2011.2122257","volume":"12","author":"D Zhao","year":"2011","unstructured":"Zhao D, Bai X, Wang F, Xu J, Yu W (2011) DHP for coordinated freeway ramp metering. IEEE Trans Intell Transp Syst 12(4):990\u2013999","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"10(B)","key":"1738_CR16","first-page":"3471","volume":"5","author":"X Bai","year":"2009","unstructured":"Bai X, Zhao D, Yi J (2009) The application of ADHDP $$(\\lambda )$$ ( \u03bb ) method to coordinated multiple ramps metering. Int J Innov Comput 5(10(B)):3471\u20133481","journal-title":"Int J Innov Comput"},{"issue":"2\u20133","key":"1738_CR17","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1023\/A:1017984413808","volume":"49","author":"M Kearns","year":"2002","unstructured":"Kearns M, Singh S (2002) Near-optimal reinforcement learning in polynomial time. Mach Learn 49(2\u20133):209\u2013232","journal-title":"Mach Learn"},{"key":"1738_CR18","first-page":"213","volume":"3","author":"RI Brafman","year":"2003","unstructured":"Brafman RI, Tennenholtz M (2003) R-max\u2014a general polynomial time algorithm for near-optimal reinforcement learning. J Mach Learn Res 3:213\u2013231","journal-title":"J Mach Learn Res"},{"key":"1738_CR19","doi-asserted-by":"crossref","unstructured":"Strehl AL, Littman ML (2005) A theoretical analysis of model-based interval estimation. In: Proceedings of 22nd international conference on machine learning (ICML\u201905), pp 856\u2013863","DOI":"10.1145\/1102351.1102459"},{"key":"1738_CR20","doi-asserted-by":"crossref","unstructured":"Strehl AL, Li L, Wiewiora E, Langford J, Littman ML (2006) PAC model-free reinforcement learning. In: Proceedings of 23rd international conference on machine learning (ICML\u201906), pp 881\u2013888","DOI":"10.1145\/1143844.1143955"},{"key":"1738_CR21","unstructured":"Kakade S, Kearns MJ, Langford J (2003) Exploration in metric state spaces. In: Proceedings of 20th international conference on machine learning (ICML\u201903), pp 306\u2013312"},{"key":"1738_CR22","doi-asserted-by":"crossref","unstructured":"Pazis J, Parr R (2013) PAC optimal exploration in continuous space markov decision processes. In: AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v27i1.8678"},{"issue":"3","key":"1738_CR23","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1007\/s10994-010-5186-7","volume":"81","author":"A Bernstein","year":"2010","unstructured":"Bernstein A, Shimkin N (2010) Adaptive-resolution reinforcement learning with polynomial exploration in deterministic domains. Mach Learn 81(3):359\u2013397","journal-title":"Mach Learn"},{"issue":"2\u20133","key":"1738_CR24","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1023\/A:1017992615625","volume":"49","author":"R Munos","year":"2002","unstructured":"Munos R, Moore A (2002) Variable resolution discretization in optimal control. Mach Learn 49(2\u20133):291\u2013323","journal-title":"Mach Learn"},{"key":"1738_CR25","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst D, Geurts P, Wehenkel L (2005) Tree-based batch mode reinforcement learning. J Mach Learn Res 6:503\u2013556","journal-title":"J Mach Learn Res"},{"key":"1738_CR26","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-1098-6","volume-title":"Computational geometry: an introduction","author":"FP Preparata","year":"1985","unstructured":"Preparata FP, Shamos MI (1985) Computational geometry: an introduction. Springer, Berlin"},{"issue":"18","key":"1738_CR27","doi-asserted-by":"crossref","first-page":"2725","DOI":"10.1049\/iet-cta.2011.0783","volume":"6","author":"H Li","year":"2012","unstructured":"Li H, Liu D (2012) Optimal control for discrete-time affine nonlinear systems using general value iteration. IET Control Theory Appl 6(18):2725\u20132736","journal-title":"IET Control Theory Appl"},{"issue":"4","key":"1738_CR28","doi-asserted-by":"crossref","first-page":"943","DOI":"10.1109\/TSMCB.2008.926614","volume":"38","author":"A Al-Tamimi","year":"2008","unstructured":"Al-Tamimi A, Lewis FL, Abu-Khalaf M (2008) Discrete-time nonlinear HJB solution using approximate dynamic programming: convergence proof. Trans Syst Man Cyber Part B 38(4):943\u2013949","journal-title":"Trans Syst Man Cyber Part B"},{"issue":"7\u20138","key":"1738_CR29","doi-asserted-by":"crossref","first-page":"1843","DOI":"10.1007\/s00521-012-1249-y","volume":"23","author":"D Liu","year":"2013","unstructured":"Liu D, Yang X, Li H (2013) Adaptive optimal control for a class of continuous-time affine nonlinear systems with unknown internal dynamics. Neural Comput Appl 23(7\u20138):1843\u20131850","journal-title":"Neural Comput Appl"},{"issue":"7\u20138","key":"1738_CR30","doi-asserted-by":"crossref","first-page":"1873","DOI":"10.1007\/s00521-012-1243-4","volume":"23","author":"L Zuo","year":"2013","unstructured":"Zuo L, Xu X, Liu C, Huang Z (2013) A hierarchical reinforcement learning approach for optimal path tracking of wheeled mobile robots. Neural Comput Appl 23(7\u20138):1873\u20131883","journal-title":"Neural Comput Appl"},{"issue":"2","key":"1738_CR31","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1007\/s00521-003-0368-x","volume":"12","author":"R Schoknecht","year":"2003","unstructured":"Schoknecht R, Riedmiller M (2003) Reinforcement learning on explicitly specified time scales. Neural Comput Appl 12(2):61\u201380","journal-title":"Neural Comput Appl"},{"key":"1738_CR32","unstructured":"Neumann G (2005) The reinforcement learning toolbox: reinforcement learning for optimal control tasks. Master\u2019s thesis, Technischen Universit\u00e4t (University of Technology) Graz"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-014-1738-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-014-1738-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-014-1738-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,17]],"date-time":"2023-07-17T17:24:45Z","timestamp":1689614685000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-014-1738-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,10,30]]},"references-count":32,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2015,5]]}},"alternative-id":["1738"],"URL":"https:\/\/doi.org\/10.1007\/s00521-014-1738-2","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,10,30]]}}}