{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T13:41:27Z","timestamp":1760708487614},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"7-8","license":[{"start":{"date-parts":[[2012,12,25]],"date-time":"2012-12-25T00:00:00Z","timestamp":1356393600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2013,12]]},"DOI":"10.1007\/s00521-012-1243-4","type":"journal-article","created":{"date-parts":[[2012,12,24]],"date-time":"2012-12-24T04:22:32Z","timestamp":1356322952000},"page":"1873-1883","source":"Crossref","is-referenced-by-count":17,"title":["A hierarchical reinforcement learning approach for optimal path tracking of wheeled mobile robots"],"prefix":"10.1007","volume":"23","author":[{"given":"Lei","family":"Zuo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunming","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenhua","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2012,12,25]]},"reference":[{"issue":"3","key":"1243_CR1","doi-asserted-by":"crossref","first-page":"997","DOI":"10.1109\/25.845116","volume":"49","author":"SY Oh","year":"2000","unstructured":"Oh SY, Lee JH et al (2000) A new reinforcement learning vehicle control architecture for vision-based road following [J]. IEEE Trans Veh Technol 49(3):997\u20131005","journal-title":"IEEE Trans Veh Technol"},{"issue":"5","key":"1243_CR2","doi-asserted-by":"crossref","first-page":"881","DOI":"10.1109\/TIE.2003.817484","volume":"50","author":"T Yamaguchi","year":"2003","unstructured":"Yamaguchi T, Sato E et al (2003) Intelligent space and human centered robotics [J]. IEEE Trans Ind Electron 50(5):881\u2013889","journal-title":"IEEE Trans Ind Electron"},{"issue":"3","key":"1243_CR3","doi-asserted-by":"crossref","first-page":"612","DOI":"10.1109\/TIE.2003.812457","volume":"50","author":"JM Lee","year":"2003","unstructured":"Lee JM, Son K et al (2003) Localization of a mobile robot using the image of a moving object [J]. IEEE Trans Ind Electron 50(3):612\u2013619","journal-title":"IEEE Trans Ind Electron"},{"issue":"5","key":"1243_CR4","doi-asserted-by":"crossref","first-page":"661","DOI":"10.1109\/TCST.2004.826964","volume":"12","author":"TC Lee","year":"2004","unstructured":"Lee TC, Tsai CY et al (2004) Fast parking control of mobile robots: a motion planning approach with experimental validation [J]. IEEE Trans Control Syst Technol 12(5):661\u2013676","journal-title":"IEEE Trans Control Syst Technol"},{"issue":"5","key":"1243_CR5","doi-asserted-by":"crossref","first-page":"1418","DOI":"10.1109\/TIM.2004.834093","volume":"53","author":"J Palacin","year":"2004","unstructured":"Palacin J, Salse JA et al (2004) Building a mobile robot for a floor-cleaning operation in domestic environments [J]. IEEE Trans Instrum Meas 53(5):1418\u20131424","journal-title":"IEEE Trans Instrum Meas"},{"issue":"2","key":"1243_CR6","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1109\/MCS.2005.1411382","volume":"25","author":"D Ding","year":"2005","unstructured":"Ding D, Cooper RA (2005) Electric-powered wheelchairs: a review of current technology and insight into future direction [J]. IEEE Control Syst Mag 25(2):22\u201334","journal-title":"IEEE Control Syst Mag"},{"issue":"1","key":"1243_CR7","doi-asserted-by":"crossref","first-page":"148","DOI":"10.1109\/TRA.2003.819730","volume":"20","author":"HS Shim","year":"2004","unstructured":"Shim HS, Sung YG (2004) Stability and four-posture control for nonholonomic mobile robots [J]. IEEE Trans Robot Autom 20(1):148\u2013154","journal-title":"IEEE Trans Robot Autom"},{"issue":"3","key":"1243_CR8","doi-asserted-by":"crossref","first-page":"382","DOI":"10.1109\/TMECH.2009.2018287","volume":"14","author":"DB Zhao","year":"2009","unstructured":"Zhao DB, Deng XY, Yi JQ (2009) Motion and internal force control for omni-directional wheeled mobile robots [J]. IEEE ASME Trans Mechatron 14(3):382\u2013387","journal-title":"IEEE ASME Trans Mechatron"},{"issue":"11","key":"1243_CR9","doi-asserted-by":"crossref","first-page":"798","DOI":"10.1109\/TCSII.2005.852528","volume":"52","author":"Y Wu","year":"2005","unstructured":"Wu Y, Wang B et al (2005) Finite-time tracking controller design for nonholonomic systems with extended chained form[J]. IEEE Trans Circuit Syst II Exp Briefs 52(11):798\u2013802","journal-title":"IEEE Trans Circuit Syst II Exp Briefs"},{"issue":"2","key":"1243_CR10","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1109\/TFUZZ.2006.879998","volume":"15","author":"G Antonelli","year":"2007","unstructured":"Antonelli G, Chiaverini S et al (2007) A fuzzy-logic-based approach for mobile robot path tracking[J]. IEEE Trans Fuzzy Syst 15(2):211\u2013221","journal-title":"IEEE Trans Fuzzy Syst"},{"issue":"1","key":"1243_CR11","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1109\/TITS.2008.2011697","volume":"10","author":"GV Raffo","year":"2009","unstructured":"Raffo GV, Gomes GK et al (2009) A predictive controller for autonomous vehicle path tracking[J]. IEEE Trans Intell Transp Syst 10(1):92\u2013102","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"7","key":"1243_CR12","doi-asserted-by":"crossref","first-page":"2667","DOI":"10.1109\/TIE.2009.2020077","volume":"56","author":"R Wai","year":"2009","unstructured":"Wai R, Liu C (2009) Design of dynamic petri recurrent fuzzy neural network and its application to path-tracking control of nonholonomic mobile robot[J]. IEEE Trans Ind Electron 56(7):2667\u20132683","journal-title":"IEEE Trans Ind Electron"},{"key":"1243_CR13","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1016\/j.neucom.2011.06.035","volume":"88","author":"O Mohareri","year":"2012","unstructured":"Mohareri O, Dhaouadi R et al (2012) Indirect adaptive tracking control of a nonholonomic mobile robot via neural networks[J]. Neurocomputing 88:54\u201366","journal-title":"Neurocomputing"},{"issue":"8","key":"1243_CR14","doi-asserted-by":"crossref","first-page":"1362","DOI":"10.1109\/TAC.2007.902731","volume":"52","author":"AP Aguiar","year":"2007","unstructured":"Aguiar AP, Hespanha JP (2007) Trajectory-tracking and path-following of underactuated autonomous vehicles with parametric modeling uncertainty[J]. IEEE Trans Autom Cont 52(8):1362\u20131379","journal-title":"IEEE Trans Autom Cont"},{"issue":"3","key":"1243_CR15","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1109\/TSMCB.2008.2009464","volume":"39","author":"D Xu","year":"2009","unstructured":"Xu D, Zhao DB, Yi JQ, Tan XM (2009) Trajectory tracking control of omnidirectional wheeled mobile manipulators: robust neural network based sliding mode approach [J]. IEEE Trans Syst Man Cybern Part B 39(3):788\u2013799","journal-title":"IEEE Trans Syst Man Cybern Part B"},{"issue":"5","key":"1243_CR16","doi-asserted-by":"crossref","first-page":"1199","DOI":"10.1109\/TCST.2009.2034639","volume":"18","author":"BS Park","year":"2010","unstructured":"Park BS, Yoo SJ et al (2010) A simple adaptive control approach for trajectory tracking of electrically driven nonholonomic mobile robots[J]. IEEE Trans Control Syst Technol 18(5):1199\u20131206","journal-title":"IEEE Trans Control Syst Technol"},{"key":"1243_CR17","volume-title":"Reinforcement learning: an introduction[M]","author":"R Sutton","year":"1998","unstructured":"Sutton R, Barto AG (1998) Reinforcement learning: an introduction[M]. The MIT Press, Cambridge"},{"issue":"3","key":"1243_CR18","first-page":"657","volume":"7","author":"Q Zhang","year":"2012","unstructured":"Zhang Q, Li M, Wang XS, Zhang Y (2012) Reinforcement learning in robot path optimization [J]. J Softw 7(3):657\u2013662","journal-title":"J Softw"},{"key":"1243_CR19","doi-asserted-by":"crossref","unstructured":"Zhang PC, Xu X, Liu C, Yuan Q (2009) Reinforcement learning control of a real mobile robot using approximate policy iteration [C]. ISNN 2009, Part III, Lecture Notes in Computer Science, LNCS 5553, pp 278\u2013288","DOI":"10.1007\/978-3-642-01513-7_30"},{"key":"1243_CR20","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1016\/S0019-0578(07)60032-9","volume":"43","author":"GG Yen","year":"2004","unstructured":"Yen GG, Hickey TW (2004) Reinforcement learning algorithms for robotic navigation in dynamic environments. ISA Trans 43:217\u2013230","journal-title":"ISA Trans"},{"issue":"2","key":"1243_CR21","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/MCI.2009.932261","volume":"4","author":"FY Wang","year":"2009","unstructured":"Wang FY, Zhang H, Liu D (2009) Adaptive dynamic programming: an introduction [J]. IEEE Comput Intell Mag 4(2):39\u201347","journal-title":"IEEE Comput Intell Mag"},{"key":"1243_CR22","unstructured":"Sutton R (1996) Generalization in reinforcement learning: successful examples using sparse coarse coding[C]. In: Advances in Neural Information Processing Systems 8 (Proceedings of the 1995 conference). MIT Press, pp 1038\u20131044"},{"key":"1243_CR23","first-page":"259","volume":"16","author":"X Xu","year":"2002","unstructured":"Xu X, He H et al (2002) Efficient reinforcement learning using recursive least-squares methods[J]. J Art Intell Res 16:259\u2013292","journal-title":"J Art Intell Res"},{"key":"1243_CR24","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis MG, Parr R (2003) Least-squares policy Iteration[J]. J Mach Learn Res 4:1107\u20131149","journal-title":"J Mach Learn Res"},{"issue":"4","key":"1243_CR25","doi-asserted-by":"crossref","first-page":"988","DOI":"10.1109\/TSMCB.2008.922019","volume":"38","author":"D Liu","year":"2008","unstructured":"Liu D, Javaherian H, Kovalenko O, Huang T (2008) Adaptive critic learning techniques for engine torque and air-fuel ratio control [J]. IEEE Trans Syst Man Cybern Part B Cybern 38(4):988\u2013993","journal-title":"IEEE Trans Syst Man Cybern Part B Cybern"},{"issue":"9","key":"1243_CR26","doi-asserted-by":"crossref","first-page":"1490","DOI":"10.1109\/TNN.2009.2027233","volume":"20","author":"H Zhang","year":"2009","unstructured":"Zhang H, Luo Y, Liu D (2009) Neural-network-based near-optimal control for a class of discrete-time affine nonlinear systems with control constraints. IEEE Trans Neural Netw 20(9):1490\u20131503","journal-title":"IEEE Trans Neural Netw"},{"issue":"1","key":"1243_CR27","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1016\/j.automatica.2010.10.033","volume":"47","author":"HG Zhang","year":"2011","unstructured":"Zhang HG, Wei QL, Liu D (2011) An iterative approximate dynamic programming method to solve for a class of nonlinear zero-sum differential games. Automatica 47(1):207\u2013214","journal-title":"Automatica"},{"key":"1243_CR28","doi-asserted-by":"crossref","unstructured":"Yang Q, Jagannathan S (2007) Online reinforcement learning neural network controller design for nanomanipulation. In: Proceedings of IEEE symposium on approximate dynamic programming and reinforcement learning. Honolulu, HI, pp 225\u2013232","DOI":"10.1109\/ADPRL.2007.368192"},{"issue":"4","key":"1243_CR29","doi-asserted-by":"crossref","first-page":"973","DOI":"10.1109\/TNN.2007.899161","volume":"18","author":"X Xu","year":"2007","unstructured":"Xu X, Hu DW et al (2007) Kernel-based least squares policy iteration for reinforcement learning[J]. IEEE Trans Neural Netw 18(4):973\u2013992","journal-title":"IEEE Trans Neural Netw"},{"key":"1243_CR30","volume-title":"Discovering hierarchy in reinforcement learning[D]","author":"H Bernhard","year":"2003","unstructured":"Bernhard H (2003) Discovering hierarchy in reinforcement learning[D]. University of New South Wales, Australia"},{"key":"1243_CR31","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"SRSP Doina","year":"1999","unstructured":"Doina SRSP et al (1999) Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning[J]. Artif Intell 112:181\u2013211","journal-title":"Artif Intell"},{"key":"1243_CR32","volume-title":"Hierarchical control and learning for Markov decision processes[D]","author":"R Parr","year":"1998","unstructured":"Parr R (1998) Hierarchical control and learning for Markov decision processes[D]. University of California, Berkeley, California"},{"key":"1243_CR33","first-page":"227","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich TG (2000) Hierarchical reinforcement learning with the MAXQ value function decomposition[J]. J Art Intell Res 13:227\u2013303","journal-title":"J Art Intell Res"},{"issue":"12","key":"1243_CR34","doi-asserted-by":"crossref","first-page":"1863","DOI":"10.1109\/TNN.2011.2168422","volume":"22","author":"X Xu","year":"2011","unstructured":"Xu X, Liu C et al (2011) Hierarchical approximate policy iteration with binary-tree state space decomposition[J]. IEEE Trans Neural Netw 22(12):1863\u20131877","journal-title":"IEEE Trans Neural Netw"},{"key":"1243_CR35","volume-title":"Neuro-dynamic programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas DP, Tsitsiklis JN (1996) Neuro-dynamic programming. Athena Scientific, Belmont, Massachusetts"},{"key":"1243_CR36","volume-title":"Statistical learning theory[M]","author":"V Vapnik","year":"1998","unstructured":"Vapnik V (1998) Statistical learning theory[M]. John Wiley and Sons, Inc., New York"},{"key":"1243_CR37","first-page":"2169","volume":"8","author":"S Mahadevan","year":"2007","unstructured":"Mahadevan S, Maggioni M (2007) Proto-value functions: a Laplacian framework for learning representation and control in Markov decision processes[J]. J Mach Learn Res 8:2169\u20132231","journal-title":"J Mach Learn Res"},{"issue":"4","key":"1243_CR38","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1561\/2200000003","volume":"1","author":"S Mahadevan","year":"2008","unstructured":"Mahadevan S (2008) Learning representation and control in Markov decision processes: new Frontiers[J]. Found Trends Mach Learn 1(4):403\u2013565","journal-title":"Found Trends Mach Learn"},{"key":"1243_CR39","doi-asserted-by":"crossref","first-page":"1209","DOI":"10.1016\/S0967-0661(01)00066-1","volume":"9","author":"JE Normey-Rico","year":"2001","unstructured":"Normey-Rico JE, Alcalab I et al (2001) Mobile robot path tracking using a robust PID controller[J]. Control Eng Pract 9:1209\u20131214","journal-title":"Control Eng Pract"},{"key":"1243_CR40","unstructured":"Mahadevan S, Maggioni M (2006) Value function approximation with diffusion wavelets and Laplacian eigenfunctions[C]. In: Proceedings of the neural information processing systems (NIPS). MIT Press"},{"key":"1243_CR41","unstructured":"Munos R (2003) Error bounds for approximate policy iteration. In: Proceedings of the 20th annual international conference machine learning. p 560"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-012-1243-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-012-1243-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-012-1243-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,7]],"date-time":"2019-07-07T15:10:50Z","timestamp":1562512250000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-012-1243-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,12,25]]},"references-count":41,"journal-issue":{"issue":"7-8","published-print":{"date-parts":[[2013,12]]}},"alternative-id":["1243"],"URL":"https:\/\/doi.org\/10.1007\/s00521-012-1243-4","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,12,25]]}}}