{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,4]],"date-time":"2025-10-04T14:38:24Z","timestamp":1759588704412},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2009,7,24]],"date-time":"2009-07-24T00:00:00Z","timestamp":1248393600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Cogn Comput"],"published-print":{"date-parts":[[2009,9]]},"DOI":"10.1007\/s12559-009-9021-z","type":"journal-article","created":{"date-parts":[[2009,7,23]],"date-time":"2009-07-23T20:17:11Z","timestamp":1248380231000},"page":"203-220","source":"Crossref","is-referenced-by-count":6,"title":["Actor-Critic Learning for Platform-Independent Robot Navigation"],"prefix":"10.1007","volume":"1","author":[{"given":"David","family":"Muse","sequence":"first","affiliation":[]},{"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,7,24]]},"reference":[{"key":"9021_CR1","doi-asserted-by":"crossref","unstructured":"Busquets D, Mantaras RL, Sierra C, Ditterich TG. Reinforcement learning for landmark-based robot navigation. Proceedings of the International Conference on Autonomous Agents and Multiagent Systems; 2002.","DOI":"10.1145\/544862.544938"},{"key":"9021_CR2","unstructured":"Hafner R, Riedmiller M. Reinforcement learning on an omni-directional mobile robot. IEEE\/RSJ International Conference on Intelligent Robots and Systems for Human Security, Health, and Prosperity; 2003."},{"key":"9021_CR3","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1016\/j.robot.2003.11.006","volume":"46","author":"T Kondo","year":"2004","unstructured":"Kondo T, Ito K. A reinforcement learning with evolutionary state recruitment strategy for autonomous mobile robot control. Robot Auton Syst. 2004;46:11\u2013124.","journal-title":"Robot Auton Syst"},{"key":"9021_CR4","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1016\/j.engappai.2004.08.005","volume":"17","author":"ISK Lee","year":"2004","unstructured":"Lee ISK, Lau HYK. Adaptive state space partitioning for reinforcement learning. Eng Appl Artif Intell. 2004;17:577\u201388.","journal-title":"Eng Appl Artif Intell"},{"key":"9021_CR5","doi-asserted-by":"crossref","unstructured":"Weber C, Muse D, Elshaw M, Wermter S. A camera-direction dependent visual-motor coordinate transformation for a visually guided neural robot. Applications and Innovations in Intelligent Systems XIII\u2014International Conference on Innovative Techniques and Applications of Artificial Intelligence; 2005. p. 151\u201364.","DOI":"10.1007\/1-84628-224-1_12"},{"key":"9021_CR6","doi-asserted-by":"crossref","unstructured":"Weber C, Muse D, Wermter S. Robot docking based on omni-directional vision and reinforcement learning. Research and Development in Intelligent Systems XXII\u2014International Conference on Innovative Techniques and Applications of Artificial Intelligence; 2005. p. 23\u201336.","DOI":"10.1007\/978-1-84628-226-3_3"},{"key":"9021_CR7","doi-asserted-by":"crossref","DOI":"10.1007\/b139051","volume-title":"Biomimetic neural learning for intelligent robots","author":"S Wermter","year":"2005","unstructured":"Wermter S, Palm G, Elshaw M. Biomimetic neural learning for intelligent robots. New York: Springer; 2005."},{"key":"9021_CR8","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1016\/j.neunet.2009.01.004","volume":"22","author":"S Wermter","year":"2009","unstructured":"Wermter S, Page M, Knowles M, Gallese V, Pulverm\u00fcller F, Taylor J. Multimodal communication in animals, humans and robots: an introduction to perspectives in brain-inspired informatics. Neural Netw. 2009;22:111\u20135.","journal-title":"Neural Netw"},{"issue":"4","key":"9021_CR9","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1016\/S1389-0417(03)00008-1","volume":"4","author":"D Filliat","year":"2003","unstructured":"Filliat D, Meyer JA. Map-based navigation in mobile robots. I. A review of localization strategies. J Cogn Syst Res. 2003;4(4):243\u201382.","journal-title":"J Cogn Syst Res"},{"issue":"4","key":"9021_CR10","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1016\/S1389-0417(03)00007-X","volume":"4","author":"D Filliat","year":"2003","unstructured":"Filliat D, Meyer JA. Map-based navigation in mobile robots. II. A review of map-learning and path-planning strategies. J Cogn Syst Res. 2003;4(4):283\u2013317.","journal-title":"J Cogn Syst Res"},{"key":"9021_CR11","volume-title":"Reinforcement learning an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG. Reinforcement learning an introduction. Cambridge, MA: MIT Press; 1998."},{"key":"9021_CR12","unstructured":"W\u00f6rg\u00f6tter F. Actor-Critic models of animal control\u2014a critique of reinforcement learning. Proceeding of Fourth International ICSC Symposium on Engineering of Intelligent Systems; 2004."},{"key":"9021_CR13","doi-asserted-by":"crossref","first-page":"198","DOI":"10.1007\/3-540-44631-1_14","volume":"1986","author":"C Sierra","year":"2002","unstructured":"Sierra C, Mantaras RL, Busquets D. Multiagent bidding bechanisms for robot qualitative navigation. Lect Notes Comput Sci. 2002;1986:198\u2013205.","journal-title":"Lect Notes Comput Sci"},{"key":"9021_CR14","unstructured":"Gaskett C, Fletcher L, Zelinsky A. Reinforcement learning for visual servoing of a mobile robot. Proceedings of the Australian Conference on Robotics and Automation; 2000."},{"key":"9021_CR15","doi-asserted-by":"crossref","DOI":"10.1515\/9781400874668","volume-title":"Adaptive control process: a guided tour","author":"R Bellman","year":"1961","unstructured":"Bellman R. Adaptive control process: a guided tour. Princeton: Princeton University Press; 1961."},{"key":"9021_CR16","unstructured":"Lighthill J. Artificial intelligence: a general survey. Artificial Intelligence: A Paper Symposium. Science Research Council; 1973."},{"issue":"2\u20134","key":"9021_CR17","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1016\/j.knosys.2004.03.012","volume":"12","author":"C Weber","year":"2004","unstructured":"Weber C, Wermter S, Zochios A. Robot docking with neural vision and reinforcement. Knowl Based Syst. 2004;12(2\u20134):165\u201372.","journal-title":"Knowl Based Syst"},{"key":"9021_CR18","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW. Reinforcement learning: a survey. J Artif Intell Res. 1996;4:237\u201385.","journal-title":"J Artif Intell Res"},{"key":"9021_CR19","unstructured":"Pavlov IP. Conditioned reflexes: an investigation of the physiological activity of the cerebral cortex; 1927. http:\/\/psychclassics.yorku.ca\/Pavlov\/ ."},{"key":"9021_CR20","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S. Recent advances in hierarchical reinforcement learning. Discrete Event Dynamcal Systems: Theory Appl. 2003;13:341\u201379.","journal-title":"Discrete Event Dyn Syst: Theory Appl"},{"key":"9021_CR21","doi-asserted-by":"crossref","first-page":"172","DOI":"10.1016\/j.neunet.2006.01.016","volume":"20","author":"SM Stringer","year":"2007","unstructured":"Stringer SM, Rolls ET, Taylor P. Learning movement sequences with a delayed reward signal in a hierarchical model of motor function. Neural Netw. 2007;20:172\u201381.","journal-title":"Neural Netw"},{"key":"9021_CR22","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1016\/0921-8890(95)00005-Z","volume":"15","author":"CK Tham","year":"1995","unstructured":"Tham CK. Reinforcement learning of multiple tasks using a hierarchical CMAC architecture. Robot Auton Syst. 1995;15:247\u201374.","journal-title":"Robot Auton Syst"},{"issue":"1","key":"9021_CR23","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/S0921-8890(01)00113-0","volume":"36","author":"J Morimoto","year":"2001","unstructured":"Morimoto J, Doya K. Acquisition of stand-up behaviour by a real robot using hierarchical reinforcement learning. Robot Auton Syst. 2001;36(1):37\u201351.","journal-title":"Robot Auton Syst"},{"key":"9021_CR24","doi-asserted-by":"crossref","unstructured":"Singh S, Barto A, Chentanez N. Intrinsically motivated reinforcement learning. Proceedings of Neural Image Processing Systems Foundation; 2005.","DOI":"10.21236\/ADA440280"},{"key":"9021_CR25","doi-asserted-by":"crossref","unstructured":"Konidaris GD, Barto AG. Autonomous shaping: knowledge transfer in reinforcement learning. Proceedings of the Twenty-Third International Conference on Machine Learning; 2006. p. 489\u201396.","DOI":"10.1145\/1143844.1143906"},{"key":"9021_CR26","unstructured":"Smart WD, Kaelbling LP. Reinforcement learning for robot control. Proc SPIE: Mobile Robots XVI. 2001;4573:92\u2013103."},{"issue":"11","key":"9021_CR27","doi-asserted-by":"crossref","first-page":"487","DOI":"10.1016\/S1364-6613(00)01773-3","volume":"5","author":"DM Wolpert","year":"2001","unstructured":"Wolpert DM, Ghahramani Z, Flanagan JR. Perspectives and problems in motor learning. Trends Cogn Sci. 2001;5(11):487\u201394.","journal-title":"Trends Cogn Sci"},{"key":"9021_CR28","unstructured":"Mitchell RJ, Keating DA, Goodhew ICB, Bishop JM. Multiple neural network control of simple mobile robot. Proceedings of the 4th IEEE Mediterranean Symposium on New Directions in Control and Automation; 1996. p. 271\u20135."},{"key":"9021_CR29","doi-asserted-by":"crossref","unstructured":"Walter WG. A machine that learns. Sci Am. 1951;184(8):60\u20133.","DOI":"10.1038\/scientificamerican0851-60"},{"key":"9021_CR30","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/(SICI)1098-1063(2000)10:1<1::AID-HIPO1>3.0.CO;2-1","volume":"10","author":"DJ Foster","year":"2000","unstructured":"Foster DJ, Morris RGN, Dayan P. A model of hippocampally dependent navigation, using the temporal learning rule. Hippocampus. 2000;10:1\u201316.","journal-title":"Hippocampus"},{"key":"9021_CR31","doi-asserted-by":"crossref","first-page":"808","DOI":"10.1016\/j.ejor.2006.02.023","volume":"178","author":"SS Singh","year":"2007","unstructured":"Singh SS, Tadic VB, Doucet A. A policy gradient method for semi-Markov decision processes with application to call admission control. Eur J Oper Res. 2007;178:808\u201318.","journal-title":"Eur J Oper Res"}],"container-title":["Cognitive Computation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-009-9021-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12559-009-9021-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-009-9021-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,2]],"date-time":"2019-06-02T12:36:47Z","timestamp":1559479007000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12559-009-9021-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,7,24]]},"references-count":31,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2009,9]]}},"alternative-id":["9021"],"URL":"https:\/\/doi.org\/10.1007\/s12559-009-9021-z","relation":{},"ISSN":["1866-9956","1866-9964"],"issn-type":[{"value":"1866-9956","type":"print"},{"value":"1866-9964","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,7,24]]}}}