{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T05:57:15Z","timestamp":1775109435057,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2013,2,2]],"date-time":"2013-02-02T00:00:00Z","timestamp":1359763200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2013,9]]},"DOI":"10.1007\/s10489-012-0412-6","type":"journal-article","created":{"date-parts":[[2013,2,1]],"date-time":"2013-02-01T15:32:40Z","timestamp":1359732760000},"page":"267-278","source":"Crossref","is-referenced-by-count":19,"title":["Learning via human feedback in continuous state and action spaces"],"prefix":"10.1007","volume":"39","author":[{"given":"Ngo Anh","family":"Vien","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wolfgang","family":"Ertel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tae Choong","family":"Chung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2013,2,2]]},"reference":[{"key":"412_CR1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1015330.1015430","volume-title":"Proceedings of the twenty-first international conference on machine learning (ICML)","author":"P Abbeel","year":"2004","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first international conference on machine learning (ICML), pp\u00a01\u20138"},{"issue":"5","key":"412_CR2","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"13","author":"AG Barto","year":"1983","unstructured":"Barto AG, Sutton RS, Anderson CW (1983) Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans Syst Man Cybern 13(5):834\u2013846","journal-title":"IEEE Trans Syst Man Cybern"},{"issue":"3","key":"412_CR3","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1023\/A:1007634325138","volume":"40","author":"J Baxter","year":"2000","unstructured":"Baxter J, Tridgell A, Weaver L (2000) Learning to play chess using temporal differences. Mach Learn 40(3):243\u2013263","journal-title":"Mach Learn"},{"issue":"11","key":"412_CR4","doi-asserted-by":"crossref","first-page":"2471","DOI":"10.1016\/j.automatica.2009.07.008","volume":"45","author":"S Bhatnagar","year":"2009","unstructured":"Bhatnagar S, Sutton RS, Ghavamzadeh M, Lee M (2009) Natural actor-critic algorithms. Automatica 45(11):2471\u20132482","journal-title":"Automatica"},{"key":"412_CR5","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1007\/978-3-642-05181-4_19","volume-title":"From motor learning to interaction learning in robots","author":"R Detry","year":"2010","unstructured":"Detry R, Baseski E, Popovic M, Touati Y, Kr\u00fcger N, Kroemer O, Peters J, Piater JH (2010) Learning continuous grasp affordances by sensorimotor exploration. In: From motor learning to interaction learning in robots, pp\u00a0451\u2013465"},{"key":"412_CR6","doi-asserted-by":"crossref","unstructured":"Granmo OC, Glimsdal S (2012) Accelerated Bayesian learning for decentralized two-armed bandit based decision making with applications to the Goore game. Appl Intell","DOI":"10.1007\/s10489-012-0346-z"},{"issue":"1","key":"412_CR7","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/B:APIN.0000011143.95085.74","volume":"20","author":"J Hong","year":"2004","unstructured":"Hong J, Prabhu VV (2004) Distributed reinforcement learning control for batch sequencing and sizing in just-in-time manufacturing systems. Appl Intell 20(1):71\u201387","journal-title":"Appl Intell"},{"issue":"1","key":"412_CR8","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/s10489-008-0115-1","volume":"31","author":"A Iglesias","year":"2009","unstructured":"Iglesias A, Mart\u00ednez P, Aler R, Fern\u00e1ndez F (2009) Learning teaching strategies in an adaptive and intelligent educational system through reinforcement learning. Appl Intell 31(1):89\u2013106","journal-title":"Appl Intell"},{"key":"412_CR9","first-page":"481","volume-title":"Proceedings of the twenty-fourth AAAI conference on artificial intelligence","author":"K Judah","year":"2010","unstructured":"Judah K, Roy S, Fern A, Dietterich TG (2010) Reinforcement learning via practice and critique advice. In: Proceedings of the twenty-fourth AAAI conference on artificial intelligence, pp\u00a0481\u2013486"},{"issue":"4","key":"412_CR10","doi-asserted-by":"crossref","first-page":"409","DOI":"10.1007\/s12369-012-0163-x","volume":"4","author":"WB Knox","year":"2012","unstructured":"Knox WB, Glass BD, Love BC, Maddox WT, Stone P (2012) How humans teach agents: a new experimental perspective. Int J Soc Robot 4(4):409\u2013421","journal-title":"Int J Soc Robot"},{"key":"412_CR11","first-page":"36","volume-title":"AAAI 2011 spring symposium","author":"WB Knox","year":"2011","unstructured":"Knox WB, Setapen A, Stone P (2011) Reinforcement learning with human feedback in Mountain Car. In: AAAI 2011 spring symposium, pp\u00a036\u201341"},{"key":"412_CR12","first-page":"292","volume-title":"IEEE 7th international conference on development and learning (ICDL-08)","author":"WB Knox","year":"2008","unstructured":"Knox WB, Stone P (2008) TAMER: training of an agent manually via evaluative reinforcement. In: IEEE 7th international conference on development and learning (ICDL-08), pp\u00a0292\u2013297"},{"key":"412_CR13","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1145\/1597735.1597738","volume-title":"Proceedings of the 5th international conference on knowledge capture (K-CAP)","author":"WB Knox","year":"2009","unstructured":"Knox WB, Stone P (2009) Interactively shaping agents via human reinforcement: the TAMER framework. In: Proceedings of the 5th international conference on knowledge capture (K-CAP), pp\u00a09\u201316"},{"key":"412_CR14","first-page":"5","volume-title":"9th international conference on autonomous agents and multiagent systems (AAMAS)","author":"WB Knox","year":"2010","unstructured":"Knox WB, Stone P (2010) Combining manual feedback with subsequent MDP reward signals for reinforcement learning. In: 9th international conference on autonomous agents and multiagent systems (AAMAS), pp\u00a05\u201312"},{"key":"412_CR15","first-page":"1767","volume-title":"9th international conference on autonomous agents and multiagent systems (AAMAS)","author":"WB Knox","year":"2010","unstructured":"Knox WB, Stone P (2010) Training a Tetris agent via interactive shaping: a demonstration of the TAMER framework. In: 9th international conference on autonomous agents and multiagent systems (AAMAS), pp\u00a01767\u20131768"},{"key":"412_CR16","volume-title":"2011 ICML workshop on new developments in imitation learning","author":"WB Knox","year":"2011","unstructured":"Knox WB, Stone P (2011) Augmenting reinforcement learning with human feedback. In: 2011 ICML workshop on new developments in imitation learning"},{"key":"412_CR17","first-page":"475","volume-title":"11st international conference on autonomous agents and multiagent systems (AAMAS)","author":"WB Knox","year":"2012","unstructured":"Knox WB, Stone P (2012) Reinforcement learning from simultaneous human and MDP reward. In: 11st international conference on autonomous agents and multiagent systems (AAMAS), pp\u00a0475\u2013482"},{"key":"412_CR18","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1007\/978-3-642-05181-4_10","volume-title":"From motor learning to interaction learning in robots","author":"J Kober","year":"2010","unstructured":"Kober J, Mohler BJ, Peters J (2010) Imitation and reinforcement learning for motor primitives with perceptual coupling. In: From motor learning to interaction learning in robots, pp\u00a0209\u2013225"},{"issue":"1\u20132","key":"412_CR19","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1007\/s10994-010-5223-6","volume":"84","author":"J Kober","year":"2011","unstructured":"Kober J, Peters J (2011) Policy search for motor primitives in robotics. Mach Learn 84(1\u20132):171\u2013203","journal-title":"Mach Learn"},{"issue":"4","key":"412_CR20","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"VR Konda","year":"2003","unstructured":"Konda VR, Tsitsiklis JN (2003) On actor-critic algorithms. SIAM J Control Optim 42(4):1143\u20131166","journal-title":"SIAM J Control Optim"},{"issue":"9","key":"412_CR21","doi-asserted-by":"crossref","first-page":"1105","DOI":"10.1016\/j.robot.2010.06.001","volume":"58","author":"O Kroemer","year":"2010","unstructured":"Kroemer O, Detry R, Piater JH, Peters J (2010) Combining active learning and reactive control for robot grasping. Robot Auton Syst 58(9):1105\u20131116","journal-title":"Robot Auton Syst"},{"issue":"2","key":"412_CR22","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s10489-009-0191-x","volume":"34","author":"J Li","year":"2011","unstructured":"Li J, Li Z, Chen J (2011) Microassembly path planning using reinforcement learning for improving positioning accuracy of a 1\u00a0cm3 omni-directional mobile microrobot. Appl Intell 34(2):211\u2013225","journal-title":"Appl Intell"},{"key":"412_CR23","doi-asserted-by":"crossref","unstructured":"Pakizeh E, Palhang M, Pedram MM (2012) Multi-criteria expertness based cooperative Q-learning. Appl Intell","DOI":"10.1007\/s10489-012-0392-6"},{"issue":"3","key":"412_CR24","doi-asserted-by":"crossref","first-page":"1085","DOI":"10.1016\/j.jnca.2006.04.002","volume":"30","author":"GE Phillips-Wren","year":"2007","unstructured":"Phillips-Wren GE, M\u00f8rch AI, Tweedale J, Ichalkaranje N (2007) Innovations in agent collaboration, cooperation and teaming, part\u00a02. J Netw Comput Appl 30(3):1085\u20131088","journal-title":"J Netw Comput Appl"},{"key":"412_CR25","first-page":"1","volume-title":"IEEE international conference on rehabilitation robotics","author":"PM Pilarski","year":"2011","unstructured":"Pilarski PM, Dawson MR, Degris T, Fahimi F, Carey JP, Sutton RS (2011) Online human training of a myoelectric prosthesis controller via actor-critic reinforcement learning. In: IEEE international conference on rehabilitation robotics, pp\u00a01\u20137"},{"issue":"3","key":"412_CR26","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1147\/rd.33.0210","volume":"3","author":"AL Samuel","year":"1959","unstructured":"Samuel AL (1959) Some studies in machine learning using the game of checkers. IBM J Res Dev 3(3):210\u2013229","journal-title":"IBM J Res Dev"},{"issue":"2","key":"412_CR27","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1177\/105971239700600201","volume":"6","author":"JC Santamaria","year":"1998","unstructured":"Santamaria JC, Sutton RS, Ram A (1998) Experiments with reinforcement learning in problems with continuous state and action spaces. Adapt Behav 6(2):163\u2013218","journal-title":"Adapt Behav"},{"key":"412_CR28","doi-asserted-by":"crossref","first-page":"194","DOI":"10.1007\/11527862_14","volume-title":"Abstraction, reformulation and approximation, 6th international symposium (SARA)","author":"AA Sherstov","year":"2005","unstructured":"Sherstov AA, Stone P (2005) Function approximation via tile coding: automating parameter choice. In: Abstraction, reformulation and approximation, 6th international symposium (SARA), pp\u00a0194\u2013205"},{"key":"412_CR29","first-page":"974","volume-title":"Advances in neural information processing systems (NIPS)","author":"SP Singh","year":"1996","unstructured":"Singh SP, Bertsekas D (1996) Reinforcement learning for dynamic channel allocation in cellular telephone systems. In: Advances in neural information processing systems (NIPS), pp\u00a0974\u2013980"},{"key":"412_CR30","series-title":"Proceedings of the eleventh international conference (ICML)","first-page":"284","volume-title":"Machine learning","author":"SP Singh","year":"1994","unstructured":"Singh SP, Jaakkola T, Jordan MI (1994) Learning without state-estimation in partially observable Markovian decision processes. In: Machine learning, Proceedings of the eleventh international conference (ICML), pp\u00a0284\u2013292"},{"key":"412_CR31","volume-title":"Workshop on agents learning interactively from human teachers at IJCAI","author":"K Subramanian","year":"2011","unstructured":"Subramanian K, Isbell C, Thomaz A (2011) Learning options through human interaction. In: Workshop on agents learning interactively from human teachers at IJCAI"},{"key":"412_CR32","first-page":"1038","volume-title":"Advances in neural information processing systems (NIPS)","author":"RS Sutton","year":"1995","unstructured":"Sutton RS (1995) Generalization in reinforcement learning: successful examples using sparse coarse coding. In: Advances in neural information processing systems (NIPS), vol\u00a08, pp\u00a01038\u20131044"},{"key":"412_CR33","first-page":"497","volume-title":"Learning and computational neuroscience: foundations of adaptive networks","author":"RS Sutton","year":"1990","unstructured":"Sutton RS, Barto AG (1990) Technical note q-learning. In: Learning and computational neuroscience: foundations of adaptive networks, pp\u00a0497\u2013537"},{"key":"412_CR34","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"412_CR35","first-page":"1057","volume-title":"Advances in neural information processing systems","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, McAllester DA, Singh SP, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. In: Advances in neural information processing systems, vol\u00a012. NIPS conference, Denver, Colorado, USA, pp\u00a01057\u20131063"},{"key":"412_CR36","volume-title":"Proceedings of the agents learning interactively from human teachers workshop (at AAMAS-10)","author":"ME Taylor","year":"2010","unstructured":"Taylor ME, Chernova S (2010) Integrating human demonstration and reinforcement learning: initial results in human-agent transfer. In: Proceedings of the agents learning interactively from human teachers workshop (at AAMAS-10)"},{"key":"412_CR37","first-page":"257","volume":"8","author":"G Tesauro","year":"1992","unstructured":"Tesauro G (1992) Practical issues in temporal difference learning. Mach Learn 8:257\u2013277","journal-title":"Mach Learn"},{"issue":"2","key":"412_CR38","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro G (1994) Td-gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput 6(2):215\u2013219","journal-title":"Neural Comput"},{"issue":"3","key":"412_CR39","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and td-gammon. Commun ACM 38(3):58\u201368","journal-title":"Commun ACM"},{"key":"412_CR40","volume-title":"Proceedings, the twenty-first national conference on artificial intelligence and the eighteenth innovative applications of artificial intelligence conference","author":"AL Thomaz","year":"2006","unstructured":"Thomaz AL, Breazeal C (2006) Reinforcement learning with human teachers: evidence of feedback and guidance with implications for learning performance. In: Proceedings, the twenty-first national conference on artificial intelligence and the eighteenth innovative applications of artificial intelligence conference"},{"key":"412_CR41","doi-asserted-by":"crossref","first-page":"2008","DOI":"10.1587\/transcom.E92.B.2008","volume":"92-B(6)","author":"NA Vien","year":"2009","unstructured":"Vien NA, Viet NH, Lee S, Chung T (2009) Policy gradient SMDP for resource allocation and routing in integrated services networks. IEICE Trans 92-B(6):2008\u20132022","journal-title":"IEICE Trans"},{"issue":"9","key":"412_CR42","doi-asserted-by":"crossref","first-page":"1671","DOI":"10.1016\/j.ins.2011.01.001","volume":"181","author":"NA Vien","year":"2011","unstructured":"Vien NA, Yu H, Chung T (2011) Hessian matrix distribution for Bayesian policy gradient reinforcement learning. Inf Sci 181(9):1671\u20131685","journal-title":"Inf Sci"},{"issue":"4","key":"412_CR43","doi-asserted-by":"crossref","first-page":"286","DOI":"10.1016\/S0019-9958(77)90354-0","volume":"34","author":"IH Witten","year":"1977","unstructured":"Witten IH (1977) An adaptive optimal controller for discrete-time Markov environments. Inf Control 34(4):286\u2013295","journal-title":"Inf Control"},{"key":"412_CR44","first-page":"26","volume-title":"IEE proceedings on software engineering","author":"M Wooldridge","year":"1997","unstructured":"Wooldridge M (1997) Agent-based software engineering. In: IEE proceedings on software engineering, pp\u00a026\u201337"},{"key":"412_CR45","first-page":"1114","volume-title":"International joint conferences on artificial intelligence","author":"W Zhang","year":"1995","unstructured":"Zhang W, Dietterich TG (1995) A\u00a0reinforcement learning approach to job-shop scheduling. In: International joint conferences on artificial intelligence, pp\u00a01114\u20131120"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-012-0412-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-012-0412-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-012-0412-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,9]],"date-time":"2019-07-09T03:38:09Z","timestamp":1562643489000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-012-0412-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2,2]]},"references-count":45,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2013,9]]}},"alternative-id":["412"],"URL":"https:\/\/doi.org\/10.1007\/s10489-012-0412-6","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,2,2]]}}}