{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T12:00:34Z","timestamp":1762430434689,"version":"3.37.3"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2010,5,5]],"date-time":"2010-05-05T00:00:00Z","timestamp":1273017600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2010,11]]},"DOI":"10.1007\/s10846-010-9422-y","type":"journal-article","created":{"date-parts":[[2010,5,4]],"date-time":"2010-05-04T04:27:49Z","timestamp":1272947269000},"page":"217-239","source":"Crossref","is-referenced-by-count":33,"title":["A Human-Robot Collaborative Reinforcement Learning Algorithm"],"prefix":"10.1007","volume":"60","author":[{"given":"Uri","family":"Kartoun","sequence":"first","affiliation":[]},{"given":"Helman","family":"Stern","sequence":"additional","affiliation":[]},{"given":"Yael","family":"Edan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,5,5]]},"reference":[{"key":"9422_CR1","doi-asserted-by":"crossref","unstructured":"Zhu, W., Levinson, S.: Vision-based reinforcement learning for robot navigation. In: Proceedings of the International Joint Conference on Neural Networks, Washington DC, vol. 2, pp. 1025\u20131030 (2001)","DOI":"10.1109\/IJCNN.2001.939501"},{"key":"9422_CR2","first-page":"361","volume-title":"Proceedings of the 16th International FLAIRS Conference","author":"VN Papudesi","year":"2003","unstructured":"Papudesi, V.N., Huber, M.: Learning from reinforcement and advice using composite reward functions. In: Proceedings of the 16th International FLAIRS Conference, pp. 361\u2013365, St. Augustine, FL (2003)"},{"key":"9422_CR3","first-page":"160","volume-title":"AAAI Spring Symposium on Human Interaction with Autonomous Systems in Complex Environments","author":"VN Papudesi","year":"2003","unstructured":"Papudesi, V.N., Wang, Y., Huber, M., Cook, D.J.: Integrating user commands and autonomous task performance in a reinforcement learning framework. In: AAAI Spring Symposium on Human Interaction with Autonomous Systems in Complex Environments, pp. 160\u2013165. Stanford University, CA (2003)"},{"key":"9422_CR4","unstructured":"Kui-Hong, P., Jun, J., Jong-Hwan, K.: Stabilization of biped robot based on two mode Q-learning. In: Proceedings of the 2nd International Conference on Autonomous Robots and Agents, pp. 446\u2013451. New Zealand (2004)"},{"key":"9422_CR5","doi-asserted-by":"crossref","first-page":"4386","DOI":"10.1109\/ROBOT.2005.1570795","volume-title":"Proceedings of the 2005 IEEE International Conference on Robotics and Automation","author":"R Broadbent","year":"2005","unstructured":"Broadbent, R., Peterson, T.: Robot learning in partially observable, noisy, continuous worlds. In: Proceedings of the 2005 IEEE International Conference on Robotics and Automation, pp. 4386\u20134393. Barcelona, Spain (2005)"},{"key":"9422_CR6","doi-asserted-by":"crossref","unstructured":"Bakker, B., Zhumatiy, V., Gruener, G., Schmidhuber, J.: Quasi-online reinforcement learning for robots. In: Proceedings of the 2006 IEEE International Conference on Robotics and Automation, pp. 2997\u20133002 (2006)","DOI":"10.1109\/ROBOT.2006.1642157"},{"key":"9422_CR7","unstructured":"Kartoun, U., Stern, H., Edan, Y.: Human\u2013robot collaborative learning of a bag shaking trajectory. In: The Israel Conference on Robotics (ICR 2006), Faculty of Engineering, Tel Aviv University, June (2006)"},{"key":"9422_CR8","doi-asserted-by":"crossref","unstructured":"Kartoun, U., Stern, H., Edan, Y.: Human\u2013robot collaborative learning system for inspection. In: IEEE International Conference on Systems, Man, and Cybernetics, Taipei, Taiwan, October, pp. 4249\u20134255 (2006)","DOI":"10.1109\/ICSMC.2006.384802"},{"key":"9422_CR9","unstructured":"Mihalkova, L., Mooney, R.: Using active relocation to aid reinforcement. In: Proceedings of the 19th International FLAIRS Conference (FLAIRS-2006), Melbourne Beach, Florida, pp. 580\u2013585 (2006)"},{"issue":"2\u20134","key":"9422_CR10","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1007\/s10846-005-5137-x","volume":"4","author":"F Fern\u00e1ndez","year":"2005","unstructured":"Fern\u00e1ndez, F., Borrajo, D., Parker, L.E.: A Reinforcement learning algorithm in cooperative multi-robot domains. J. Intell. Robot. Syst. 4(2\u20134), 161\u2013174 (2005)","journal-title":"J. Intell. Robot. Syst."},{"issue":"1","key":"9422_CR11","doi-asserted-by":"crossref","first-page":"172","DOI":"10.1109\/TASE.2009.2013133","volume":"7","author":"U Kartoun","year":"2010","unstructured":"Kartoun, U., Shapiro, A., Stern, H., Edan, Y.: Physical modeling of a bag knot in a robot learning system. IEEE Trans. Automat. Sci. Eng. 7(1), 172\u2013177 (2010)","journal-title":"IEEE Trans. Automat. Sci. Eng."},{"issue":"1","key":"9422_CR12","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s10846-007-9174-5","volume":"51","author":"DM Kati\u0107","year":"2008","unstructured":"Kati\u0107, D.M., Rodi\u0107, A.D., Vukobratovi\u0107, M.K.: Hybrid dynamic control algorithm for humanoid robots based on reinforcement learning. J. Intell. Robot. Syst. 51(1), 3\u201330 (2008)","journal-title":"J. Intell. Robot. Syst."},{"issue":"5","key":"9422_CR13","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1007\/s10846-008-9287-5","volume":"54","author":"GT Anderson","year":"2009","unstructured":"Anderson, G.T., Yang, Y., Cheng, G.: An adaptable oscillator-based controller for autonomous robots. J. Intell. Robot. Syst. 54(5), 755\u2013767 (2009)","journal-title":"J. Intell. Robot. Syst."},{"key":"9422_CR14","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1177\/0278364907087548","volume":"27","author":"J Peters","year":"2008","unstructured":"Peters, J., Schaal, S.: Learning to control in operational space. Int. J. Rob. Res. 27, 197\u2013212 (2008)","journal-title":"Int. J. Rob. Res."},{"issue":"1","key":"9422_CR15","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1023\/A:1007968115863","volume":"21","author":"C Ribeiro","year":"1998","unstructured":"Ribeiro, C.: Embedding a priori knowledge in reinforcement learning. J. Intell. Robot. Syst. 21(1), 51\u201371 (1998)","journal-title":"J. Intell. Robot. Syst."},{"key":"9422_CR16","unstructured":"Hoffmann, H., Theodorou, E., Schaal, S.: Human optimization strategies under reward feedback. Abstracts of Neural Control of Movement Conference (NCM 2009) (2009)"},{"issue":"2","key":"9422_CR17","doi-asserted-by":"crossref","first-page":"173","DOI":"10.1080\/09540090600768658","volume":"18","author":"J Schmidhuber","year":"2006","unstructured":"Schmidhuber, J.: Developmental robotics, optimal artificial curiosity, creativity, music, and the fine arts. Connect. Sci. 18(2), 173\u2013187 (2006)","journal-title":"Connect. Sci."},{"issue":"1","key":"9422_CR18","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1023\/A:1008819414322","volume":"4","author":"MJ Matari\u0107","year":"1997","unstructured":"Matari\u0107, M.J.: Reinforcement learning in the multi-robot domain. Auton. Robots 4(1), 73\u201383 (1997)","journal-title":"Auton. Robots"},{"issue":"6","key":"9422_CR19","doi-asserted-by":"crossref","first-page":"674","DOI":"10.1016\/j.robot.2008.12.001","volume":"57","author":"TS Dahl","year":"2009","unstructured":"Dahl, T.S., Matari\u0107, M.J., Sukhatme, G.S.: Multi-robot task allocation through vacancy chain scheduling. J. Robot. Auton. Syst. 57(6), 674\u2013687 (2009)","journal-title":"J. Robot. Auton. Syst."},{"key":"9422_CR20","doi-asserted-by":"crossref","unstructured":"Fukuda, T., Funato, D., Arai, F.: Recognizing environmental change through multiplex reinforcement learning in group robot system. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems, vol. 2, pp. 972\u2013977 (1999)","DOI":"10.1109\/IROS.1999.812806"},{"key":"9422_CR21","unstructured":"Chernova, S., Veloso, M.: Confidence-based policy learning from demonstration using Gaussian mixture models. In: International Conference on Autonomous Agents and Multi-agent Systems (AAMAS\u201907), 2007"},{"key":"9422_CR22","first-page":"934","volume-title":"Q-Learning for Robots. The Handbook of Brain Theory and Neural Networks","author":"CF Touzet","year":"2003","unstructured":"Touzet, C.F.: Q-Learning for Robots. The Handbook of Brain Theory and Neural Networks, pp. 934\u2013937. MIT Press, Cambridge (2003)"},{"key":"9422_CR23","doi-asserted-by":"crossref","unstructured":"Inamura, T., Inaba, M., Inoue, H.: Integration model of learning mechanism and dialogue strategy based on stochastic experience representation using Bayesian network. In: Proceedings of the 9th IEEE International Workshop on Robot and Human Interactive Communication, RO-MAN 2000, pp. 247\u2013252 (2000)","DOI":"10.1109\/ROMAN.2000.892503"},{"key":"9422_CR24","doi-asserted-by":"crossref","unstructured":"Inamura, T., Inaba, M., Inoue, H.: User adaptation of human-robot interaction model based on Bayesian network and introspection of interaction experience. In: International Conference on Intelligent Robots and Systems (IROS 2000), vol. 3, pp. 2139\u20132144 (2000)","DOI":"10.1109\/IROS.2000.895287"},{"issue":"5","key":"9422_CR25","doi-asserted-by":"crossref","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall, B.D., Chernova, S., Veloso, M., Browning, B.: A survey of robot learning from demonstration. Robot. Auton. Syst. 57(5), 469\u2013483 (2009)","journal-title":"Robot. Auton. Syst."},{"key":"9422_CR26","doi-asserted-by":"crossref","unstructured":"Katagami, D., Yamada, S.: Interactive classifier system for real robot learning. In: Proceedings of the 9th IEEE International Workshop on Robot and Human Interactive Communication, RO-MAN 2000, pp. 258\u2013263 (2000)","DOI":"10.1109\/ROMAN.2000.892505"},{"key":"9422_CR27","unstructured":"Atkeson, C., Schaal, S.: Robot learning from demonstration. In: Proceedings of the International Conference Machine Learning, pp. 12\u201320 (1997)"},{"key":"9422_CR28","doi-asserted-by":"crossref","first-page":"569","DOI":"10.1613\/jair.898","volume":"19","author":"B Price","year":"2003","unstructured":"Price, B., Boutilier, C.: Accelerating reinforcement learning through implicit imitation. J. Artif. Intell. Res. 19, 569\u2013629 (2003)","journal-title":"J. Artif. Intell. Res."},{"key":"9422_CR29","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1613\/jair.2584","volume":"34","author":"S Chernova","year":"2009","unstructured":"Chernova, S., Veloso, M.: Interactive policy learning through confidence-based autonomy. J. Artif. Intell. Res. 34, 1\u201325 (2009)","journal-title":"J. Artif. Intell. Res."},{"key":"9422_CR30","doi-asserted-by":"crossref","unstructured":"Chernova, S., Veloso, M.: Multi-thresholded approach to demonstration selection for interactive robot learning. In: The 3rd ACM\/IEEE International Conference on Human\u2013Robot Interaction (HRI\u201908), pp. 225\u2013232 (2008)","DOI":"10.1145\/1349822.1349852"},{"key":"9422_CR31","unstructured":"Thomaz, A.L., Breazeal, C.: Reinforcement learning with human teachers: evidence of feedback and guidance with implications for learning performance. In: Proceedings of the 21st National Conference on Artificial Intelligence (AAAI), 2006"},{"key":"9422_CR32","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1016\/j.artint.2007.09.009","volume":"172","author":"AL Thomaz","year":"2008","unstructured":"Thomaz, A.L., Breazeal, C.: Teachable robots: understanding human teaching behavior to build more effective robot learners. Artif. Intell. 172, 716\u2013737 (2008)","journal-title":"Artif. Intell."},{"key":"9422_CR33","doi-asserted-by":"crossref","unstructured":"Lockerd, A.L., Breazeal, C.: Tutelage and socially guided robot learning. In: Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, Sendai, Japan (2004)","DOI":"10.1109\/IROS.2004.1389954"},{"key":"9422_CR34","doi-asserted-by":"crossref","unstructured":"Breazeal, C., Thomaz, A.L.: Learning from human teachers with socially guided exploration. In: Proceedings of the 2005 IEEE International Conference on Robotics and Automation, pp. 3539\u20133544 (2008)","DOI":"10.1109\/ROBOT.2008.4543752"},{"key":"9422_CR35","unstructured":"Abbeel, P., Ng., A.: Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the International Conference Machine Learning, vol. 69, 2004"},{"key":"9422_CR36","doi-asserted-by":"crossref","unstructured":"Chernova, S., Veloso, M.: Learning equivalent action choices from demonstration. In: The International Conference on Intelligent Robots and Systems (IROS 2008), pp. 1216\u20131221 (2008)","DOI":"10.1109\/IROS.2008.4650995"},{"key":"9422_CR37","doi-asserted-by":"crossref","unstructured":"Chernova, S., Veloso, M.: Teaching collaborative multi-robot tasks through demonstration. In: IEEE-RAS International Conference on Humanoid Robots, pp. 385\u2013390 (2008)","DOI":"10.1109\/ICHR.2008.4755982"},{"key":"9422_CR38","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. dissertation, Psychology Department, Cambridge University (1989)"},{"issue":"1\u20133","key":"9422_CR39","first-page":"283","volume":"22","author":"J Peng","year":"1996","unstructured":"Peng, J., Williams, R.: Incremental multi-step Q-learning. Mach. Learn. 22(1\u20133), 283\u2013290 (1996)","journal-title":"Mach. Learn."},{"issue":"1","key":"9422_CR40","doi-asserted-by":"crossref","first-page":"28","DOI":"10.3844\/jcssp.2005.28.30","volume":"1","author":"Y Dahmani","year":"2005","unstructured":"Dahmani, Y., Benyettou, A.: Seek of an optimal way by Q-learning. J. Comput. Sci. 1(1), 28\u201330 (2005)","journal-title":"J. Comput. Sci."},{"key":"9422_CR41","doi-asserted-by":"crossref","unstructured":"Wang, Y., Huber, M., Papudesi, V.N., Cook, D.J.: User-guided reinforcement learning of robot assistive tasks for an intelligent environment. In: Proceedings of the IEEE\/RJS International Conference on Intelligent Robots and Systems, vol. 1, pp. 424\u2013429 (2003)","DOI":"10.1109\/IROS.2003.1250666"},{"key":"9422_CR42","unstructured":"Clouse, J.A.: An Introspection Approach to Querying a Trainer. Technical Report: UM-CS-1996-013. University of Massachusetts, Amherst (1996)"},{"issue":"1","key":"9422_CR43","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1109\/TRO.2005.855988","volume":"22","author":"J Takamatsu","year":"2006","unstructured":"Takamatsu, J., Morita, T., Ogawara, K., Kimura, H., Ikeuchi, K.: Representation for knot-tying tasks. IEEE Trans. Robot. 22(1), 65\u201378 (2006)","journal-title":"IEEE Trans. Robot."},{"issue":"4","key":"9422_CR44","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1177\/0278364906064819","volume":"25","author":"H Wakamatsu","year":"2006","unstructured":"Wakamatsu, H., Eiji, A., Shinichi, H.: Knotting\/unknotting manipulation of deformable linear objects. Int. J. Rob. Res. 25(4), 371\u2013395 (2006)","journal-title":"Int. J. Rob. Res."},{"key":"9422_CR45","doi-asserted-by":"crossref","unstructured":"Matsuno, T., Fukuda, T.: Manipulation of flexible rope using topological model based on sensor information. International Conference on Intelligent Robots and Systems, pp. 2638\u20132643 (2006)","DOI":"10.1109\/IROS.2006.281944"},{"key":"9422_CR46","doi-asserted-by":"crossref","unstructured":"Saha, M., Isto, P.: Motion planning for robotic manipulation of deformable linear objects. In: International Conference on Intelligent Robots and Systems, vol. 23(6), pp. 1141\u20131150 (2007)","DOI":"10.1109\/TRO.2007.907486"},{"key":"9422_CR47","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: A Markovian decision process. Journal of Mathematics and Mechanics 6, 679\u2013684 (1957)","journal-title":"Journal of Mathematics and Mechanics"},{"issue":"3","key":"9422_CR48","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1023\/A:1015008417172","volume":"17","author":"C Ribeiro","year":"2002","unstructured":"Ribeiro, C.: Reinforcement learning agents. Artif. Intell. Rev. 17(3), 223\u2013250 (2002)","journal-title":"Artif. Intell. Rev."},{"key":"9422_CR49","unstructured":"Smart, W.D., Kaelbling, L.: Practical reinforcement learning in continuous spaces. In: Proceedings of the 17th International Conference on Machine Learning, pp. 903\u2013910 (2000)"},{"key":"9422_CR50","volume-title":"Dynamic Programming and Modern Control Theory","author":"R Bellman","year":"1965","unstructured":"Bellman, R., Kalaba, R.: Dynamic Programming and Modern Control Theory. Academic Press, New York (1965)"},{"key":"9422_CR51","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8, 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"9422_CR52","unstructured":"Glorennec, P.Y.: Reinforcement learning: an overview. European Symposium on Intelligent Techniques. Aachen, Germany, pp. 17\u201335 (2000)"},{"key":"9422_CR53","doi-asserted-by":"crossref","unstructured":"S., Nason, Laird, J.E.: Soar-RL: integrating reinforcement learning with soar. In: Proceedings of the International Conference on Cognitive Modeling, pp. 51\u201359 (2004)","DOI":"10.1016\/j.cogsys.2004.09.006"},{"key":"9422_CR54","doi-asserted-by":"crossref","unstructured":"Natarajan, S., Tadepalli, P.: Dynamic preferences in multi-criteria reinforcement learning. In: Proceedings of the 22nd International Conference on Machine Learning (ICML 2005), Bonn, Germany (2005)","DOI":"10.1145\/1102351.1102427"},{"key":"9422_CR55","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"9422_CR56","doi-asserted-by":"crossref","unstructured":"Kartoun, U., Stern, H., Edan, Y.: Bag Classification Using Support Vector Machines. Applied Soft Computing Technologies: The Challenge of Complexity Series: Advances in Soft Computing, pp. 665\u2013674. Springer, Berlin (2006)","DOI":"10.1007\/3-540-31662-0_50"},{"key":"9422_CR57","doi-asserted-by":"crossref","unstructured":"Frank, M.J., Moustafa, A.A., Haughey, H.M., Curran, T., Hutchison, K.E.: Genetic triple dissociation reveals multiple roles for dopamine in reinforcement learning. In: Proceedings of the National Academy of Sciences, vol. 104(41), pp. 16311\u201316316 (2007)","DOI":"10.1073\/pnas.0706111104"},{"key":"9422_CR58","doi-asserted-by":"crossref","unstructured":"Abramson, M., Wechsler, H.: Tabu search exploration for on-policy reinforcement learning. In: Proceedings of the International Joint Conference on Neural Networks 4(20\u201324), 2910\u20132915 (2003)","DOI":"10.1109\/IJCNN.2003.1224033"},{"issue":"5","key":"9422_CR59","doi-asserted-by":"crossref","first-page":"2140","DOI":"10.1109\/TSMCB.2004.832154","volume":"34","author":"M Guo","year":"2004","unstructured":"Guo, M., Liu, Y., Malec, J.: A new Q-learning algorithm based on the metropolis criterion. IEEE Trans. Syst. Man Cybern., Part B, Cybern. 34(5), 2140\u20132143 (2004)","journal-title":"IEEE Trans. Syst. Man Cybern., Part B, Cybern."},{"key":"9422_CR60","doi-asserted-by":"crossref","unstructured":"Meng, X., Chen, Y., Pi, Y., Yuan, Q.: A novel multi-agent reinforcement learning algorithm combination with quantum computation. The 6th World Congress on Intelligent Control and Automation, vol. 1, pp. 2613\u20132617 (2006)","DOI":"10.1109\/WCICA.2006.1712835"},{"key":"9422_CR61","unstructured":"Kartoun, U.: Human-Robot Collaborative Learning Methods. Ph.D. dissertation, Department of Industrial Engineering and Management, Ben-Gurion University of the Negev (2007)"}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-010-9422-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10846-010-9422-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-010-9422-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,20]],"date-time":"2025-02-20T14:19:11Z","timestamp":1740061151000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10846-010-9422-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,5,5]]},"references-count":61,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2010,11]]}},"alternative-id":["9422"],"URL":"https:\/\/doi.org\/10.1007\/s10846-010-9422-y","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"type":"print","value":"0921-0296"},{"type":"electronic","value":"1573-0409"}],"subject":[],"published":{"date-parts":[[2010,5,5]]}}}