{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T19:59:18Z","timestamp":1760385558007,"version":"3.37.3"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,7,3]],"date-time":"2017-07-03T00:00:00Z","timestamp":1499040000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"crossref","award":["841713015"],"award-info":[{"award-number":["841713015"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Auton Agent Multi-Agent Syst"],"published-print":{"date-parts":[[2018,1]]},"DOI":"10.1007\/s10458-017-9374-8","type":"journal-article","created":{"date-parts":[[2017,7,3]],"date-time":"2017-07-03T11:56:27Z","timestamp":1499082987000},"page":"1-25","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Social interaction for efficient agent learning from human reward"],"prefix":"10.1007","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1728-5711","authenticated-orcid":false,"given":"Guangliang","family":"Li","sequence":"first","affiliation":[]},{"given":"Shimon","family":"Whiteson","sequence":"additional","affiliation":[]},{"given":"W. Bradley","family":"Knox","sequence":"additional","affiliation":[]},{"given":"Hayley","family":"Hung","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,7,3]]},"reference":[{"key":"9374_CR1","doi-asserted-by":"crossref","unstructured":"Argall, B., Browning, B., & Veloso, M. (2007). Learning by demonstration with critique from a human teacher. In Proceedings of the ACM\/IEEE international conference on human\u2013robot interaction (pp. 57\u201364). ACM.","DOI":"10.1145\/1228716.1228725"},{"issue":"5","key":"9374_CR2","doi-asserted-by":"crossref","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall, B. D., Chernova, S., Veloso, M., & Browning, B. (2009). A survey of robot learning from demonstration. Robotics and Autonomous Systems, 57(5), 469\u2013483.","journal-title":"Robotics and Autonomous Systems"},{"key":"9374_CR3","unstructured":"Bertsekas, D. P., & Tsitsiklis, J. N. (1996). Neuro-dynamic programming (Vol. 7). Optimization and neural computation series 3. Belmont, MA: Athena Scientific."},{"key":"9374_CR4","doi-asserted-by":"crossref","unstructured":"Blumberg, B., Downie, M., Ivanov, Y., Berlin, M., Johnson, M. P., & Tomlinson, B. (2002). Integrated learning for interactive synthetic characters. In ACM transactions on graphics (TOG) (pp. 417\u2013426). ACM.","DOI":"10.1145\/566570.566597"},{"key":"9374_CR5","unstructured":"B\u00f6hm, N., K\u00f3kai, G., & Mandl, S. (2004). Evolving a heuristic function for the game of tetris. In LWA (pp. 118\u2013122)."},{"key":"9374_CR6","unstructured":"Cederborg, T., Grover, I., Isbell, C. L., & Thomaz, A. L. (2015). Policy shaping with human teachers. In Proceedings of the 24th international conference on artificial intelligence (pp. 3366\u20133372). AAAI Press."},{"key":"9374_CR7","unstructured":"Dearden, R., Friedman, N., & Russell, S. (1998). Bayesian q-learning. In AAAI\/IAAI (pp. 761\u2013768)."},{"key":"9374_CR8","unstructured":"Deterding, S., Khaled, R., Nacke, L. E., & Dixon, D. (2011). Gamification: Toward a definition. In CHI 2011 gamification workshop proceedings (pp. 12\u201315)."},{"key":"9374_CR9","doi-asserted-by":"crossref","first-page":"380","DOI":"10.1016\/j.compedu.2012.12.020","volume":"63","author":"A Dom\u00ednguez","year":"2013","unstructured":"Dom\u00ednguez, A., Saenz-de Navarrete, J., De-Marcos, L., Fern\u00e1ndez-Sanz, L., Pag\u00e9s, C., & Mart\u00ednez-Herr\u00e1iz, J.-J. (2013). Gamifying learning experiences: Practical implications and outcomes. Computers & Education, 63, 380\u2013392.","journal-title":"Computers & Education"},{"key":"9374_CR10","doi-asserted-by":"crossref","unstructured":"Dong, T., Dontcheva, M., Joseph, D., Karahalios, K., Newman, M., & Ackerman, M. (2012). Discovery-based games for learning software. In Proceedings of the SIGCHI conference on human factors in computing systems (pp. 2083\u20132086). ACM.","DOI":"10.1145\/2207676.2208358"},{"key":"9374_CR11","doi-asserted-by":"crossref","unstructured":"Farzan, R., DiMicco, J. M., Millen, D. R., Dugan, C., Geyer, W., & Brownholtz, E. A. (2008). Results from deploying a participation incentive mechanism within the enterprise. In Proceedings of the SIGCHI conference on human factors in computing systems (pp. 563\u2013572). ACM.","DOI":"10.1145\/1357054.1357145"},{"key":"9374_CR12","unstructured":"Griffith, S., Subramanian, K., Scholz, J., Isbell, C., & Thomaz, A. L. (2013). Policy shaping: Integrating human feedback with reinforcement learning. In Advances in neural information processing systems (pp. 2625\u20132633)."},{"key":"9374_CR13","doi-asserted-by":"crossref","unstructured":"Hakulinen, L., Auvinen, T., & Korhonen, A. (2013). Empirical study on the effect of achievement badges in TRAKLA2 online learning environment. In Learning and teaching in computing and engineering (LaTiCE), 2013 (pp. 47\u201354). IEEE.","DOI":"10.1109\/LaTiCE.2013.34"},{"key":"9374_CR14","doi-asserted-by":"crossref","unstructured":"Hamari, J., Koivisto, J., & Sarsa, H. (2014). Does gamification work?\u2014A literature review of empirical studies on gamification. In 2014 47th Hawaii international conference on system sciences (HICSS) (pp. 3025\u20133034). IEEE.","DOI":"10.1109\/HICSS.2014.377"},{"key":"9374_CR15","doi-asserted-by":"crossref","unstructured":"Isbell, C., Shelton, C. R., Kearns, M., Singh, S., & Stone, P. (2001). A social reinforcement learning agent. In Proceedings of the fifth international conference on autonomous agents (pp. 377\u2013384). ACM.","DOI":"10.1145\/375735.376334"},{"issue":"3","key":"9374_CR16","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1007\/s10458-006-0005-z","volume":"13","author":"CL Isbell Jr","year":"2006","unstructured":"Isbell, C. L, Jr., Kearns, M., Singh, S., Shelton, C. R., Stone, P., & Kormann, D. (2006). Cobot in LambdaMOO: An adaptive social statistics agent. Autonomous Agents and Multi-agent Systems, 13(3), 327\u2013354.","journal-title":"Autonomous Agents and Multi-agent Systems"},{"issue":"3","key":"9374_CR17","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1016\/S0921-8890(02)00168-9","volume":"38","author":"F Kaplan","year":"2002","unstructured":"Kaplan, F., Oudeyer, P.-Y., Kubinyi, E., & Mikl\u00f3si, A. (2002). Robotic clicker training. Robotics and Autonomous Systems, 38(3), 197\u2013206.","journal-title":"Robotics and Autonomous Systems"},{"key":"9374_CR18","volume-title":"The gamification of learning and instruction: Game-based methods and strategies for training and education","author":"KM Kapp","year":"2012","unstructured":"Kapp, K. M. (2012). The gamification of learning and instruction: Game-based methods and strategies for training and education. Hoboken: Wiley."},{"key":"9374_CR19","doi-asserted-by":"crossref","unstructured":"Kirman, B., Lawson, S., Linehan, C., Martino, F., Gamberini, L., & Gaggioli, A. (2010). Improving social game engagement on Facebook through enhanced socio-contextual information. In Proceedings of the SIGCHI conference on human factors in computing systems (pp. 1753\u20131756). ACM.","DOI":"10.1145\/1753326.1753589"},{"key":"9374_CR20","unstructured":"Knox, W. B. (2012). Learning from human-generated reward. Ph.D. thesis, University of Texas at Austin."},{"key":"9374_CR21","doi-asserted-by":"crossref","unstructured":"Knox, W. B., & Stone, P. (2009). Interactively shaping agents via human reinforcement: The TAMER framework. In Proceedings of the fifth international conference on knowledge capture (pp. 9\u201316). ACM.","DOI":"10.1145\/1597735.1597738"},{"key":"9374_CR22","unstructured":"Knox, W. B., & Stone, P. (2010). Combining manual feedback with subsequent MDP reward signals for reinforcement learning. In Proceedings of the 9th international conference on autonomous agents and multiagent systems (pp. 5\u201312). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9374_CR23","doi-asserted-by":"crossref","unstructured":"Knox, W. B. & Stone, P. (2012). Reinforcement learning from human reward: Discounting in episodic tasks. In 2012 IEEE on RO-MAN (pp. 878\u2013885). IEEE.","DOI":"10.1109\/ROMAN.2012.6343862"},{"key":"9374_CR24","unstructured":"Knox, W. B. & Stone, P. (2012). Reinforcement learning from simultaneous human and MDP reward. In Proceedings of the 11th international conference on autonomous agents and multiagent systems (pp. 475\u2013482). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9374_CR25","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1016\/j.artint.2015.03.009","volume":"225","author":"WB Knox","year":"2015","unstructured":"Knox, W. B., & Stone, P. (2015). Framing reinforcement learning from human reward: Reward positivity, temporal discounting, episodicity, and performance. Artificial Intelligence, 225, 24\u201350.","journal-title":"Artificial Intelligence"},{"key":"9374_CR26","unstructured":"Li, G., Hung, H., Whiteson, S., & Knox, W. B. (2013). Using informative behavior to increase engagement in the TAMER framework. In Proceedings of the 2013 international conference on autonomous agents and multi-agent systems (pp. 909\u2013916). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9374_CR27","doi-asserted-by":"crossref","unstructured":"Li, G., Hung, H., Whiteson, S., & Knox, W. B. (2014). Learning from human reward benefits from socio-competitive feedback. In Proceedings of the fourth joint IEEE international conference on development and learning and on epigenetic robotics (pp. 93\u2013100).","DOI":"10.1109\/DEVLRN.2014.6982960"},{"key":"9374_CR28","unstructured":"Li, G., Hung, H., Whiteson, S., & Knox, W. B. (2014). Leveraging social networks to motivate humans to train agents. In Proceedings of the 2014 international conference on autonomous agents and multi-agent systems (pp. 1571\u20131572). International Foundation for Autonomous Agents and Multiagent Systems."},{"key":"9374_CR29","unstructured":"Li, G., Whiteson, S., Knox, W. B., & Hung, H. (2015). Using informative behavior to increase engagement while learning from human reward. In Autonomous agents and multi-agent systems (pp. 1\u201323)."},{"key":"9374_CR30","unstructured":"Loftin, R., Peng, B., MacGlashan, J., Littman, M. L., Taylor, M. E., Huang, J., & Roberts, D. L. (2015). Learning behaviors via human-delivered discrete feedback: Modeling implicit feedback strategies to speed up learning. In Autonomous agents and multi-agent systems (pp. 1\u201330)."},{"key":"9374_CR31","unstructured":"Maclin, R., Shavlik, J., Torrey, L., Walker, T., & Wild, E. (1999, 2005). Giving advice about preferred actions to reinforcement learners via knowledge-based kernel regression. In Proceedings of the national conference on artificial intelligence (p. 819). Menlo Park, CA: AAAI Press; Cambridge, MA: MIT Press."},{"key":"9374_CR32","doi-asserted-by":"crossref","unstructured":"Nazir, A., Raza, S., & Chuah, C.-N. (2008). Unveiling Facebook: A measurement study of social network based applications. In Proceedings of the 8th ACM SIGCOMM conference on Internet measurement (pp. 43\u201356). ACM.","DOI":"10.1145\/1452520.1452527"},{"key":"9374_CR33","first-page":"278","volume":"99","author":"AY Ng","year":"1999","unstructured":"Ng, A. Y., Harada, D., & Russell, S. (1999). Policy invariance under reward transformations: Theory and application to reward shaping. ICML, 99, 278\u2013287.","journal-title":"ICML"},{"key":"9374_CR34","doi-asserted-by":"crossref","unstructured":"Pilarski, P. M., Dawson, M. R., Degris, T., Fahimi, F., Carey, J. P., & Sutton, R. S. (2011). Online human training of a myoelectric prosthesis controller via actor-critic reinforcement learning. In Proceedings of 12th international conference on rehabilitation robotics (ICORR) (pp. 1\u20137). IEEE.","DOI":"10.1109\/ICORR.2011.5975338"},{"key":"9374_CR35","doi-asserted-by":"crossref","unstructured":"Rafelsberger, W., & Scharl, A. (2009). Games with a purpose for social networking platforms. In Proceedings of the 20th ACM conference on hypertext and hypermedia (pp. 193\u2013198). ACM.","DOI":"10.1145\/1557914.1557948"},{"key":"9374_CR36","doi-asserted-by":"crossref","unstructured":"Suay, H. B., & Chernova, S. (2011). Effect of human guidance and state space size on interactive reinforcement learning. In 20th IEEE international symposium on robot and human interactive communication (RO-MAN) (pp. 1\u20136). IEEE.","DOI":"10.1109\/ROMAN.2011.6005223"},{"key":"9374_CR37","volume-title":"Reinforcement learning: An introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., & Barto, A. (1998). Reinforcement learning: An introduction. Cambridge, MA: MIT Press."},{"issue":"12","key":"9374_CR38","doi-asserted-by":"crossref","first-page":"2936","DOI":"10.1162\/neco.2006.18.12.2936","volume":"18","author":"I Szita","year":"2006","unstructured":"Szita, I., & L\u00f6rincz, A. (2006). Learning tetris using the noisy cross-entropy method. Neural Computation, 18(12), 2936\u20132941.","journal-title":"Neural Computation"},{"key":"9374_CR39","doi-asserted-by":"crossref","unstructured":"Tenorio-Gonzalez, A. C., Morales, E. F., & Villase\u00f1or-Pineda, L. (2010). Dynamic reward shaping: Training a robot by voice. In Advances in artificial intelligence\u2013IBERAMIA 2010 (pp. 483\u2013492). Springer.","DOI":"10.1007\/978-3-642-16952-6_49"},{"key":"9374_CR40","doi-asserted-by":"crossref","unstructured":"Thom, J., Millen, D., & DiMicco, J. (2012). Removing gamification from an enterprise SNS. In Proceedings of the ACM 2012 conference on computer supported cooperative work (pp. 1067\u20131070). ACM.","DOI":"10.1145\/2145204.2145362"},{"key":"9374_CR41","first-page":"1000","volume":"6","author":"AL Thomaz","year":"2006","unstructured":"Thomaz, A. L., & Breazeal, C. (2006). Reinforcement learning with human teachers: Evidence of feedback and guidance with implications for learning performance. AAAI, 6, 1000\u20131005.","journal-title":"AAAI"},{"issue":"6","key":"9374_CR42","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1016\/j.artint.2007.09.009","volume":"172","author":"AL Thomaz","year":"2008","unstructured":"Thomaz, A. L., & Breazeal, C. (2008). Teachable robots: Understanding human teaching behavior to build more effective robot learners. Artificial Intelligence, 172(6), 716\u2013737.","journal-title":"Artificial Intelligence"},{"issue":"3\u20134","key":"9374_CR43","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C. J., & Dayan, P. (1992). Q-Learning. Machine Learning, 8(3\u20134), 279\u2013292.","journal-title":"Machine Learning"},{"issue":"3","key":"9374_CR44","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1177\/1745691612442904","volume":"7","author":"RE Wilson","year":"2012","unstructured":"Wilson, R. E., Gosling, S. D., & Graham, L. T. (2012). A review of Facebook research in the social sciences. Perspectives on Psychological Science, 7(3), 203\u2013220.","journal-title":"Perspectives on Psychological Science"},{"key":"9374_CR45","volume-title":"Game-based marketing: Inspire customer loyalty through rewards, challenges, and contests","author":"G Zichermann","year":"2010","unstructured":"Zichermann, G., & Linder, J. (2010). Game-based marketing: Inspire customer loyalty through rewards, challenges, and contests. Hoboken: Wiley."}],"container-title":["Autonomous Agents and Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10458-017-9374-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-017-9374-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10458-017-9374-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,18]],"date-time":"2020-05-18T01:35:54Z","timestamp":1589765754000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10458-017-9374-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7,3]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,1]]}},"alternative-id":["9374"],"URL":"https:\/\/doi.org\/10.1007\/s10458-017-9374-8","relation":{},"ISSN":["1387-2532","1573-7454"],"issn-type":[{"type":"print","value":"1387-2532"},{"type":"electronic","value":"1573-7454"}],"subject":[],"published":{"date-parts":[[2017,7,3]]}}}