{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T03:45:32Z","timestamp":1761709532048},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319973036"},{"type":"electronic","value":"9783319973043"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-97304-3_7","type":"book-chapter","created":{"date-parts":[[2018,7,26]],"date-time":"2018-07-26T14:34:06Z","timestamp":1532615646000},"page":"85-97","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Adaptively Shaping Reinforcement Learning Agents via Human Reward"],"prefix":"10.1007","author":[{"given":"Chao","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongxu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianpei","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenxuan","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuchen","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongwei","family":"Ge","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiankang","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,7,27]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Abel, D., Hershkowitz, D.E., Barth-Maron, G., Brawner, S., O\u2019Farrell, K., MacGlashan, J., Tellex, S.: Goal-based action priors. In: ICAPS2015, pp. 306\u2013314 (2015)","DOI":"10.1609\/icaps.v25i1.13697"},{"key":"7_CR2","unstructured":"Abel, D., Salvatier, J., Stuhlm\u00fcller, A., Evans, O.: Agent-agnostic human-in-the-loop reinforcement learning. arXiv preprint arXiv:1701.04079 (2017)"},{"key":"7_CR3","unstructured":"Brys, T., Harutyunyan, A., Suay, H.B., Chernova, S., Taylor, M.E.: Reinforcement learning from demonstration through shaping. In: IJCAI2015, pp. 3352\u20133358 (2015)"},{"key":"7_CR4","unstructured":"Cederborg, T., Grover, I., Isbell, C.L., Thomaz, A.L.: Policy shaping with human teachers. In: IJCAI2015, pp. 3366\u20133372 (2015)"},{"key":"7_CR5","unstructured":"Suay, H.B., Brys, T., Taylor, M.E., Chernova, S.: Learning from demonstration for shaping through inverse reinforcement learning. In: AAMAS2016, pp. 429\u2013437 (2016)"},{"key":"7_CR6","unstructured":"Devlin, S., Kudenko, D.: Dynamic potential-based reward shaping. In: AAMAS2012, pp. 433\u2013440 (2012)"},{"key":"7_CR7","unstructured":"Devlin, S., Yliniemi, L., Kudenko, D., Tumer, K.: Potential-based difference rewards for multiagent reinforcement learning. In: AAMAS2014, pp. 165\u2013172 (2014)"},{"issue":"1","key":"7_CR8","first-page":"2","volume":"1","author":"A Fachantidis","year":"2017","unstructured":"Fachantidis, A., Taylor, M.E., Vlahavas, I.: Learning to teach reinforcement learning agents. Mach. Learn. Knowl. Extr. 1(1), 2 (2017)","journal-title":"Mach. Learn. Knowl. Extr."},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Fern\u00e1ndez, F., Veloso, M.: Probabilistic policy reuse in a reinforcement learning agent. In: AAMAS2006, pp. 720\u2013727. ACM (2006)","DOI":"10.1145\/1160633.1160762"},{"key":"7_CR10","unstructured":"Griffith, S., Subramanian, K., Scholz, J., Isbell, C.L., Thomaz, A.L.: Policy shaping: integrating human feedback with reinforcement learning. In: NIPS2013, pp. 2625\u20132633 (2013)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE ICRA, pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"7_CR12","unstructured":"Knox, W.B., Stone, P.: Tamer: training an agent manually via evaluative reinforcement. In: 7th IEEE ICDL, pp. 292\u2013297. IEEE (2008)"},{"key":"7_CR13","unstructured":"Knox, W.B., Stone, P.: Combining manual feedback with subsequent MDP reward signals for reinforcement learning. In: AAMAS2010, pp. 5\u201312 (2010)"},{"key":"7_CR14","unstructured":"Knox, W.B., Stone, P.: Reinforcement learning from simultaneous human and MDP reward. In: AAMAS2012, pp. 475\u2013482 (2012)"},{"issue":"C","key":"7_CR15","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.artint.2015.03.009","volume":"225","author":"WB Knox","year":"2015","unstructured":"Knox, W.B., Stone, P.: Framing reinforcement learning from human reward: reward positivity, temporal discounting, episodicity, and performance. Artif. Intell. 225(C), 24\u201350 (2015)","journal-title":"Artif. Intell."},{"key":"7_CR16","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"7_CR17","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"issue":"4","key":"7_CR18","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1002\/cjs.11162","volume":"40","author":"EE Moodie","year":"2012","unstructured":"Moodie, E.E., Chakraborty, B., Kramer, M.S.: Q-learning for estimating optimal dynamic treatment rules from observational data. Can. J. Stat. 40(4), 629\u2013645 (2012)","journal-title":"Can. J. Stat."},{"key":"7_CR19","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: ICML1999, vol. 99, pp. 278\u2013287 (1999)"},{"key":"7_CR20","unstructured":"Peng, P., et al.: Multiagent bidirectionally-coordinated nets for learning to play starcraft combat games. arXiv preprint arXiv:1703.10069 (2017)"},{"key":"7_CR21","unstructured":"Prasad, N., Cheng, L.F., Chivers, C., Draugelis, M., Engelhardt, B.E.: A reinforcement learning approach to weaning of mechanical ventilation in intensive care units. arXiv preprint arXiv:1704.06300 (2017)"},{"key":"7_CR22","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: ICML2015, pp. 1889\u20131897 (2015)"},{"key":"7_CR23","unstructured":"Sherstov, A.A., Stone, P.: Improving action selection in MDP\u2019s via knowledge transfer. In: AAAI2005, vol. 5, pp. 1024\u20131029 (2005)"},{"key":"7_CR24","volume-title":"Reinforcement Learning: An Introduction","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. The MIT press, Cambridge (1998)"},{"key":"7_CR25","unstructured":"Taylor, M.E., Suay, H.B., Chernova, S.: Integrating reinforcement learning with human demonstrations of varying ability. In: AAMAS2011, pp. 617\u2013624 (2011)"},{"key":"7_CR26","unstructured":"Torrey, L., Taylor, M.: Teaching on a budget: agents advising agents in reinforcement learning. In: AAMAS2013, pp. 1053\u20131060 (2013)"},{"issue":"12","key":"7_CR27","doi-asserted-by":"publisher","first-page":"2853","DOI":"10.1109\/TCYB.2014.2387277","volume":"45","author":"C Yu","year":"2015","unstructured":"Yu, C., Zhang, M., Ren, F., Tan, G.: Multiagent learning of coordination in loosely coupled multiagent systems. IEEE Trans. Cybern. 45(12), 2853\u20132867 (2015)","journal-title":"IEEE Trans. Cybern."},{"issue":"12","key":"7_CR28","doi-asserted-by":"publisher","first-page":"2342","DOI":"10.1109\/TCYB.2014.2307862","volume":"44","author":"C Yu","year":"2014","unstructured":"Yu, C., Zhang, M., Ren, F.: Collective learning for the emergence of social norms in networked multiagent systems. IEEE Trans. Cybern. 44(12), 2342\u20132355 (2014)","journal-title":"IEEE Trans. Cybern."},{"issue":"12","key":"7_CR29","doi-asserted-by":"publisher","first-page":"3083","DOI":"10.1109\/TNNLS.2015.2403394","volume":"26","author":"C Yu","year":"2015","unstructured":"Yu, C., Zhang, M., Ren, F., Tan, G.: Emotional multiagent reinforcement learning in spatial social dilemmas. IEEE Trans. Neural Netw. Learn. Syst. 26(12), 3083\u20133096 (2015)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"7_CR30","unstructured":"Zhan, Y., Fachantidis, A., Vlahavas, I., Taylor, M.E.: Agents teaching humans in reinforcement learning tasks. In: Proceedings of the Adaptive and Learning Agents Workshop (AAMAS) (2014)"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2018: Trends in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-97304-3_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,3]],"date-time":"2023-09-03T20:04:07Z","timestamp":1693771447000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-97304-3_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319973036","9783319973043"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-97304-3_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}