{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T03:06:47Z","timestamp":1777518407179,"version":"3.51.4"},"reference-count":163,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,9,18]],"date-time":"2021-09-18T00:00:00Z","timestamp":1631923200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,9,18]],"date-time":"2021-09-18T00:00:00Z","timestamp":1631923200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Ambient Intell Human Comput"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s12652-021-03489-y","type":"journal-article","created":{"date-parts":[[2021,9,18]],"date-time":"2021-09-18T06:02:49Z","timestamp":1631944969000},"page":"3621-3644","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["A conceptual framework for externally-influenced agents: an assisted reinforcement learning review"],"prefix":"10.1007","volume":"14","author":[{"given":"Adam","family":"Bignold","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1131-3382","authenticated-orcid":false,"given":"Francisco","family":"Cruz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthew E.","family":"Taylor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Brys","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard","family":"Dazeley","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Vamplew","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cameron","family":"Foale","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,18]]},"reference":[{"key":"3489_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the International Conference on Machine learning ICML, ACM, pp 1\u20138","DOI":"10.1145\/1015330.1015430"},{"key":"3489_CR2","first-page":"293","volume-title":"A brief survey on concept drift. Intelligent computing","author":"V Akila","year":"2015","unstructured":"Akila V, Zayaraz G (2015) A brief survey on concept drift. Intelligent computing. Springer, Communication and Devices, Berlin, pp 293\u2013302"},{"issue":"4","key":"3489_CR3","first-page":"105","volume":"35","author":"S Amershi","year":"2014","unstructured":"Amershi S, Cakmak M, Knox WB, Kulesza T (2014) Power to the people: the role of humans in interactive machine learning. AI Mag 35(4):105\u2013120","journal-title":"AI Mag"},{"key":"3489_CR4","unstructured":"Amir O, Kamar E, Kolobov A, Grosz B (2016) Interactive teaching strategies for agent training. In: Proceedings of the International Joint Conference on Artificial Intelligence IJCAI, pp 804\u2013811"},{"key":"3489_CR5","unstructured":"Ammar HB, Eaton E, Ruvolo P, Taylor ME (2015) Unsupervised Cross-Domain Transfer in Policy Gradient Reinforcement Learning via Manifold Alignment. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI"},{"key":"3489_CR8","doi-asserted-by":"crossref","unstructured":"Argall B, Browning B, Veloso M (2007) Learning by demonstration with critique from a human teacher. In: Proceedings of the ACM\/IEEE International Conference on Human-Robot Interaction HRI, ACM, pp 57\u201364","DOI":"10.1145\/1228716.1228725"},{"key":"3489_CR6","doi-asserted-by":"crossref","unstructured":"Argall BD, Browning B, Veloso M (2009a) Automatic weight learning for multiple data sources when learning from demonstration. In: Proceedings of the IEEE International Conference on Robotics and Automation ICRA, IEEE, pp 226\u2013231","DOI":"10.1109\/ROBOT.2009.5152668"},{"issue":"5","key":"3489_CR7","doi-asserted-by":"crossref","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall BD, Chernova S, Veloso M, Browning B (2009b) A survey of robot learning from demonstration. Robot Auton Syst 57(5):469\u2013483","journal-title":"Robot Auton Syst"},{"key":"3489_CR9","doi-asserted-by":"crossref","unstructured":"Arzate Cruz C, Igarashi T (2020) A survey on interactive reinforcement learning: Design principles and open challenges. In: Proceedings of the 2020 ACM Designing Interactive Systems Conference, pp 1195\u20131209","DOI":"10.1145\/3357236.3395525"},{"key":"3489_CR10","doi-asserted-by":"crossref","unstructured":"Ayala A, Henr\u00edquez C, Cruz F (2019) Reinforcement learning using continuous states and interactive feedback. In: Proceedings of the International Conference on Applications of Intelligent Systems, pp 1\u20135","DOI":"10.1145\/3309772.3309801"},{"key":"3489_CR11","unstructured":"Banerjee B (2007) General game learning using knowledge transfer. In: Proceedings of the International Joint Conference on Artificial Intelligence IJCAI, pp 672\u2013677"},{"key":"3489_CR12","doi-asserted-by":"crossref","unstructured":"Barros P, Tanevska A, Cruz F, Sciutti A (2020) Moody learners-explaining competitive behaviour of reinforcement learning agents. In: 2020 Joint IEEE 10th International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob), IEEE, pp 1\u20138","DOI":"10.1109\/ICDL-EpiRob48136.2020.9278125"},{"key":"3489_CR13","unstructured":"Behboudian P, Satsangi Y, Taylor ME, Harutyunyan A, Bowling M (2020) Useful policy invariant shaping from arbitrary advice. In: AAMAS Adaptive and Learning Agents Workshop ALA 2020, p\u00a09"},{"key":"3489_CR14","doi-asserted-by":"crossref","unstructured":"Bengio Y, Louradour J, Collobert R, Weston J (2009). Curriculum learning. In: Proceedings of the International Conference on Machine learning ICML, ACM, New York, NY, USA, pp 41\u201348","DOI":"10.1145\/1553374.1553380"},{"key":"3489_CR15","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1016\/j.artint.2015.05.008","volume":"226","author":"RA Bianchi","year":"2015","unstructured":"Bianchi RA, Celiberto LA Jr, Santos PE, Matsuura JP, de Mantaras RL (2015) Transferring knowledge as heuristics in reinforcement learning: a case-based approach. Artif Intell 226:102\u2013121","journal-title":"Artif Intell"},{"key":"3489_CR17","doi-asserted-by":"crossref","unstructured":"Bignold A, Cruz F, Dazeley R, Vamplew P, Foale C (2020) Human engagement providing evaluative and informative advice for interactive reinforcement learning. arXiv preprint arXiv:200909575","DOI":"10.1007\/s00521-021-06466-w"},{"issue":"1","key":"3489_CR16","doi-asserted-by":"crossref","first-page":"13","DOI":"10.3390\/biomimetics6010013","volume":"6","author":"A Bignold","year":"2021","unstructured":"Bignold A, Cruz F, Dazeley R, Vamplew P, Foale C (2021a) An evaluation methodology for interactive reinforcement learning with simulated users. Biomimetics 6(1):13","journal-title":"Biomimetics"},{"key":"3489_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-021-06466-w","author":"A Bignold","year":"2021","unstructured":"Bignold A, Cruz F, Dazeley R, Vamplew P, Foale C (2021b) Persistent rule-based interactive reinforcement learning. Neural Comput Appl. https:\/\/doi.org\/10.1007\/s00521-021-06466-w","journal-title":"Neural Comput Appl"},{"key":"3489_CR19","first-page":"1","volume-title":"European workshop on multi-agent systems","author":"H Bou Ammar","year":"2011","unstructured":"Bou Ammar H, Taylor ME, Tuyls K, Weiss G (2011) Reinforcement learning transfer using a sparse coded inter-task mapping. European workshop on multi-agent systems. Springer, Berlin, pp 1\u201316"},{"issue":"2","key":"3489_CR20","doi-asserted-by":"crossref","first-page":"1549","DOI":"10.1109\/LRA.2019.2896467","volume":"4","author":"M Breyer","year":"2019","unstructured":"Breyer M, Furrer F, Novkovic T, Siegwart R, Nieto J (2019) Comparing task simplifications to learn closed-loop object picking using deep reinforcement learning. IEEE Robot Autom Lett 4(2):1549\u20131556","journal-title":"IEEE Robot Autom Lett"},{"key":"3489_CR23","doi-asserted-by":"crossref","unstructured":"Brys T, Now\u00e9 A, Kudenko D, Taylor ME (2014) Combining multiple correlated reward and shaping signals by measuring confidence. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI, pp 1687\u20131693","DOI":"10.1609\/aaai.v28i1.8998"},{"key":"3489_CR22","unstructured":"Brys T, Harutyunyan A, Suay HB, Chernova S, Taylor ME, Now\u00e9 A (2015) Reinforcement learning from demonstration through shaping. In: Proceedings of the International Joint Conference on Artificial Intelligence IJCAI, p 26"},{"key":"3489_CR21","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1016\/j.neucom.2017.02.096","volume":"263","author":"T Brys","year":"2017","unstructured":"Brys T, Harutyunyan A, Vrancx P, Now\u00e9 A, Taylor ME (2017) Multi-objectivization and ensembles of shapings in reinforcement learning. Neurocomputing 263:48\u201359","journal-title":"Neurocomputing"},{"key":"3489_CR24","first-page":"362","volume-title":"Proceedings of the international conference on machine learning ICML","author":"AR Cassandra","year":"2016","unstructured":"Cassandra AR, Kaelbling LP (2016) Learning policies for partially observable environments: scaling up. Proceedings of the international conference on machine learning ICML. Morgan Kaufmann, Burlington, p 362"},{"key":"3489_CR25","first-page":"220","volume-title":"Heuristic reinforcement learning applied to robocup simulation agents","author":"LA Celiberto Jr","year":"2007","unstructured":"Celiberto LA Jr, Ribeiro CH, Costa AH, Bianchi RA (2007) Heuristic reinforcement learning applied to robocup simulation agents. Springer, Berlin, pp 220\u2013227"},{"key":"3489_CR27","volume-title":"Lifelong machine learning. Synthesis lectures on artificial intelligence and machine learning","author":"Z Chen","year":"2016","unstructured":"Chen Z, Liu B (2016) Lifelong machine learning. Synthesis lectures on artificial intelligence and machine learning. Morgan & Claypool Publishers, San Rafael"},{"key":"3489_CR28","first-page":"425","volume-title":"Proceedings of the international conference on knowledge science, engineering and management","author":"H Chen","year":"2018","unstructured":"Chen H, Yang B, Liu J (2018) Partially observable reinforcement learning for sustainable active surveillance. Proceedings of the international conference on knowledge science, engineering and management. Springer, Berlin, pp 425\u2013437"},{"issue":"9","key":"3489_CR26","first-page":"1699","volume":"109","author":"SA Chen","year":"2019","unstructured":"Chen SA, Tangkaratt V, Lin HT, Sugiyama M (2019) Active deep Q-learning with demonstration. Mach Learn 109(9):1699\u20131725","journal-title":"Mach Learn"},{"key":"3489_CR29","unstructured":"Cheng ST, Chang TY, Hsu CW (2013) A framework of an agent planning with reinforcement learning for e-pet. In: Proceedings of the International Conference on Orange Technologies ICOT, IEEE, pp 310\u2013313"},{"key":"3489_CR30","unstructured":"Churamani N, Cruz F, Griffiths S, Barros P (2016) iCub: learning emotion expressions using human reward. In: Proceedings of the Workshop on Bio-inspired Social Robot Learning in Home Scenarios, IEEE\/RSJ IROS, p 2"},{"key":"3489_CR31","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1016\/j.artint.2014.07.003","volume":"216","author":"LC Cobo","year":"2014","unstructured":"Cobo LC, Subramanian K, Isbell CL Jr, Lanterman AD, Thomaz AL (2014) Abstraction from demonstration for efficient reinforcement learning in high-dimensional domains. Artif Intell 216:103\u2013128","journal-title":"Artif Intell"},{"issue":"3","key":"3489_CR32","doi-asserted-by":"crossref","first-page":"75","DOI":"10.3390\/computers9030075","volume":"9","author":"R Contreras","year":"2020","unstructured":"Contreras R, Ayala A, Cruz F (2020) Unmanned aerial vehicle control through domain-based automatic speech recognition. Computers 9(3):75","journal-title":"Computers"},{"key":"3489_CR40","doi-asserted-by":"crossref","unstructured":"Cruz F, Twiefel J, Magg S, Weber C, Wermter S (2015) Interactive reinforcement learning through speech guidance in a domestic scenario. In: Proceedings of the International Joint Conference on Neural Networks IJCNN, IEEE, pp 1341\u20131348","DOI":"10.1109\/IJCNN.2015.7280477"},{"issue":"4","key":"3489_CR33","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1109\/TCDS.2016.2543839","volume":"8","author":"F Cruz","year":"2016","unstructured":"Cruz F, Magg S, Weber C, Wermter S (2016a) Training agents with interactive reinforcement learning and contextual affordances. IEEE Trans Cognit Dev Syst 8(4):271\u2013284","journal-title":"IEEE Trans Cognit Dev Syst"},{"key":"3489_CR37","doi-asserted-by":"crossref","unstructured":"Cruz F, Parisi GI, Twiefel J, Wermter S (2016b) Multi-modal integration of dynamic audiovisual patterns for an interactive reinforcement learning scenario. In: Proceedings fo the IEEE\/RSJ International Conference on Intelligent Robots and Systems IROS, IEEE, pp 759\u2013766","DOI":"10.1109\/IROS.2016.7759137"},{"key":"3489_CR38","unstructured":"Cruz F, Parisi GI, Wermter S (2016c) Learning contextual affordances with an associative neural architecture. In: Proceedings of the European Symposium on Artificial Neural Network. Computational Intelligence and Machine Learning ESANN, UCLouvain, pp 665-670"},{"key":"3489_CR42","doi-asserted-by":"crossref","unstructured":"Cruz F, W\u00fcppen P, Magg S, Fazrie A, Wermter S (2017) Agent-advising approaches in an interactive reinforcement learning scenario. In: Proceedings of the Joint IEEE International Conference on Development and Learning and Epigenetic Robotics ICDL-EpiRob, IEEE, pp 209\u2013214","DOI":"10.1109\/DEVLRN.2017.8329809"},{"issue":"3","key":"3489_CR34","doi-asserted-by":"crossref","first-page":"306","DOI":"10.1080\/09540091.2018.1443318","volume":"30","author":"F Cruz","year":"2018","unstructured":"Cruz F, Magg S, Nagai Y, Wermter S (2018a) Improving interactive reinforcement learning: what makes a good teacher? Connect Sci 30(3):306\u2013325","journal-title":"Connect Sci"},{"key":"3489_CR39","doi-asserted-by":"crossref","unstructured":"Cruz F, Parisi GI, Wermter S (2018b) Multi-modal feedback for affordance-driven interactive reinforcement learning. In: Proceedings of the International Joint Conference on Neural Networks IJCNN, IEEE, pp 5515\u20135122","DOI":"10.1109\/IJCNN.2018.8489237"},{"key":"3489_CR41","doi-asserted-by":"crossref","unstructured":"Cruz F, W\u00fcppen P, Fazrie A, Weber C, Wermter S (2018c) Action selection methods in a robotic reinforcement learning scenario. In: 2018 IEEE Latin American Conference on Computational Intelligence (LA-CCI), IEEE, pp 1-6","DOI":"10.1109\/LA-CCI.2018.8625243"},{"key":"3489_CR35","first-page":"66","volume-title":"Proceedings of the Australasian joint conference on artificial intelligence","author":"F Cruz","year":"2019","unstructured":"Cruz F, Dazeley R, Vamplew P (2019) Memory-based explainable reinforcement learning. Proceedings of the Australasian joint conference on artificial intelligence. Springer, Berlin, pp 66\u201377"},{"key":"3489_CR36","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-021-06425-5","author":"F Cruz","year":"2021","unstructured":"Cruz F, Dazeley R, Vamplew P et al (2021) Explainable robotic systems: understanding goal-driven actions in a reinforcement learning scenario. Neural Comput Appl. https:\/\/doi.org\/10.1007\/s00521-021-06425-5","journal-title":"Neural Comput Appl"},{"key":"3489_CR45","unstructured":"Da Silva FL (2019) Integrating agent advice and previous task solutions in multiagent reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 2447\u20132448"},{"key":"3489_CR48","unstructured":"Da\u00a0Silva FL, Costa AHR (2018) Object-oriented curriculum generation for reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 1026\u20131034"},{"key":"3489_CR46","doi-asserted-by":"crossref","first-page":"645","DOI":"10.1613\/jair.1.11396","volume":"64","author":"FL Da Silva","year":"2019","unstructured":"Da Silva FL, Costa AHR (2019) A survey on transfer learning for multiagent reinforcement learning systems. J Artif Intell Res 64:645\u2013703","journal-title":"J Artif Intell Res"},{"key":"3489_CR43","unstructured":"Da Silva FL, Glatt R, Costa AHR (2017) Simultaneously learning and advising in multiagent reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, pp 1100-1108"},{"key":"3489_CR44","doi-asserted-by":"crossref","unstructured":"Da Silva FL, Hernandez-Leal P, Kartal B, Taylor ME (2020a) Uncertainty-aware action advising for deep reinforcement learning agents. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI, pp 5792\u20135799","DOI":"10.1609\/aaai.v34i04.6036"},{"issue":"1","key":"3489_CR47","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/s10458-019-09430-0","volume":"34","author":"FL Da Silva","year":"2020","unstructured":"Da Silva FL, Warnell G, Costa AHR, Stone P (2020b) Agents teaching agents: a survey on inter-agent transfer learning. Auton Agents Multi-Agent Syst 34(1):9","journal-title":"Auton Agents Multi-Agent Syst"},{"key":"3489_CR49","unstructured":"Dazeley R, Vamplew P, Cruz F (2021a) Explainable reinforcement learning for broad-XAI: a conceptual framework and survey. arXiv preprint arXiv:210809003"},{"issue":"103525","key":"3489_CR50","first-page":"29","volume":"299","author":"R Dazeley","year":"2021","unstructured":"Dazeley R, Vamplew P, Foale C, Young C, Aryal S, Cruz F (2021b) Levels of explainable artificial intelligence for human-aligned conversational explanations. Artif Intell 299(103525):29","journal-title":"Artif Intell"},{"key":"3489_CR51","unstructured":"Devlin S, Kudenko D (2011) Theoretical considerations of potential-based reward shaping for multi-agent systems. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 225\u2013232"},{"key":"3489_CR52","unstructured":"Devlin S, Kudenko D (2012) Dynamic potential-based reward shaping. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 433\u2013440"},{"key":"3489_CR53","volume-title":"Incorporating prior knowledge and previously learned information into reinforcement learning agents","author":"K Dixon","year":"2000","unstructured":"Dixon K, Malak RJ, Khosla PK (2000) Incorporating prior knowledge and previously learned information into reinforcement learning agents. Carnegie Mellon University, Institute for Complex Engineered Systems, Pittsburgh"},{"key":"3489_CR54","doi-asserted-by":"crossref","unstructured":"Dorigo M, Gambardella L (2014) Ant-Q: a reinforcement learning approach to the traveling salesman problem. In: Proceedings of International Conference on Machine Learning ICML, pp 252-260","DOI":"10.1016\/B978-1-55860-377-6.50039-6"},{"key":"3489_CR55","unstructured":"Dulac-Arnold G, Evans R, van Hasselt H, Sunehag P, Lillicrap T, Hunt J, Mann T, Weber T, Degris T, Coppin B (2015) Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:151207679"},{"key":"3489_CR56","unstructured":"Efthymiadis K, Devlin S, Kudenko D (2013) Overcoming erroneous domain knowledge in plan-based reward shaping. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 1245\u20131246"},{"key":"3489_CR57","doi-asserted-by":"crossref","unstructured":"Eppe M, Magg S, Wermter S (2019) Curriculum goal masking for continuous deep reinforcement learning. In: Proceedings of the Joint IEEE International Conference on Development and Learning and Epigenetic Robotics ICDL-EpiRob, IEEE, pp 183-188","DOI":"10.1109\/DEVLRN.2019.8850721"},{"key":"3489_CR58","doi-asserted-by":"crossref","unstructured":"Erez T, Smart WD (2008) What does shaping mean for computational reinforcement learning? In: Proceedings of the IEEE International Conference on Development and Learning ICDL, IEEE, pp 215-219","DOI":"10.1109\/DEVLRN.2008.4640832"},{"issue":"1","key":"3489_CR59","doi-asserted-by":"crossref","first-page":"21","DOI":"10.3390\/make1010002","volume":"1","author":"A Fachantidis","year":"2019","unstructured":"Fachantidis A, Taylor ME, Vlahavas I (2019) Learning to teach reinforcement learning agents. Mach Learn Knowl Extr 1(1):21\u201342","journal-title":"Mach Learn Knowl Extr"},{"key":"3489_CR60","doi-asserted-by":"crossref","unstructured":"Fern\u00e1ndez F, Veloso M (2006) Probabilistic policy reuse in a reinforcement learning agent. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, ACM, pp 720\u2013727","DOI":"10.1145\/1160633.1160762"},{"key":"3489_CR61","unstructured":"Gao Y, Xu H, Lin J, Yu F, Levine S, Darrell T (2018) Reinforcement learning from imperfect demonstrations. In: Proceedings of the workshop track of the international conference on learning representations ICLR, p 13"},{"issue":"8","key":"3489_CR62","doi-asserted-by":"crossref","first-page":"e3537","DOI":"10.1002\/dac.3537","volume":"31","author":"M Ghobaei-Arani","year":"2018","unstructured":"Ghobaei-Arani M, Rahmanian AA, Shamsi M, Rasouli-Kenari A (2018) A learning-based approach for virtual machine placement in cloud data centers. Int J Commun Syst 31(8):e3537","journal-title":"Int J Commun Syst"},{"issue":"2","key":"3489_CR63","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1016\/S0925-5273(00)00156-0","volume":"78","author":"I Giannoccaro","year":"2002","unstructured":"Giannoccaro I, Pontrandolfo P (2002) Inventory management in supply chains: a reinforcement learning approach. Int J Prod Econ 78(2):153\u2013161","journal-title":"Int J Prod Econ"},{"key":"3489_CR64","unstructured":"Gimelfarb M, Sanner S, Lee CG (2018) Reinforcement learning with multiple experts: A Bayesian model combination approach. In: Advances in Neural Information Processing Systems, pp 9528\u20139538"},{"key":"3489_CR65","unstructured":"Griffith S, Subramanian K, Scholz J, Isbell C, Thomaz AL (2013) Policy shaping: Integrating human feedback with reinforcement learning. In: Advances in Neural Information Processing Systems, pp 2625\u20132633"},{"key":"3489_CR66","doi-asserted-by":"crossref","unstructured":"Grizou J, Lopes M, Oudeyer PY (2013) Robot learning simultaneously a task and how to interpret human instructions. In: Proceedings of the Joint IEEE International Conference on Development and Learning and Epigenetic Robotics ICDL-EpiRob, IEEE, pp 1\u20138","DOI":"10.1109\/DevLrn.2013.6652523"},{"key":"3489_CR67","doi-asserted-by":"crossref","unstructured":"Harutyunyan A, Devlin S, Vrancx P, Now\u00e9 A (2015) Expressing arbitrary reward functions as potential-based advice. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI, pp 2652\u20132658","DOI":"10.1609\/aaai.v29i1.9628"},{"key":"3489_CR68","unstructured":"Hausknecht M, Mupparaju P, Subramanian S, Kalyanakrishnan S, Stone P (2016) Half field offense: an environment for multiagent learning and ad hoc teamwork. In: AAMAS Adaptive and Learning Agents Workshop ALA 2016"},{"issue":"4","key":"3489_CR69","doi-asserted-by":"crossref","first-page":"767","DOI":"10.1007\/s10458-016-9352-6","volume":"31","author":"P Hernandez-Leal","year":"2017","unstructured":"Hernandez-Leal P, Zhan Y, Taylor ME, Sucar LE, de Cote EM (2017) Efficiently detecting switches against non-stationary opponents. Auton Agents Multi-Agent Syst 31(4):767\u2013789","journal-title":"Auton Agents Multi-Agent Syst"},{"issue":"2","key":"3489_CR70","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1007\/s40708-016-0042-6","volume":"3","author":"A Holzinger","year":"2016","unstructured":"Holzinger A (2016) Interactive machine learning for health informatics: when do we need the human-in-the-loop? Brain Inform 3(2):119\u2013131","journal-title":"Brain Inform"},{"issue":"2","key":"3489_CR72","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/s13218-020-00636-z","volume":"34","author":"A Holzinger","year":"2020","unstructured":"Holzinger A, Carrington A, M\u00fcller H (2020) Measuring the quality of explanations: the system causability scale (SCS): comparing human and machine explanations. Kunstliche Intelligenz 34(2):193\u2013198. https:\/\/doi.org\/10.1007\/s13218-020-00636-z","journal-title":"Kunstliche Intelligenz"},{"key":"3489_CR71","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1016\/j.inffus.2021.01.008","volume":"71","author":"A Holzinger","year":"2021","unstructured":"Holzinger A, Malle B, Saranti A, Pfeifer B (2021) Towards multi-modal causability with graph neural networks enabling information fusion for explainable AI. Inf Fusion 71:28\u201337","journal-title":"Inf Fusion"},{"key":"3489_CR73","unstructured":"Isbell CL, Kearns M, Kormann D, Singh S, Stone P (2000) Cobot in LambdaMOO: a social statistics agent. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI, pp 36-41"},{"key":"3489_CR74","doi-asserted-by":"crossref","unstructured":"Jing M, Ma X, Huang W, Sun F, Yang C, Fang B, Liu H (2020) Reinforcement learning from imperfect demonstrations under soft expert guidance. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 5109\u20135116","DOI":"10.1609\/aaai.v34i04.5953"},{"key":"3489_CR75","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Intell Res 4:237\u2013285","journal-title":"J Artif Intell Res"},{"key":"3489_CR80","unstructured":"Karlsson J (2014) Learning to play games from multiple imperfect teachers. Chalmers University of Technology, Gothenburg, Sweden (Master\u2019s thesis)"},{"issue":"3","key":"3489_CR78","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1016\/S0921-8890(02)00168-9","volume":"38","author":"F Kaplan","year":"2002","unstructured":"Kaplan F, Oudeyer PY, Kubinyi E, Mikl\u00f3si A (2002) Robotic clicker training. Robot Auton Syst 38(3):197\u2013206","journal-title":"Robot Auton Syst"},{"key":"3489_CR76","first-page":"72","volume-title":"Robot soccer world cup","author":"S Kalyanakrishnan","year":"2006","unstructured":"Kalyanakrishnan S, Liu Y, Stone P (2006) Half field offense in RoboCup soccer: a multiagent reinforcement learning case study. Robot soccer world cup. Springer, Berlin, pp 72\u201385"},{"key":"3489_CR77","unstructured":"Kamar E, Hacker S, Horvitz E (2012) Combining human and machine intelligence in large-scale crowdsourcing. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 467\u2013474"},{"key":"3489_CR79","doi-asserted-by":"crossref","unstructured":"Karimpanal TG, Rana S, Gupta S, Tran T, Venkatesh S (2019) Learning transferable domain priors for safe exploration in reinforcement learning. In: Proceedings of the International Joint Conference on Neural Networks IJCNN, pp 1-8","DOI":"10.1109\/IJCNN48605.2020.9207344"},{"key":"3489_CR81","doi-asserted-by":"crossref","unstructured":"Kerzel M, Mohammadi HB, Zamani MA, Wermter S (2018) Accelerating deep continuous reinforcement learning through task simplification. In: Proceedings of the International Joint Conference on Neural Networks IJCNN, IEEE, pp 1-6","DOI":"10.1109\/IJCNN.2018.8489712"},{"key":"3489_CR82","unstructured":"Kessler Faulkner T, Gutierrez RA, Short ES, Hoffman G, Thomaz AL (2019) Active attention-modified policy shaping: socially interactive agents track. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 728-736"},{"issue":"1","key":"3489_CR83","first-page":"73","volume":"18","author":"H Kitano","year":"1997","unstructured":"Kitano H, Asada M, Kuniyoshi Y, Noda I, Osawa E, Matsubara H (1997) RoboCup: a challenge problem for AI. AI Mag 18(1):73","journal-title":"AI Mag"},{"key":"3489_CR84","doi-asserted-by":"crossref","unstructured":"Knowles MJ, Wermter S (2008) The hybrid integration of perceptual symbol systems and interactive reinforcement learning. In: Proceedings of the International Conference on Hybrid Intelligent Systems, IEEE, pp 404-409","DOI":"10.1109\/HIS.2008.90"},{"key":"3489_CR85","doi-asserted-by":"crossref","unstructured":"Knox WB, Stone P (2009) Interactively shaping agents via human reinforcement: the TAMER framework. In: Proceedings of the International Conference on Knowledge Capture, ACM, pp 9-16","DOI":"10.1145\/1597735.1597738"},{"key":"3489_CR86","unstructured":"Knox WB, Stone P (2010) Combining manual feedback with subsequent MDP reward signals for reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 5-12"},{"key":"3489_CR87","doi-asserted-by":"crossref","unstructured":"Knox WB, Stone P (2012a) Reinforcement learning from human reward: discounting in episodic tasks. Discounting in episodic tasks. In:Proceedings of the IEEE International Symposium on Robot and Human Interactive Communication RO-MAN, IEEE, pp 878\u2013885","DOI":"10.1109\/ROMAN.2012.6343862"},{"key":"3489_CR88","unstructured":"Knox WB, Stone P (2012b) Reinforcement learning from simultaneous human and MDP reward. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, pp 475-482"},{"issue":"4","key":"3489_CR89","doi-asserted-by":"crossref","first-page":"409","DOI":"10.1007\/s12369-012-0163-x","volume":"4","author":"WB Knox","year":"2012","unstructured":"Knox WB, Glass BD, Love BC, Maddox WT, Stone P (2012) How humans teach agents. Int J Soc Robot 4(4):409\u2013421","journal-title":"Int J Soc Robot"},{"key":"3489_CR90","first-page":"460","volume-title":"Proceedings of the international conference on social robotics","author":"WB Knox","year":"2013","unstructured":"Knox WB, Stone P, Breazeal C (2013) Training a robot via human feedback: a case study. Proceedings of the international conference on social robotics. Springer, Berlin, pp 460\u2013470"},{"issue":"11","key":"3489_CR91","doi-asserted-by":"crossref","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: a survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"key":"3489_CR92","unstructured":"Koenig S, Simmons RG (1993) Complexity analysis of real-time reinforcement learning. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI, pp 99\u2013107"},{"issue":"3","key":"3489_CR93","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1177\/0278364911428653","volume":"31","author":"G Konidaris","year":"2012","unstructured":"Konidaris G, Kuindersma S, Grupen R, Barto A (2012) Robot learning from demonstration by constructing skill trees. Int J Robot Res 31(3):360\u2013375","journal-title":"Int J Robot Res"},{"issue":"4","key":"3489_CR94","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1109\/THMS.2019.2912447","volume":"49","author":"G Li","year":"2019","unstructured":"Li G, Gomez R, Nakamura K, He B (2019) Human-centered reinforcement learning: a survey. IEEE Trans Hum-Mach Syst 49(4):337\u2013349","journal-title":"IEEE Trans Hum-Mach Syst"},{"key":"3489_CR95","unstructured":"Lin LJ (1991) Programming robots using reinforcement learning and teaching. In: Proceedings of the Association for the Advancement of Artificial Intelligence conference AAAI, pp 781\u2013786"},{"key":"3489_CR96","doi-asserted-by":"crossref","first-page":"120757","DOI":"10.1109\/ACCESS.2020.3006254","volume":"8","author":"J Lin","year":"2020","unstructured":"Lin J, Ma Z, Gomez R, Nakamura K, He B, Li G (2020) A review on interactive reinforcement learning from human social feedback. IEEE Access 8:120757\u2013120765","journal-title":"IEEE Access"},{"issue":"1","key":"3489_CR97","doi-asserted-by":"crossref","first-page":"456","DOI":"10.1109\/TETC.2019.2896325","volume":"9","author":"X Liu","year":"2021","unstructured":"Liu X, Deng R, Choo KKR, Yang Y (2021) Privacy-preserving reinforcement learning design for patient-centric dynamic treatment regimes. IEEE Trans Emerg Top Comput 9(1):456\u2013470","journal-title":"IEEE Trans Emerg Top Comput"},{"key":"3489_CR98","unstructured":"Mankowitz DJ, Dulac-Arnold G, Hester T (2019) Challenges of real-world reinforcement learning. In: ICML Workshop on Real-Life Reinforcement Learning, p\u00a014"},{"key":"3489_CR99","unstructured":"Mann TA, Gowal S, Jiang R, Hu H, Lakshminarayanan B, Gyorgy A (2018) Learning from delayed outcomes with intermediate observations. arXiv preprint arXiv:180709387"},{"key":"3489_CR102","unstructured":"Mill\u00e1n C, Fernandes B, Cruz F (2019) Human feedback in continuous actor-critic reinforcement learning. In: Proceedings of the European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning ESANN, ESANN, pp 661-666"},{"key":"3489_CR101","doi-asserted-by":"crossref","unstructured":"Mill\u00e1n-Arias C, Fernandes B, Cruz F, Dazeley R, Fernandes S (2020) A robust approach for continuous interactive reinforcement learning. In: Proceedings of the 8th International Conference on Human-Agent Interaction, pp 278\u2013280","DOI":"10.1145\/3406499.3418769"},{"key":"3489_CR100","doi-asserted-by":"crossref","first-page":"104242","DOI":"10.1109\/ACCESS.2021.3099071","volume":"9","author":"C Mill\u00e1n-Arias","year":"2021","unstructured":"Mill\u00e1n-Arias C, Fernandes B, Cruz F, Dazeley R, Fernandes S (2021) A robust approach for continuous interactive actor-critic algorithms. IEEE Access 9:104242\u2013104260","journal-title":"IEEE Access"},{"issue":"16","key":"3489_CR103","doi-asserted-by":"crossref","first-page":"5574","DOI":"10.3390\/app10165574","volume":"10","author":"I Moreira","year":"2020","unstructured":"Moreira I, Rivas J, Cruz F, Dazeley R, Ayala A, Fernandes B (2020) Deep reinforcement learning with interactive feedback in a human-robot environment. Appl Sci 10(16):5574","journal-title":"Appl Sci"},{"key":"3489_CR104","doi-asserted-by":"crossref","unstructured":"Nair A, McGrew B, Andrychowicz M, Zaremba W, Abbeel P (2018) Overcoming exploration in reinforcement learning with demonstrations. In: Proceedings of the IEEE International Conference on Robotics and Automation ICRA, IEEE, pp 6292\u20136299","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"3489_CR105","unstructured":"Narvekar S, Sinapov J, Leonetti M, Stone P (2016) Source task creation for curriculum learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, pp 566-574"},{"key":"3489_CR106","doi-asserted-by":"crossref","unstructured":"Narvekar S, Sinapov J, Stone P (2017) Autonomous task sequencing for customized curriculum design in reinforcement learning.In: Proceedings of the International Joint Conference on Artificial Intelligence IJCAI","DOI":"10.24963\/ijcai.2017\/353"},{"key":"3489_CR107","unstructured":"Navidi N (2020) Human AI interaction loop training: new approach for interactive reinforcement learning. arXiv preprint arXiv:200304203"},{"key":"3489_CR108","first-page":"278","volume":"99","author":"AY Ng","year":"1999","unstructured":"Ng AY, Harada D, Russell S (1999) Policy invariance under reward transformations: theory and application to reward shaping. Proc Int Conf Mach Learn ICML 99:278\u2013287","journal-title":"Proc Int Conf Mach Learn ICML"},{"issue":"3","key":"3489_CR109","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1016\/j.jmp.2008.12.005","volume":"53","author":"Y Niv","year":"2009","unstructured":"Niv Y (2009) Reinforcement learning in the brain. J Math Psychol 53(3):139\u2013154","journal-title":"J Math Psychol"},{"key":"3489_CR110","first-page":"250","volume":"7","author":"L Nunes","year":"2003","unstructured":"Nunes L, Oliveira E (2003) Exchanging advice and learning to trust. Coop Inf Agents 7:250\u2013265","journal-title":"Coop Inf Agents"},{"key":"3489_CR111","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1016\/j.neunet.2019.01.012","volume":"113","author":"GI Parisi","year":"2019","unstructured":"Parisi GI, Kemker R, Part JL, Kanan C, Wermter S (2019) Continual lifelong learning with neural networks: a review. Neural Netw 113:54\u201371","journal-title":"Neural Netw"},{"key":"3489_CR112","unstructured":"Parisotto E, Ba JL, Salakhutdinov R (2016) Actor-mimic: deep multitask and transfer reinforcement learning. In: Proceedings of the international conference on learning representations ICLR, p 16"},{"key":"3489_CR113","doi-asserted-by":"crossref","unstructured":"Partalas I, Vrakas D, Vlahavas I (2008) Reinforcement learning and automated planning: a survey. In: Artificial Intelligence for Advanced Problem Solving Techniques, IGI Global, pp 148\u2013165","DOI":"10.4018\/978-1-59904-705-8.ch006"},{"key":"3489_CR114","doi-asserted-by":"crossref","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell T (2017) Curiosity-driven exploration by self-supervised prediction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp 16-17","DOI":"10.1109\/CVPRW.2017.70"},{"key":"3489_CR115","doi-asserted-by":"crossref","unstructured":"Peng B, MacGlashan J, Loftin R, Littman ML, Roberts DL, Taylor ME (2017) Curriculum design for machine learners in sequential decision tasks(Extended Abstract). In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS","DOI":"10.1109\/TETCI.2018.2829980"},{"key":"3489_CR116","unstructured":"Pilarski PM, Sutton RS (2012) Between instruction and reward: human-prompted switching. In: AAAI Fall Symposium Series: Robots Learning Interactively from Human Teachers, pp 45\u201352"},{"key":"3489_CR117","doi-asserted-by":"crossref","first-page":"569","DOI":"10.1613\/jair.898","volume":"19","author":"B Price","year":"2003","unstructured":"Price B, Boutilier C (2003) Accelerating reinforcement learning through implicit imitation. J Artif Intell Res 19:569\u2013629","journal-title":"J Artif Intell Res"},{"key":"3489_CR118","unstructured":"Randl\u00f8v J, Alstr\u00f8m P (1998) Learning to drive a bicycle using reinforcement learning and shaping. In: Proceedings of the International Conference on Machine Learning ICML, pp 463\u2013471"},{"key":"3489_CR119","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1613\/jair.3987","volume":"48","author":"DM Roijers","year":"2013","unstructured":"Roijers DM, Vamplew P, Whiteson S, Dazeley R (2013) A survey of multi-objective sequential decision-making. J Artif Intell Res 48:67\u2013113","journal-title":"J Artif Intell Res"},{"issue":"1","key":"3489_CR120","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1007\/s11370-012-0128-9","volume":"6","author":"L Rozo","year":"2013","unstructured":"Rozo L, Jim\u00e9nez P, Torras C (2013) A robot learning from demonstration framework to perform force-based manipulation tasks. Intell Serv Robot 6(1):33\u201351","journal-title":"Intell Serv Robot"},{"key":"3489_CR121","first-page":"1040","volume":"9","author":"S Schaal","year":"1997","unstructured":"Schaal S (1997) Learning from demonstration. Adv Neural Inf Process Syst 9:1040\u20131046","journal-title":"Adv Neural Inf Process Syst"},{"issue":"1","key":"3489_CR122","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41598-020-68447-8","volume":"10","author":"E Sert","year":"2020","unstructured":"Sert E, Bar-Yam Y, Morales AJ (2020) Segregation dynamics with reinforcement learning and agent based modeling. Sci Rep 10(1):1\u201312","journal-title":"Sci Rep"},{"issue":"12","key":"3489_CR123","first-page":"2212","volume":"50","author":"A Shahidinejad","year":"2020","unstructured":"Shahidinejad A, Ghobaei-Arani M (2020) Joint computation offloading and resource provisioning for edge-cloud computing environment: a machine learning-based approach. Software 50(12):2212\u20132230","journal-title":"Software"},{"issue":"4","key":"3489_CR124","doi-asserted-by":"crossref","first-page":"639","DOI":"10.1007\/s10723-020-09530-2","volume":"18","author":"A Shakarami","year":"2020","unstructured":"Shakarami A, Ghobaei-Arani M, Masdari M, Hosseinzadeh M (2020) A survey on the computation offloading approaches in mobile edge\/cloud computing environment: a stochastic-based perspective. J Grid Comput 18(4):639\u2013671","journal-title":"J Grid Comput"},{"issue":"1","key":"3489_CR125","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1109\/TETCI.2018.2823329","volume":"3","author":"K Shao","year":"2018","unstructured":"Shao K, Zhu Y, Zhao D (2018) Starcraft micromanagement with reinforcement learning and curriculum transfer learning. IEEE Trans Emerg Top Comput Intell 3(1):73\u201384","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"key":"3489_CR126","first-page":"1041","volume":"7","author":"M Sharma","year":"2007","unstructured":"Sharma M, Holmes MP, Santamar\u00eda JC, Irani A, Isbell CL Jr, Ram A (2007) Transfer learning in real-time strategy games using hybrid cbr\/rl. Proc Int Jt Conf Artif Intell IJCAI 7:1041\u20131046","journal-title":"Proc Int Jt Conf Artif Intell IJCAI"},{"key":"3489_CR127","first-page":"1082","volume-title":"Advances in neural information processing systems","author":"CR Shelton","year":"2001","unstructured":"Shelton CR (2001) Balancing multiple sources of reward in reinforcement learning. Advances in neural information processing systems. Springer, Berlin, pp 1082\u20131088"},{"key":"3489_CR128","unstructured":"Shiarlis K, ao Messias J, Whiteson S, (2016) Inverse reinforcement learning from failure. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, pp 1060-1068"},{"issue":"1","key":"3489_CR129","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1901\/jeab.1975.24-117","volume":"24","author":"BF Skinner","year":"1975","unstructured":"Skinner BF (1975) The shaping of phylogenic behavior. J Exp Anal Behav 24(1):117\u2013120","journal-title":"J Exp Anal Behav"},{"key":"3489_CR130","unstructured":"Smart WD, Kaelbling LP (2002) Effective reinforcement learning for mobile robots. Proceedings of the IEEE International Conference on Robotics and Automation ICRA, IEEE 4:3404\u20133410"},{"key":"3489_CR131","doi-asserted-by":"crossref","unstructured":"Sridharan M, Meadows B, Gomez R (2017) What can I not do? towards an architecture for reasoning about and learning affordances. In: Proceedings of the International Conference on Automated Planning and Scheduling, pp 461\u2013469","DOI":"10.1609\/icaps.v27i1.13852"},{"key":"3489_CR132","unstructured":"Stahlhut C, Navarro-Guerrero N, Weber C, Wermter S, WTM VKS, (2015) Interaction is more beneficial in complex reinforcement learning problems than in simple ones. In: Proceedings of the Interdisziplin\u00e4rer Workshop Kognitive Systeme (KogSys), pp 142-150"},{"key":"3489_CR134","doi-asserted-by":"crossref","unstructured":"Suay HB, Chernova S (2011) Effect of human guidance and state space size on interactive reinforcement learning. In: Proceedings of the IEEE International Symposium on Robot and Human Interactive Communication RO-MAN, IEEE, pp 1-6","DOI":"10.1109\/ROMAN.2011.6005223"},{"key":"3489_CR133","unstructured":"Suay HB, Brys T, Taylor ME, Chernova S (2016) Learning from demonstration for shaping through inverse reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 429-437"},{"key":"3489_CR136","unstructured":"Subramanian K, Isbell C, Thomaz A (2011) Learning options through human interaction. In: IJCAI Workshop on Agents Learning Interactively from Human Teachers (ALIHT), Citeseer"},{"key":"3489_CR135","unstructured":"Subramanian K, Isbell CL Jr, Thomaz AL (2016) Exploration from demonstration for interactive reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems AAMAS, pp 447-456"},{"key":"3489_CR137","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press, London"},{"key":"3489_CR138","unstructured":"Talvitie E, Singh SP (2007) An experts algorithm for transfer learning. In: Proceedings of the International Joint Conference on Artificial Intelligence IJCAI, pp 1065-1070"},{"key":"3489_CR139","doi-asserted-by":"crossref","unstructured":"Tanwani AK, Billard A (2013) Transfer in inverse reinforcement learning for multiple strategies. In: Proceedings of the IEEE\/RSJ International Conference on Intelligent Robots and Systems IROS, IEEE, pp 3244\u20133250","DOI":"10.1109\/IROS.2013.6696817"},{"key":"3489_CR140","unstructured":"Taylor ME (2009) Assisting transfer-enabled machine learning algorithms: leveraging human knowledge for curriculum design. In: The AAAI 2009 Spring Symposium on Agents that Learn from Human Teachers"},{"issue":"7","key":"3489_CR144","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor ME, Stone P (2009) Transfer learning for reinforcement learning domains: a survey. J Mach Learn Res 10(7):1633\u20131685","journal-title":"J Mach Learn Res"},{"key":"3489_CR145","first-page":"880","volume":"5","author":"ME Taylor","year":"2005","unstructured":"Taylor ME, Stone P, Liu Y (2005) Value functions for rl-based behavior transfer: a comparative study. Proc Assoc Adv Artif Intell Conf AAAI 5:880\u2013885","journal-title":"Proc Assoc Adv Artif Intell Conf AAAI"},{"issue":"1","key":"3489_CR146","first-page":"2125","volume":"8","author":"ME Taylor","year":"2007","unstructured":"Taylor ME, Stone P, Liu Y (2007a) Transfer learning via inter-task mappings for temporal difference learning. J Mach Learn Res 8(1):2125\u20132167","journal-title":"J Mach Learn Res"},{"key":"3489_CR143","doi-asserted-by":"crossref","unstructured":"Taylor ME, Whiteson S, Stone P (2007b) Transfer via Inter-Task Mappings in Policy Search Reinforcement Learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, pp 156-163","DOI":"10.1145\/1329125.1329170"},{"key":"3489_CR141","unstructured":"Taylor ME, Kuhlmann G, Stone P (2008) Autonomous transfer for reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 283\u2013290"},{"key":"3489_CR142","unstructured":"Taylor ME, Suay HB, Chernova S (2011) Integrating reinforcement learning with human demonstrations of varying ability. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS, International Foundation for Autonomous Agents and Multiagent Systems, pp 617-624"},{"issue":"1","key":"3489_CR147","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1080\/09540091.2014.885279","volume":"26","author":"ME Taylor","year":"2014","unstructured":"Taylor ME, Carboni N, Fachantidis A, Vlahavas I, Torrey L (2014) Reinforcement learning agents providing advice in complex video games. Connect Sci 26(1):45\u201363","journal-title":"Connect Sci"},{"key":"3489_CR148","first-page":"483","volume-title":"Advances in artificial intelligence-IBERAMIA 2010","author":"AC Tenorio-Gonzalez","year":"2010","unstructured":"Tenorio-Gonzalez AC, Morales EF, Villase\u00f1or-Pineda L (2010) Dynamic reward shaping: training a robot by voice. Advances in artificial intelligence-IBERAMIA 2010. Springer, Berlin, pp 483\u2013492"},{"issue":"2","key":"3489_CR149","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro G (1994) TD-Gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput 6(2):215\u2013219","journal-title":"Neural Comput"},{"key":"3489_CR150","first-page":"871","volume-title":"Advances in neural information processing systems","author":"G Tesauro","year":"2004","unstructured":"Tesauro G (2004) Extending Q-learning to general adaptive multi-agent systems. Advances in neural information processing systems. Springer, Berlin, pp 871\u2013878"},{"key":"3489_CR151","doi-asserted-by":"crossref","unstructured":"Thomaz AL, Breazeal C (2007) Asymmetric interpretations of positive and negative human feedback for a social learning agent. In: Proceedings of the IEEE International Symposium on Robot and Human Interactive Communication RO-MAN, IEEE, pp 720-725","DOI":"10.1109\/ROMAN.2007.4415180"},{"key":"3489_CR152","unstructured":"Thomaz AL, Hoffman G, Breazeal C (2005) Real-time interactive reinforcement learning for robots. In: AAAI 2005 Workshop on Human Comprehensible Machine Learning"},{"key":"3489_CR154","first-page":"1000","volume":"6","author":"AL Thomaz","year":"2006","unstructured":"Thomaz AL, Breazeal C et al. (2006a) Reinforcement learning with human teachers: evidence of feedback and guidance with implications for learning performance. Proc Assoc Adv Artif Intell Conf AAAI 6:1000\u20131005","journal-title":"Proc Assoc Adv Artif Intell Conf AAAI"},{"key":"3489_CR153","doi-asserted-by":"crossref","unstructured":"Thomaz AL, Hoffman G, Breazeal C (2006b) Reinforcement learning with human teachers: Understanding how people want to teach robots. In: Proceedings of the IEEE International Symposium on Robot and Human Interactive Communication RO-MAN, IEEE, pp 352-357","DOI":"10.1109\/ROMAN.2006.314459"},{"key":"3489_CR155","unstructured":"Torrey L, Taylor ME (2013) Teaching on a Budget: Agents Advising Agents in Reinforcement Learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems AAMAS"},{"key":"3489_CR156","unstructured":"Vamplew P, Foale C, Dazeley R (2020) A demonstration of issues with value-based multiobjective reinforcement learning under stochastic state transitions. In: Proceedings of the adaptive and learning agents workshop, international conference on autonomous agents and multiagent systems, p 6"},{"key":"3489_CR157","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1007\/978-3-642-27645-3_11","volume-title":"Reinforcement learning: state of the Art. Adaptation, learning, and optimization","author":"N Vlassis","year":"2012","unstructured":"Vlassis N, Ghavamzadeh M, Mannor S, Poupart P (2012) Bayesian reinforcement learning. Reinforcement learning: state of the Art. Adaptation, learning, and optimization, vol 12. Springer, Berlin, Heidelberg, pp 359\u2013386. https:\/\/doi.org\/10.1007\/978-3-642-27645-3_11"},{"key":"3489_CR158","unstructured":"Wiewiora E, Cottrell G, Elkan C (2003) Principled methods for advising reinforcement learning agents. In: Proceedings of the International Conference on Machine learning ICML, pp 792-799"},{"key":"3489_CR159","unstructured":"Xu H, Bector R, Rabinovich Z (2020) Teaching multiple learning agents by environment-dynamics tweaks. In: AAMAS Adaptive and Learning Agents Workshop ALA 2020, p\u00a08"},{"key":"3489_CR160","unstructured":"Yamagata T, Santos-Rodr\u00edguez R, McConville R, Elsts A (2019) Online feature selection for activity recognition using reinforcement learning with multiple feedback. arXiv preprint arXiv:190806134"},{"key":"3489_CR161","doi-asserted-by":"crossref","first-page":"320","DOI":"10.1007\/978-3-030-26118-4_31","volume-title":"Proceedings of the international conference on interactive collaborative robotics","author":"MC Yang","year":"2019","unstructured":"Yang MC, Samani H, Zhu K (2019) Emergency-response locomotion of hexapod robot with heuristic reinforcement learning using q-learning. Proceedings of the international conference on interactive collaborative robotics. Springer, Berlin, pp 320\u2013329"},{"key":"3489_CR162","unstructured":"Zhan Y, Ammar HB, Taylor ME (2016) Theoretically-grounded policy advice from multiple teachers in reinforcement learning settings with applications to negative transfer. In: Proceedings of the International Joint Conference on Artificial Intelligence IJCAI"},{"issue":"1","key":"3489_CR163","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1109\/JPROC.2020.3004555","volume":"109","author":"F Zhuang","year":"2020","unstructured":"Zhuang F, Qi Z, Duan K, Xi D, Zhu Y, Zhu H, Xiong H, He Q (2020) A comprehensive survey on transfer learning. Proc IEEE 109(1):43\u201376","journal-title":"Proc IEEE"}],"container-title":["Journal of Ambient Intelligence and Humanized Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03489-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12652-021-03489-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03489-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,28]],"date-time":"2023-03-28T12:55:57Z","timestamp":1680008157000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12652-021-03489-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,18]]},"references-count":163,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["3489"],"URL":"https:\/\/doi.org\/10.1007\/s12652-021-03489-y","relation":{},"ISSN":["1868-5137","1868-5145"],"issn-type":[{"value":"1868-5137","type":"print"},{"value":"1868-5145","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9,18]]},"assertion":[{"value":"27 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 September 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 September 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}