{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T16:04:11Z","timestamp":1778169851283,"version":"3.51.4"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T00:00:00Z","timestamp":1747699200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T00:00:00Z","timestamp":1747699200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00521-025-11288-1","type":"journal-article","created":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T12:45:26Z","timestamp":1747745126000},"page":"18957-18987","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Human-AI collaboration in real-world complex environment with reinforcement learning"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9236-380X","authenticated-orcid":false,"given":"Md Saiful","family":"Islam","sequence":"first","affiliation":[]},{"given":"Srijita","family":"Das","sequence":"additional","affiliation":[]},{"given":"Sai Krishna","family":"Gottipati","sequence":"additional","affiliation":[]},{"given":"William","family":"Duguay","sequence":"additional","affiliation":[]},{"given":"Clodric","family":"Mars","sequence":"additional","affiliation":[]},{"given":"Jalal","family":"Arabneydi","sequence":"additional","affiliation":[]},{"given":"Antoine","family":"Fagette","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Guzdial","sequence":"additional","affiliation":[]},{"given":"Matthew E.","family":"Taylor","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,20]]},"reference":[{"issue":"2","key":"11288_CR1","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1109\/TSMCC.2002.801352","volume":"32","author":"RH Kewley","year":"2002","unstructured":"Kewley RH, Embrechts MJ (2002) Computational military tactical planning system. IEEE Trans Syst, Man, Cybern, Part C (Applications and Reviews) 32(2):161\u2013171","journal-title":"IEEE Trans Syst, Man, Cybern, Part C (Applications and Reviews)"},{"key":"11288_CR2","unstructured":"Gottipati SK, Nguyen L-H, Mars C, Taylor ME (2023) Hiking up that hill with cogment-verse: Train & operate multi-agent systems learning from humans. In: Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems, pp. 3065\u20133067"},{"key":"11288_CR3","doi-asserted-by":"publisher","first-page":"15","DOI":"10.3389\/frobt.2018.00015","volume":"5","author":"F Sio","year":"2018","unstructured":"Sio F, Hoven J (2018) Meaningful human control over autonomous systems: A philosophical account. Front Robot AI 5:15","journal-title":"Front Robot AI"},{"issue":"7587","key":"11288_CR4","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"issue":"7836","key":"11288_CR5","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1038\/s41586-020-2939-8","volume":"588","author":"MG Bellemare","year":"2020","unstructured":"Bellemare MG, Candido S, Castro PS, Gong J, Machado MC, Moitra S, Ponda SS, Wang Z (2020) Autonomous navigation of stratospheric balloons using reinforcement learning. Nature 588(7836):77\u201382","journal-title":"Nature"},{"key":"11288_CR6","unstructured":"Gottipati SK, Sattarov B, Niu S, Pathak Y, Wei H, Liu S, Blackburn S, Thomas K, Coley C, Tang J (2020) Learning to navigate the synthetically accessible chemical space using reinforcement learning. In: International Conference on Machine Learning, pp. 3668\u20133679. PMLR"},{"key":"11288_CR7","unstructured":"Gottipati SK, Pathak Y, Nuttall R, Sahir Chunduru R, Touati A, Subramanian SG, Taylor ME, Chandar S (2020) Maximum reward formulation in reinforcement learning. CoRR arxiv:abs\/2010.03744"},{"key":"11288_CR8","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE","DOI":"10.1109\/IROS.2012.6386109"},{"issue":"7540","key":"11288_CR9","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"11288_CR10","doi-asserted-by":"crossref","unstructured":"Schelble B, Canonico L-B, McNeese N, Carroll J, Hird C (2020) Designing human-autonomy teaming experiments through reinforcement learning. In: Proceedings of the Human Factors and Ergonomics Society Annual Meeting, vol. 64, pp. 1426\u20131430. SAGE Publications Sage CA: Los Angeles, CA","DOI":"10.1177\/1071181320641340"},{"issue":"5","key":"11288_CR11","doi-asserted-by":"publisher","first-page":"904","DOI":"10.1177\/0018720820960865","volume":"64","author":"T O\u2019Neill","year":"2022","unstructured":"O\u2019Neill T, McNeese N, Barron A, Schelble B (2022) Human-autonomy teaming: A review and analysis of the empirical literature. Hum Factors 64(5):904\u2013938","journal-title":"Hum Factors"},{"key":"11288_CR12","doi-asserted-by":"publisher","first-page":"698","DOI":"10.1177\/0278364920987859","volume":"40","author":"J Ibarz","year":"2021","unstructured":"Ibarz J, Tan J, Finn C, Kalakrishnan M, Pastor P, Levine S (2021) How to train your robot with deep reinforcement learning: lessons we have learned. Int J Robot Res 40:698\u2013721","journal-title":"Int J Robot Res"},{"key":"11288_CR13","doi-asserted-by":"crossref","unstructured":"Hester T, Vecerik M, Pietquin O, Lanctot M, Schaul T, Piot B, Horgan D, Quan J, Sendonaris A, Osband I (2018) Deep Q-learning from demonstrations. In: Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence, pp. 3223\u20133230","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"11288_CR14","unstructured":"Kharyal C, Sinha T, Gottipati SK, Abdollahi F, Das S, Taylor ME (2023) Do as you teach: A multi-teacher approach to self-play in deep reinforcement learning. In: Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems. AAMAS \u201923, pp. 2457\u20132459. International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC"},{"key":"11288_CR15","unstructured":"Mandlekar A, Xu D, Wong J, Nasiriany S, Wang C, Kulkarni R, Fei-Fei L, Savarese S, Zhu Y, Mart\u00edn-Mart\u00edn R (2022) What matters in learning from offline human demonstrations for robot manipulation. In: Conference on Robot Learning, pp. 1678\u20131690. PMLR"},{"key":"11288_CR16","unstructured":"Torrey L, Taylor M (2013) Teaching on a budget: Agents advising agents in reinforcement learning. In: Proceedings of the 2013 International Conference on Autonomous Agents and Multi-agent Systems, pp. 1053\u20131060"},{"key":"11288_CR17","doi-asserted-by":"crossref","unstructured":"Frazier S, Riedl M (2019) Improving deep reinforcement learning in minecraft with action advice. In: Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment, vol. 15, pp. 146\u2013152","DOI":"10.1609\/aiide.v15i1.5237"},{"key":"11288_CR18","unstructured":"Ilhan E, Gow J, Perez\u00a0Liebana D (2021) Action advising with advice imitation in deep reinforcement learning. In: Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems, pp. 629\u2013637"},{"key":"11288_CR19","unstructured":"Ibarz B, Leike J, Pohlen T, Irving G, Legg S, Amodei D (2018) Reward learning from human preferences and demonstrations in atari. Adv Neural Inf Proc Syst 31"},{"key":"11288_CR20","doi-asserted-by":"crossref","unstructured":"Palan M, Shevchuk G, Charles\u00a0Landolfi N, Sadigh D (2019) Learning reward functions by integrating human demonstrations and preferences. In: Robotics: Science and Systems","DOI":"10.15607\/RSS.2019.XV.023"},{"key":"11288_CR21","unstructured":"Lee K, Smith LM, Abbeel P (2021) Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. In: International Conference on Machine Learning, pp. 6152\u20136163. PMLR"},{"key":"11288_CR22","unstructured":"Ng AY, Harada D, Russell SJ (1999) Policy invariance under reward transformations: Theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning, pp. 278\u2013287"},{"key":"11288_CR23","unstructured":"Devlin SM, Kudenko D (2012) Dynamic potential-based reward shaping. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems, pp. 433\u2013440. IFAAMAS"},{"key":"11288_CR24","unstructured":"Brys T, Harutyunyan A, Suay HB, Chernova S, Taylor ME, Now\u00e9 A (2015) Reinforcement learning from demonstration through shaping. In: Twenty-fourth International Joint Conference on Artificial Intelligence"},{"key":"11288_CR25","first-page":"15931","volume":"33","author":"Y Hu","year":"2020","unstructured":"Hu Y, Wang W, Jia H, Wang Y, Chen Y, Hao J, Wu F, Fan C (2020) Learning to utilize shaping rewards: A new approach of reward shaping. Adv Neural Inf Process Syst 33:15931\u201315941","journal-title":"Adv Neural Inf Process Syst"},{"key":"11288_CR26","unstructured":"Islam MS, Das S, Gottipati SK, Duguay W, Mars C, Arabneydi J, Fagette A, Guzdial M, Taylor ME (2023) Wip: Human-ai interactions in real-world complex environments using a comprehensive reinforcement learning framework. In: Adaptive Learning Agents Workshop, ALA"},{"key":"11288_CR27","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1016\/j.artint.2017.08.005","volume":"252","author":"A Rosenfeld","year":"2017","unstructured":"Rosenfeld A, Agmon N, Maksimov O, Kraus S (2017) Intelligent agent supporting human-multi-robot team collaboration. Artif Intell 252:211\u2013231","journal-title":"Artif Intell"},{"key":"11288_CR28","doi-asserted-by":"crossref","unstructured":"Humann J, Pollard KA (2019) Human factors in the scalability of multirobot operation: A review and simulation. In: 2019 IEEE International Conference on Systems, Man and Cybernetics (SMC), pp. 700\u2013707. IEEE","DOI":"10.1109\/SMC.2019.8913876"},{"key":"11288_CR29","doi-asserted-by":"crossref","unstructured":"Barnes MJ, Chen JY, Jentsch F (2015) Designing for mixed-initiative interactions between human and autonomous systems in complex environments. In: 2015 IEEE International Conference on Systems, Man, and Cybernetics, pp. 1386\u20131390. IEEE","DOI":"10.1109\/SMC.2015.246"},{"key":"11288_CR30","doi-asserted-by":"publisher","first-page":"568","DOI":"10.3389\/fpsyg.2016.00568","volume":"7","author":"T Porat","year":"2016","unstructured":"Porat T, Oron-Gilad T, Rottem-Hovev M, Silbiger J (2016) Supervising and controlling unmanned systems: A multi-phase study with subject matter experts. Front Psychol 7:568","journal-title":"Front Psychol"},{"key":"11288_CR31","unstructured":"Meyer J-JC, Wieringa RJ (1994) Deontic logic in computer science: normative system specification. John Wiley and Sons Ltd"},{"key":"11288_CR32","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2021.640647","volume":"8","author":"J Waa","year":"2021","unstructured":"Waa J, Verdult S, Bosch K, Diggelen J, Haije T, Stigchel B, Cocu I (2021) Moral decision making in human-agent teams: Human control and the role of explanations. Front Robot AI 8:640647","journal-title":"Front Robot AI"},{"key":"11288_CR33","unstructured":"Argerich MF, F\u00fcrst J, Cheng B (2020) Tutor4rl: Guiding reinforcement learning with external knowledge. In: AAAI Spring Symposium: Combining Machine Learning with Knowledge Engineering (1)"},{"key":"11288_CR34","doi-asserted-by":"crossref","unstructured":"Bignold A, Cruz F, Taylor ME, Brys T, Dazeley R, Vamplew P, Foale C (2021) A conceptual framework for externally-influenced agents: An assisted reinforcement learning review. Journal of Ambient Intelligence and Humanized Computing, 1\u201324","DOI":"10.1007\/s12652-021-03489-y"},{"key":"11288_CR35","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1016\/S1364-6613(99)01327-3","volume":"3","author":"S Schaal","year":"1999","unstructured":"Schaal S (1999) Is imitation learning the route to humanoid robots? Trends Cognit Sci 3:233\u2013242","journal-title":"Trends Cognit Sci"},{"issue":"1","key":"11288_CR36","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1177\/02783649211041652","volume":"41","author":"E B\u0131y\u0131k","year":"2022","unstructured":"B\u0131y\u0131k E, Losey DP, Palan M, Landolfi NC, Shevchuk G, Sadigh D (2022) Learning reward functions from diverse sources of human feedback: Optimally integrating demonstrations and preferences. Int J Robot Res 41(1):45\u201367","journal-title":"Int J Robot Res"},{"key":"11288_CR37","unstructured":"Christiano PF, Leike J, Brown T, Martic M, Legg S, Amodei D (2017) Deep reinforcement learning from human preferences. Adv. Neural Inf Proc Syst 30"},{"key":"11288_CR38","unstructured":"Park J, Seo Y, Shin J, Lee H, Abbeel P, Lee K (2021) Surf: Semi-supervised reward learning with data augmentation for feedback-efficient preference-based reinforcement learning. In: International Conference on Learning Representations"},{"key":"11288_CR39","doi-asserted-by":"crossref","unstructured":"Sumers TR, Ho MK, Hawkins RD, Narasimhan K, Griffiths TL (2021) Learning rewards from linguistic feedback. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 6002\u20136010","DOI":"10.1609\/aaai.v35i7.16749"},{"key":"11288_CR40","doi-asserted-by":"crossref","unstructured":"Xue W, An B, Yan S, Xu Z (2023) Reinforcement learning from diverse human preferences. arXiv preprint arXiv:2301.11774","DOI":"10.24963\/ijcai.2024\/586"},{"key":"11288_CR41","unstructured":"Taylor ME, Suay HB, Chernova S (2011) Integrating Reinforcement Learning with Human Demonstrations of Varying Ability. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems ( AAMAS )"},{"key":"11288_CR42","doi-asserted-by":"crossref","unstructured":"Nair A, McGrew B, Andrychowicz M, Zaremba W, Abbeel P (2018) Overcoming exploration in reinforcement learning with demonstrations. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 6292\u20136299. IEEE","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"11288_CR43","unstructured":"Vecerik M, Hester T, Scholz J, Wang F, Pietquin O, Piot B, Heess N, Roth\u00f6rl T, Lampe T, Riedmiller M (2017) Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. arXiv preprint arXiv:1707.08817"},{"key":"11288_CR44","unstructured":"Knox WB, Stone P (2012) Reinforcement learning from simultaneous human and mdp reward. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems-Volume 1, pp. 475\u2013482"},{"key":"11288_CR45","doi-asserted-by":"crossref","unstructured":"Warnell G, Waytowich N, Lawhern V, Stone P (2018) Deep TAMER: Interactive agent shaping in high-dimensional state spaces. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32","DOI":"10.1609\/aaai.v32i1.11485"},{"key":"11288_CR46","unstructured":"Arakawa R, Kobayashi S, Unno Y, Tsuboi Y, Maeda S-i (2018) DQN-TAMER: Human-in-the-loop reinforcement learning with intractable feedback. arXiv preprint arXiv:1810.11748"},{"key":"11288_CR47","unstructured":"MacGlashan J, Ho M, Loftin R, Peng B, Wang G, Roberts DL, E.Taylor M, Littman ML (2017) Interactive learning from policy-dependent human feedback. In: Proceedings of the International Conference on Machine Learning (ICML)"},{"key":"11288_CR48","unstructured":"Arumugam D, Lee JK, Saskin S, Littman ML (2019) Deep reinforcement learning from policy-dependent human feedback. arXiv preprint arXiv:1902.04257"},{"key":"11288_CR49","unstructured":"Kim B, Farahmand A-m, Pineau J, Precup D (2013) Learning from limited demonstrations. In: Proceedings of the 26th International Conference on Neural Information Processing Systems-Volume 2, pp. 2859\u20132867"},{"key":"11288_CR50","unstructured":"Schaul T, Quan J, Antonoglou I, Silver D (2015) Prioritized experience replay. arXiv preprint arXiv:1511.05952"},{"key":"11288_CR51","unstructured":"Andrychowicz M, Wolski F, Ray A, Schneider J, Fong R, Welinder P, McGrew B, Tobin J, Pieter\u00a0Abbeel O, Zaremba W (2017) Hindsight experience replay. Adv Neural inf Proc Syst 30"},{"key":"11288_CR52","unstructured":"Liang X, Shu K, Lee K, Abbeel P (2021) Reward uncertainty for exploration in preference-based reinforcement learning. In: International Conference on Learning Representations"},{"key":"11288_CR53","doi-asserted-by":"crossref","unstructured":"Hessel M, Modayil J, Van\u00a0Hasselt H, Schaul T, Ostrovski G, Dabney W, Horgan D, Piot B, Azar M, Silver D (2018) Rainbow: Combining improvements in deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"11288_CR54","unstructured":"Venugopal A, Bondi E, Kamarthi H, Dholakia K, Ravindran B, Tambe M (2021) Reinforcement learning for unified allocation and patrolling in signaling games with uncertainty. In: Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems, pp. 1353\u20131361"},{"key":"11288_CR55","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/s10846-019-01073-3","volume":"98","author":"C Yan","year":"2020","unstructured":"Yan C, Xiang X, Wang C (2020) Towards real-time path planning through deep reinforcement learning for a UAV in dynamic environments. J Intell Robot Syst 98:297\u2013309","journal-title":"J Intell Robot Syst"},{"issue":"4","key":"11288_CR56","doi-asserted-by":"publisher","first-page":"562","DOI":"10.1049\/itr2.12046","volume":"15","author":"H Yuan","year":"2021","unstructured":"Yuan H, Ni J, Hu J (2021) A centralised training algorithm with D3QN for scalable regular unmanned ground vehicle formation maintenance. IET Intell Trans Syst 15(4):562\u2013572","journal-title":"IET Intell Trans Syst"},{"key":"11288_CR57","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1016\/j.jmsy.2020.06.018","volume":"56","author":"H Oliff","year":"2020","unstructured":"Oliff H, Liu Y, Kumar M, Williams M, Ryan M (2020) Reinforcement learning for facilitating human-robot-interaction in manufacturing. J Manuf Syst 56:326\u2013340","journal-title":"J Manuf Syst"},{"key":"11288_CR58","doi-asserted-by":"publisher","first-page":"13447","DOI":"10.1109\/TVT.2023.3275546","volume":"72","author":"Y Ji","year":"2023","unstructured":"Ji Y, Wang Y, Zhao H, Gui G, Gacanin H, Sari H, Adachi F (2023) Multi-agent reinforcement learning resources allocation method using Dueling Double Deep Q-Network in vehicular networks. IEEE Trans Veh Technol 72:13447\u201313460","journal-title":"IEEE Trans Veh Technol"},{"issue":"7","key":"11288_CR59","doi-asserted-by":"publisher","first-page":"1121","DOI":"10.3390\/electronics9071121","volume":"9","author":"W Kong","year":"2020","unstructured":"Kong W, Zhou D, Yang Z, Zhao Y, Zhang K (2020) UAV autonomous aerial combat maneuver strategy generation with observation error based on state-adversarial deep deterministic policy gradient and inverse reinforcement learning. Electronics 9(7):1121","journal-title":"Electronics"},{"key":"11288_CR60","doi-asserted-by":"crossref","unstructured":"Jiang Y, Yu J, Li Q (2022) A novel decision-making algorithm for beyond visual range air combat based on deep reinforcement learning. In: 2022 37th Youth Academic Annual Conference of Chinese Association of Automation (YAC), pp. 516\u2013521. IEEE","DOI":"10.1109\/YAC57282.2022.10023870"},{"key":"11288_CR61","unstructured":"Kamar E, Hacker S, Horvitz E (2012) Combining human and machine intelligence in large-scale crowdsourcing. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems-Volume 1, pp. 467\u2013474"},{"key":"11288_CR62","doi-asserted-by":"crossref","unstructured":"Lasecki W, Bigham J, Allen J, Ferguson G (2012) Real-time collaborative planning with the crowd. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 26, pp. 2435\u20132436","DOI":"10.1609\/aaai.v26i1.8419"},{"key":"11288_CR63","unstructured":"Kamar E (2016) Directions in hybrid intelligence: Complementing AI systems with human intelligence. In: IJCAI, pp. 4070\u20134073"},{"key":"11288_CR64","doi-asserted-by":"crossref","unstructured":"Liang C, Proft J, Andersen E, Knepper RA (2019) Implicit communication of actionable information in human-ai teams. In: Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems, pp. 1\u201313","DOI":"10.1145\/3290605.3300325"},{"key":"11288_CR65","doi-asserted-by":"crossref","unstructured":"Bansal G, Nushi B, Kamar E, Weld DS, Lasecki WS, Horvitz E (2019) Updates in human-AI teams: Understanding and addressing the performance\/compatibility tradeoff. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 2429\u20132437","DOI":"10.1609\/aaai.v33i01.33012429"},{"key":"11288_CR66","doi-asserted-by":"crossref","unstructured":"Hayes B, Scassellati B (2015) Effective robot teammate behaviors for supporting sequential manipulation tasks. In: 2015 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 6374\u20136380. IEEE","DOI":"10.1109\/IROS.2015.7354288"},{"key":"11288_CR67","doi-asserted-by":"publisher","DOI":"10.1145\/3394287","author":"MF Jung","year":"2020","unstructured":"Jung MF, Difranzo D, Shen S, Stoll B, Claure H, Lawrence A (2020) Robot-assisted tower construction-a method to study the impact of a robot\u2019s allocation behavior on interpersonal dynamics and collaboration in groups. J Hum-Robot Interact. https:\/\/doi.org\/10.1145\/3394287","journal-title":"J. Hum.-Robot Interact."},{"key":"11288_CR68","unstructured":"Herse S (2022) Optimising outcomes of human-agent collaboration using trust calibration. PhD thesis, UNSW Sydney"},{"issue":"1","key":"11288_CR69","first-page":"137","volume":"8","author":"K Phulera","year":"2017","unstructured":"Phulera K, Singh H, Bhatt A (2017) Analytical study on artificial intelligence techniques to achieve expert systems. Int J Emerg Technol (Special Issue NCETST-2017) 8(1):137\u2013140","journal-title":"Int J Emerg Technol (Special Issue NCETST-2017)"},{"key":"11288_CR70","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2022.102404","volume":"78","author":"R Jahanmahin","year":"2022","unstructured":"Jahanmahin R, Masoud S, Rickli J, Djuric A (2022) Human-robot interactions in manufacturing: A survey of human behavior modeling. Robot Comput-Integr Manuf 78:102404","journal-title":"Robot Comput-Integr Manuf"},{"key":"11288_CR71","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511973031","volume-title":"Security and Game Theory: Algorithms, Deployed Systems","author":"M Tambe","year":"2011","unstructured":"Tambe M (2011) Security and Game Theory: Algorithms, Deployed Systems. Cambridge University Press, Lessons Learned"},{"issue":"2\u20133","key":"11288_CR72","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1080\/088395198117820","volume":"12","author":"M Tambe","year":"1998","unstructured":"Tambe M (1998) Implementing agent teams in dynamic multiagent environments. Appl Artif Intell 12(2\u20133):189\u2013210","journal-title":"Appl Artif Intell"},{"issue":"1","key":"11288_CR73","first-page":"15","volume":"16","author":"M Tambe","year":"1995","unstructured":"Tambe M, Johnson WL, Jones RM, Koss F, Laird JE, Rosenbloom PS, Schwamb K (1995) Intelligent agents for interactive simulation environments. AI Mag 16(1):15\u201315","journal-title":"AI Mag"},{"key":"11288_CR74","doi-asserted-by":"crossref","unstructured":"Van\u00a0Diggelen J, Bradshaw JM, Grant T, Johnson M, Neerincx M (2009) Policy-based design of human-machine collaboration in manned space missions. In: 2009 Third IEEE International Conference on Space Mission Challenges for Information Technology, pp. 376\u2013383. IEEE","DOI":"10.1109\/SMC-IT.2009.52"},{"key":"11288_CR75","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1007\/s10846-018-0899-0","volume":"94","author":"A Hong","year":"2019","unstructured":"Hong A, Igharoro O, Liu Y, Niroui F, Nejat G, Benhabib B (2019) Investigating human-robot teams for learning-based semi-autonomous control in urban search and rescue environments. J Intell Robot Syst 94:669\u2013686","journal-title":"J Intell Robot Syst"},{"issue":"3","key":"11288_CR76","doi-asserted-by":"publisher","first-page":"467","DOI":"10.3390\/electronics11030467","volume":"11","author":"J Hu","year":"2022","unstructured":"Hu J, Wang L, Hu T, Guo C, Wang Y (2022) Autonomous maneuver decision making of dual-UAV cooperative air combat based on deep reinforcement learning. Electronics 11(3):467","journal-title":"Electronics"},{"key":"11288_CR77","doi-asserted-by":"crossref","unstructured":"Xin B, He C (2022) DRL-based improvement for autonomous UAV motion path planning in unknown environments. In: 2022 7th International Conference on Control and Robotics Engineering (ICCRE), pp. 102\u2013105. IEEE","DOI":"10.1109\/ICCRE55123.2022.9770257"},{"key":"11288_CR78","doi-asserted-by":"publisher","first-page":"3657814","DOI":"10.1155\/2023\/3657814","volume":"2023","author":"Y Cao","year":"2023","unstructured":"Cao Y, Kou Y-X, Li Z-W, Xu A et al (2023) Autonomous maneuver decision of UCAV air combat based on Double Deep Q Network algorithm and stochastic game theory. Int J Aerosp Eng 2023:3657814","journal-title":"Int J Aerosp Eng"},{"issue":"6","key":"11288_CR79","doi-asserted-by":"publisher","first-page":"385","DOI":"10.3390\/drones7060385","volume":"7","author":"J Zhang","year":"2023","unstructured":"Zhang J, Meng Z, He J, Wang Z, Liu L (2023) UAV air game maneuver decision-making using Dueling Double Deep Q Network with expert experience storage mechanism. Drones 7(6):385","journal-title":"Drones"},{"key":"11288_CR80","doi-asserted-by":"crossref","unstructured":"Liu Y, Halev A, Liu X (2021) Policy learning with constraints in model-free reinforcement learning: A survey. In: The 30th International Joint Conference on Artificial Intelligence (IJCAI)","DOI":"10.24963\/ijcai.2021\/614"},{"key":"11288_CR81","unstructured":"Zhang L, Zhang Q, Shen L, Yuan B, Wang X (2022) Saferl-kit: Evaluating efficient reinforcement learning methods for safe autonomous driving. arXiv preprint arXiv:2206.08528"},{"issue":"5","key":"11288_CR82","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1109\/TNN.1998.712192","volume":"9","author":"R Sutton","year":"1998","unstructured":"Sutton R, Barto A (1998) Reinforcement learning: An introduction. IEEE Trans Neural Netw 9(5):1054\u20131054","journal-title":"IEEE Trans Neural Netw"},{"key":"11288_CR83","unstructured":"Munos R (2016) Q ($$\\lambda $$) with off-policy corrections. In: Algorithmic Learning Theory: 27th International Conference, ALT 2016, Bari, Italy, October 19-21, 2016, Proceedings, vol. 9925, p. 305. Springer"},{"key":"11288_CR84","unstructured":"Owen AB (2013) Monte Carlo theory, methods and examples. Stanford"},{"key":"11288_CR85","unstructured":"Kang B, Jie Z, Feng J (2018) Policy optimization with demonstrations. In: International Conference on Machine Learning, pp. 2469\u20132478. PMLR"},{"key":"11288_CR86","unstructured":"Cederborg T, Grover I, Isbell\u00a0Jr CL, Thomaz AL (2015) Policy shaping with human teachers. In: IJCAI, pp. 3366\u20133372"},{"key":"11288_CR87","doi-asserted-by":"crossref","unstructured":"Knox WB, Stone P (2009) Interactively shaping agents via human reinforcement: The tamer framework. In: KCAP","DOI":"10.1145\/1597735.1597738"},{"key":"11288_CR88","unstructured":"Satija H, Amortila P, Pineau J (2020) Constrained Markov decision processes via backward value functions. In: International Conference on Machine Learning, pp. 8502\u20138511. PMLR"},{"key":"11288_CR89","unstructured":"Marwan S, Shi Y, Menezes I, Chi M, Barnes T, Price TW (2021) Just a few expert constraints can help: Humanizing data-driven subgoal detection for novice programming. International Educational Data Mining Society"},{"key":"11288_CR90","doi-asserted-by":"crossref","unstructured":"Bai F, Zhang H, Tao T, Wu Z, Wang Y, Xu B (2023) PiCor: multi-task deep reinforcement learning with policy correction. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37, pp. 6728\u20136736","DOI":"10.1609\/aaai.v37i6.25825"},{"key":"11288_CR91","unstructured":"Zawalski M, Osi\u0144ski B, Michalewski H, Mi\u0142o\u0144 P (2022) Off-policy correction for multi-agent reinforcement learning. In: Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems, pp. 1774\u20131776"},{"key":"11288_CR92","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/s10846-018-0839-z","volume":"95","author":"C Celemin","year":"2019","unstructured":"Celemin C, Ruiz-del-Solar J (2019) An interactive framework for learning continuous actions policies based on corrective feedback. J Intell Robot Syst 95:77\u201397","journal-title":"J Intell Robot Syst"},{"key":"11288_CR93","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 30","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"11288_CR94","unstructured":"Wang Z, Schaul T, Hessel M, Hasselt H, Lanctot M, Freitas N (2016) Dueling network architectures for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1995\u20132003. PMLR"},{"key":"11288_CR95","unstructured":"Redefined AI, Gottipati SK, Kurandwad S, Mars C, Szriftgiser G, Chabot F (2021) Cogment: Open source framework for distributed multi-actor training, deployment & operations. CoRR arxiv:abs\/2106.11345"},{"key":"11288_CR96","doi-asserted-by":"crossref","unstructured":"Hart SG (2006) NASA-task load index (NASA-TLX); 20 years later. In: Proceedings of the Human Factors and Ergonomics Society Annual Meeting, vol. 50, pp. 904\u2013908. Sage publications Sage CA: Los Angeles, CA","DOI":"10.1177\/154193120605000909"},{"key":"11288_CR97","doi-asserted-by":"crossref","unstructured":"Richards D (2020) Measure for measure: How do we assess human autonomy teaming? In: HCI International 2020\u2013Late Breaking Papers: Cognition, Learning and Games: 22nd HCI International Conference, HCII 2020, Copenhagen, Denmark, July 19\u201324, 2020, Proceedings 22, pp. 227\u2013239. Springer","DOI":"10.1007\/978-3-030-60128-7_18"},{"key":"11288_CR98","doi-asserted-by":"crossref","unstructured":"Kim I, Morrison JR (2018) Learning based framework for joint task allocation and system design in stochastic multi-uav systems. In: 2018 International Conference on Unmanned Aircraft Systems (ICUAS), pp. 324\u2013334. IEEE","DOI":"10.1109\/ICUAS.2018.8453318"},{"key":"11288_CR99","doi-asserted-by":"crossref","unstructured":"McKnight PE, Najab J (2010) Mann-whitney u test. The Corsini encyclopedia of psychology, 1\u20131","DOI":"10.1002\/9780470479216.corpsy0524"},{"key":"11288_CR100","unstructured":"Neumann A, Bossek J, Neumann F (2020) Computing diverse sets of solutions for monotone submodular optimisation problems. arXiv preprint arXiv:2010.11486"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11288-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11288-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11288-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T15:10:38Z","timestamp":1757171438000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11288-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,20]]},"references-count":100,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["11288"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11288-1","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,20]]},"assertion":[{"value":"24 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Our human subject study was approved by the University\u2019s ethics board (REB number: Pro00107555). We have designed the simulator environment so that drones are not equipped with weapons that can directly endanger the lives of humans. We are also focusing on a defensive task to minimize the risk of our work being used by bad actors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics statement"}}]}}