{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T04:44:08Z","timestamp":1775623448863,"version":"3.50.1"},"reference-count":124,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T00:00:00Z","timestamp":1757980800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T00:00:00Z","timestamp":1757980800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the State Key Laboratory of Multimedia Information Processing Open Fund","award":["Grant Ref. SKLMIP-KF-2025-02"],"award-info":[{"award-number":["Grant Ref. SKLMIP-KF-2025-02"]}]},{"name":"the Open Projects Program of State Key Laboratory of Multimodal Artificial Intelligence Systems","award":["MAIS2024108"],"award-info":[{"award-number":["MAIS2024108"]}]},{"name":"the Open Fund of Key Laboratory of the Ministry of Education on Artificial Intelligence in Equipment","award":["2024-AAIE-KF03-01"],"award-info":[{"award-number":["2024-AAIE-KF03-01"]}]},{"name":"the Open Research Fund of Anhui Provincial Key Laboratory of Intelligent Low-Carbon Information Technology and Equipment"},{"DOI":"10.13039\/501100000266","name":"the UK Engineering and Physical Sciences Research Council","doi-asserted-by":"crossref","award":["EP\/T021063\/1 (COG-MHEAR) and EP\/T024917\/1 (NATGEN)"],"award-info":[{"award-number":["EP\/T021063\/1 (COG-MHEAR) and EP\/T024917\/1 (NATGEN)"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cogn Comput"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s12559-025-10500-7","type":"journal-article","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T09:29:10Z","timestamp":1758014950000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Reinforcement Learning for Human-AI Collaboration: Challenges, Mechanisms, and Methods"],"prefix":"10.1007","volume":"17","author":[{"given":"Wei","family":"Li","sequence":"first","affiliation":[]},{"given":"Hongming","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Kaizhu","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Amir","family":"Hussain","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,16]]},"reference":[{"key":"10500_CR1","doi-asserted-by":"crossref","unstructured":"Lai Y, Kankanhalli A, Ong D. Human-AI collaboration in healthcare: a review and research agenda. In: Hawaii International Conference on System Sciences, Virtual, pp. 1\u201310 (2021)","DOI":"10.24251\/HICSS.2021.046"},{"key":"10500_CR2","unstructured":"Kim Y, Park C, Jeong H, Chan YS, Xu X, McDuff D, Lee H, Ghassemi M, Breazeal C, Park HW. MDAgents: an adaptive collaboration of LLMs for medical decision-making. In: Advances in Neural Information Processing Systems, Vancouver, BC, Canada, pp. 79410\u201379452 (2024)"},{"key":"10500_CR3","doi-asserted-by":"publisher","first-page":"2293","DOI":"10.1038\/s41562-024-02024-1","volume":"8","author":"M Vaccaro","year":"2024","unstructured":"Vaccaro M, Almaatouq A, Malone T. When combinations of humans and AI are useful: a systematic review and meta-analysis. Nat Hum Behaviour. 2024;8:2293\u2013303.","journal-title":"Nat Hum Behaviour."},{"key":"10500_CR4","doi-asserted-by":"crossref","unstructured":"Peng S, Wang MX, Shah JA, Figueroa N. Object permanence filter for robust tracking with interactive robots. In: IEEE International Conference on Robotics and Automation, Yokohama, Japan, pp. 4909\u20134915 (2024)","DOI":"10.1109\/ICRA57147.2024.10611528"},{"key":"10500_CR5","unstructured":"Alrashedy K, Alrashedy K, Tambwekar P, Zaidi ZH, Langwasser M, Xu W, Gombolay M. Object permanence filter for robust tracking with interactive robots. In: International Conference on Learning Representations, Singapore City, Singapore, pp. 1\u201327 (2025)"},{"key":"10500_CR6","doi-asserted-by":"crossref","unstructured":"Ashktorab Z, Liao QV, Dugan C, Johnson J, Pan Q, Zhang W, Kumaravel S, Campbell M. Human-AI collaboration in a cooperative game setting: measuring social perception and outcomes. In: Proceedings of the ACM on Human-Computer Interaction, New York City, NY, USA, pp. 1\u201320 (2020)","DOI":"10.1145\/3415167"},{"key":"10500_CR7","doi-asserted-by":"crossref","unstructured":"Chang E, Chen Z, Labrune J, Coelho M. Be the Beat: AI-powered boombox for music suggestion from freestyle dance. In: International Conference on Tangible, Embedded, and Embodied Interaction, New York City, NY, USA, pp. 1\u20136 (2025)","DOI":"10.1145\/3689050.3705995"},{"key":"10500_CR8","doi-asserted-by":"crossref","unstructured":"Guo G, Kumar AMS, Gupta A, Coscia A, Maclellan C, Endert A. Visualizing intelligent tutor interactions for responsive pedagogy. In: International Conference on Advanced Visual Interfaces, New York City, NY, USA, pp. 1\u20139 (2024)","DOI":"10.1145\/3656650.3656667"},{"key":"10500_CR9","unstructured":"Shen H, Knearem T, Ghosh R, Alkiek K, Krishna K, Liu Y, Ma Z, Petridis S, Peng Y-H, Qiwei L, Rakshit S, Si C, Xie Y, Bigham JP, Bentley F, Chai J, Lipton Z, Mei Q, Mihalcea R, Terry M, Yang D, Morris MR, Resnick P, Jurgens D. Towards bidirectional human-AI alignment: a systematic review for clarifications, framework, and future directions. arXiv:. (2024)"},{"issue":"4","key":"10500_CR10","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1073\/pnas.1813164116","volume":"116","author":"P Nalepka","year":"2019","unstructured":"Nalepka P, Lamb M, Kallen RW, Shockley K, Chemero A, Saltzman E, et al. Human social motor solutions for human\u2013machine interaction in dynamical task contexts. Proceed National Academy Sci. 2019;116(4):1437\u201346.","journal-title":"Proceed National Academy Sci."},{"issue":"1","key":"10500_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/s20010296","volume":"20","author":"CPC Chanel","year":"2020","unstructured":"Chanel CPC, Roy RN, Dehais F, Drougard N. Towards mixed-initiative human-robot interaction: assessment of discriminative physiological and behavioral features for performance prediction. Sensors. 2020;20(1):1\u201320.","journal-title":"Sensors."},{"issue":"1","key":"10500_CR12","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1007\/s12559-022-10047-x","volume":"15","author":"G Zuo","year":"2023","unstructured":"Zuo G, Tong J, Wang Z, Gong D. A graph-based deep reinforcement learning approach to grasping fully occluded objects. Cognit Comput. 2023;15(1):36\u201349.","journal-title":"Cognit Comput."},{"issue":"2","key":"10500_CR13","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1109\/TG.2023.3263013","volume":"16","author":"W Li","year":"2024","unstructured":"Li W, Liu W, Shao S, Huang S, Song A. Attention-based intrinsic reward mixing network for credit assignment in multiagent reinforcement learning. IEEE Trans Games. 2024;16(2):270\u201381.","journal-title":"IEEE Trans Games."},{"issue":"5","key":"10500_CR14","doi-asserted-by":"publisher","first-page":"2358","DOI":"10.1007\/s12559-022-10080-w","volume":"16","author":"M Schilling","year":"2024","unstructured":"Schilling M, Hammer B, Ohl FW, Ritter HJ, Wiskott L. Modularity in nervous systems-a key to efficient adaptivity for deep reinforcement learning. Cognit Comput. 2024;16(5):2358\u201373.","journal-title":"Cognit Comput."},{"issue":"2","key":"10500_CR15","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1007\/s12559-023-10226-4","volume":"16","author":"Z Ni","year":"2024","unstructured":"Ni Z, Jin Y, Liu P, Zhao W. A novel heuristic exploration method based on action effectiveness constraints to relieve loop enhancement effect in reinforcement learning with sparse rewards. Cognit Comput. 2024;16(2):682\u2013700.","journal-title":"Cognit Comput."},{"key":"10500_CR16","doi-asserted-by":"publisher","first-page":"566","DOI":"10.1109\/LRA.2020.3047730","volume":"6","author":"A Ghadirzadeh","year":"2020","unstructured":"Ghadirzadeh A, Chen X, Yin W, Yi Z, Bjorkman M, Kragic D. Human-centered collaborative robots with deep reinforcement learning. IEEE Robot Automat Lett. 2020;6:566\u201371.","journal-title":"IEEE Robot Automat Lett."},{"key":"10500_CR17","unstructured":"Dijkstra EB. Adaptive reinforcement learning for human-AI collaboration. arXiv:. (2022)"},{"key":"10500_CR18","unstructured":"Bucinca Z, Swaroop S, Paluch AE, Murphy SA, Gajos KZ. Towards optimizing human-centric objectives in AI-assisted decision-making with offline reinforcement learning. arXiv:. (2024)"},{"key":"10500_CR19","doi-asserted-by":"crossref","unstructured":"Huang Z, Sheng Z, Chen S. Trustworthy human-AI collaboration: reinforcement learning with human feedback and physics knowledge for safe autonomous driving. arXiv:. (2024)","DOI":"10.1016\/j.trc.2025.105262"},{"key":"10500_CR20","unstructured":"Berger EJ, Guruprasad G, Senkpeil RR. Characterizing the alignment in faculty and student beliefs. In: ASEE Annual Conference and Exposition, Columbus, OH, USA, pp. 1\u201317 (2017)"},{"issue":"2","key":"10500_CR21","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1097\/ACM.0000000000002518","volume":"94","author":"CSM Royce","year":"2019","unstructured":"Royce CSM, Hayes MMM, Schwartzstein RMM. Teaching critical thinking: a case for instruction in cognitive biases to reduce diagnostic errors and improve patient safety. Acad Med. 2019;94(2):187\u201394.","journal-title":"Acad Med."},{"issue":"2","key":"10500_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pone.0229132","volume":"15","author":"K Okamura","year":"2020","unstructured":"Okamura K, Yamada S. AI and human-robot interaction: a review of recent advances and challenges. PLoS One. 2020;15(2):1\u201320.","journal-title":"PLoS One."},{"key":"10500_CR23","first-page":"8052","volume":"35","author":"J Wang","year":"2021","unstructured":"Wang J, Lan C, Liu C, Ouyang Y, Qin T. Generalizing to unseen domains: a survey on domain generalization. IEEE Trans Knowl Data Eng. 2021;35:8052\u201372.","journal-title":"IEEE Trans Knowl Data Eng."},{"issue":"7","key":"10500_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0287958","volume":"18","author":"SK Ehrlich","year":"2023","unstructured":"Ehrlich SK, Dean-Leon E, Tacca N, Armleder S, Dimova-Edeleva V, Cheng G. Human-robot collaborative task planning using anticipatory brain responses. PLoS One. 2023;18(7):1\u201320.","journal-title":"PLoS One."},{"key":"10500_CR25","first-page":"1","volume":"12","author":"EM Zoelen","year":"2021","unstructured":"Zoelen EM, Bosch K, Rauterberg M, Barakova E, Neerincx MA. Identifying interaction patterns of tangible co-adaptations in human-robot team behaviors. Front Psychol. 2021;12:1\u201316.","journal-title":"Front Psychol."},{"issue":"2","key":"10500_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0229132","volume":"15","author":"K Okamura","year":"2020","unstructured":"Okamura K, Yamada S. Adaptive trust calibration for human-AI collaboration. PloS One. 2020;15(2):1\u201320.","journal-title":"PloS One."},{"issue":"4","key":"10500_CR27","doi-asserted-by":"publisher","first-page":"74","DOI":"10.61877\/ijmrp.v2i4.135","volume":"2","author":"M Singh","year":"2024","unstructured":"Singh M, Khan SALA. Advances in autonomous robotics: integrating AI and machine learning for enhanced automation and control in industrial applications. Int J Multidimensional Res Perspect. 2024;2(4):74\u201390.","journal-title":"Int J Multidimensional Res Perspect."},{"key":"10500_CR28","doi-asserted-by":"crossref","unstructured":"Zanardi D, Nenna F, Orlando EM, Nannetti M, Mingardi M, Buodo G, Gamberini L. Pupil responses as indicators of learning and adaptation in human-robot collaboration scenarios. In: Proceedings of the International Conference on PErvasive Technologies Related to Assistive Environments, Crete, Greece, pp. 337\u2013342 (2024)","DOI":"10.1145\/3652037.3663909"},{"key":"10500_CR29","doi-asserted-by":"crossref","unstructured":"Shirado H, Christakis NA. Network engineering using autonomous agents increases cooperation in human groups. iScience. 2020;23(9):1\u201352","DOI":"10.1016\/j.isci.2020.101438"},{"key":"10500_CR30","doi-asserted-by":"crossref","unstructured":"Webb N, Milivojevic S, Sobhani M, Madin ZR, Ward JC, Yusuf S, Baber C, Hunt ER. Co-movement and trust development in human-robot teams. arXiv:. (2024)","DOI":"10.1007\/978-981-96-3522-1_11"},{"issue":"1","key":"10500_CR31","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1037\/emo0000799","volume":"23","author":"F Zhao","year":"2023","unstructured":"Zhao F, Wood A, Mutlu B, Niedenthal P. Faces synchronize when communication through spoken language is prevented. Emotion. 2023;23(1):87\u201396.","journal-title":"Emotion."},{"issue":"7","key":"10500_CR32","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1108\/TPM-03-2020-0024","volume":"26","author":"K Nawata","year":"2020","unstructured":"Nawata K, Yamaguchi H, Aoshima M. Team implicit coordination based on transactive memory systems. Team Performance Manag. 2020;26(7):375\u201390.","journal-title":"Team Performance Manag."},{"issue":"10","key":"10500_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/s21103409","volume":"21","author":"E Jung","year":"2021","unstructured":"Jung E, Kim I. Hybrid imitation learning framework for robotic manipulation tasks. Sensors. 2021;21(10):1\u201318.","journal-title":"Sensors."},{"issue":"12","key":"10500_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pbio.3001028","volume":"18","author":"A Najar","year":"2020","unstructured":"Najar A, Bonnet E, Bahrami B, Palminteri S. The actions of others act as a pseudo-reward to drive imitation in the context of social reinforcement learning. PLoS Biol. 2020;18(12):1\u201325.","journal-title":"PLoS Biol."},{"issue":"5","key":"10500_CR35","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1007\/s12369-020-00704-2","volume":"13","author":"A Taheri","year":"2021","unstructured":"Taheri A, Meghdari A, Mahoor MH. A close look at the imitation performance of children with autism and typically developing children using a robotic system. Int J Soc Robot. 2021;13(5):1125\u201347.","journal-title":"Int J Soc Robot."},{"key":"10500_CR36","doi-asserted-by":"publisher","first-page":"532375","DOI":"10.3389\/frobt.2020.532375","volume":"7","author":"A Masumori","year":"2021","unstructured":"Masumori A, Maruyama N, Ikegami T. Personogenesis through imitating human behavior in a humanoid robot Alter3. Front Robot AI. 2021;7:532375\u201388.","journal-title":"Front Robot AI."},{"key":"10500_CR37","unstructured":"Yeung AY, Joshi S, Williams JJ, Rudzicz F. Sequential explanations with mental model-based policies. In: Proceedings of the International Conference on Machine Learning, Virtual, pp. 1\u20138 (2020)"},{"key":"10500_CR38","doi-asserted-by":"crossref","unstructured":"Eckstein MK, Master SL, Xia L, Dahl RE, Wilbrecht L, Collins AG. The interpretation of computational model parameters depends on the context. eLife. 2022;11:1\u201352","DOI":"10.7554\/eLife.75474"},{"issue":"4","key":"10500_CR39","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/IOTM.001.2300102","volume":"6","author":"M Chafii","year":"2023","unstructured":"Chafii M, Naoumi S, Alami R, Almazrouei E, Bennis M, Debbah M. Emergent communication in multi-agent reinforcement learning for future wireless networks. IEEE Int Things Mag. 2023;6(4):18\u201324.","journal-title":"IEEE Int Things Mag."},{"key":"10500_CR40","doi-asserted-by":"crossref","unstructured":"Chen D, Zhang K, Wang Y, Yin X, Li Z, Filev D. Communication-efficient decentralized multi-agent reinforcement learning for cooperative adaptive cruise control. IEEE Trans Intell Vehic 2024;9(10):6436\u201349.","DOI":"10.1109\/TIV.2024.3368025"},{"issue":"5","key":"10500_CR41","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1016\/j.future.2022.05.014","volume":"135","author":"X Wu","year":"2022","unstructured":"Wu X, Xiao L, Sun Y, Zhang J, Ma T, He L. A survey of human-in-the-loop for machine learning. Future Generation Comput Syst. 2022;135(5):364\u201381.","journal-title":"Future Generation Comput Syst."},{"key":"10500_CR42","doi-asserted-by":"publisher","first-page":"3005","DOI":"10.1007\/s10462-022-10246-w","volume":"56","author":"E Mosqueira-Rey","year":"2022","unstructured":"Mosqueira-Rey E, Hernandez-Pereira E, Alonso-Rios D, Bobes-Bascaran J, Fernandez-Leal A. Human-in-the-loop machine learning: a state of the art. Artif Intell Rev. 2022;56:3005\u201354.","journal-title":"Artif Intell Rev."},{"key":"10500_CR43","unstructured":"Cranor LF. A framework for reasoning about the human in the loop. In: Conference on Usability, Psychology, and Security, San Francisco, CA, USA, pp. 1\u201315 (2008)"},{"key":"10500_CR44","doi-asserted-by":"publisher","first-page":"101787","DOI":"10.1016\/j.aei.2022.101787","volume":"54","author":"JMD Delgado","year":"2022","unstructured":"Delgado JMD, Oyedele L. Robotics in construction: a critical review of the reinforcement learning and imitation learning paradigms. Adv Eng Inf. 2022;54:101787\u2013810.","journal-title":"Adv Eng Inf."},{"key":"10500_CR45","unstructured":"Newman BA, Paxton C, Kitani K, Admoni H. Bootstrapping linear models for fast online adaptation in human-agent collaboration. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, Auckland, New Zealand, pp. 1463\u20131472 (2024)"},{"key":"10500_CR46","unstructured":"Hu H, Wu DJ, Lerer A, Foerster J, Brown N. Human-AI coordination via human-regularized search and learning. arXiv:. (2022)"},{"key":"10500_CR47","doi-asserted-by":"publisher","first-page":"103500","DOI":"10.1016\/j.artint.2021.103500","volume":"297","author":"S Arora","year":"2021","unstructured":"Arora S, Doshi P. A survey of inverse reinforcement learning: challenges, methods and progress. Artif Intell. 2021;297:103500\u20131003527.","journal-title":"Artif Intell."},{"key":"10500_CR48","unstructured":"Myers V, Ellis E, Levine S, Eysenbach B, Dragan A. Learning to assist humans without inferring rewards. In: Advances in Neural Information Processing Systems, Vancouver, BC, Canada, pp. 1\u201313 (2024)"},{"key":"10500_CR49","unstructured":"Jacob AP, Wu DJ, Farina G, Lerer A, Hu H, Bakhtin A, Andreas J, Brown N. Modeling strong and human-like gameplay with KL-regularized search. In: Proceedings of the International Conference on Machine Learning, Baltimore, MD, USA, pp. 9695\u20139728 (2022)"},{"issue":"7","key":"10500_CR50","doi-asserted-by":"publisher","first-page":"3797","DOI":"10.1109\/TIT.2014.2320500","volume":"60","author":"T Erven","year":"2014","unstructured":"Erven T, Harremos P. Renyi divergence and Kullback-Leibler divergence. IEEE Trans Inf Theory. 2014;60(7):3797\u2013820.","journal-title":"IEEE Trans Inf Theory."},{"key":"10500_CR51","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1016\/j.artint.2016.10.005","volume":"242","author":"S Barrett","year":"2017","unstructured":"Barrett S, Rosenfeld A, Kraus S, Stone P. Making friends on the fly: cooperating with new teammates. Artif Intell. 2017;242:132\u201371.","journal-title":"Artif Intell."},{"key":"10500_CR52","unstructured":"Lupu A, Cui B, Hu H, Foerster J. Trajectory diversity for zero-shot coordination. In: Proceedings of the International Conference on Machine Learning, Virtual, pp. 7204\u20137213 (2021)"},{"issue":"3","key":"10500_CR53","doi-asserted-by":"publisher","first-page":"2874","DOI":"10.1109\/TITS.2022.3227738","volume":"24","author":"R Bhattacharyya","year":"2023","unstructured":"Bhattacharyya R, Wulfe B, Phillips DJ, Kuefler A, Morton J, Senanayake R, et al. Modeling human driving behavior through generative adversarial imitation learning. IEEE Trans Intell Transport Syst. 2023;24(3):2874\u201387.","journal-title":"IEEE Trans Intell Transport Syst."},{"key":"10500_CR54","unstructured":"Tucker M, Zhou Y, Shah J. Adversarially guided self-play for adopting social conventions. arXiv:. (2020)"},{"key":"10500_CR55","unstructured":"Zhang R, Xu Z, Ma C, Yu C, Tu W, Tang W, Huang S, Ye D, Ding W, Yang Y, Wang Y. A survey on self-play methods in reinforcement learning. arXiv:. (2025)"},{"key":"10500_CR56","unstructured":"Lucas K, Allen RE. Any-play: an intrinsic augmentation for zero-shot coordination. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, Virtual, pp. 853\u2013861 (2022)"},{"key":"10500_CR57","unstructured":"Dennis M, Jaques N, Vinitsky E, Bayen A, Russell S, Critch A, Levine S. Emergent complexity and zero-shot transfer via unsupervised environment design. In: Advances in Neural Information Processing Systems, Virtual, pp. 13049\u201313061 (2020)"},{"key":"10500_CR58","unstructured":"Liang A, Czempin P, Zhou Y, Tu S, Biyik E. In-context generalization to new tasks from unlabeled observation data. In: Proceedings of the International Conference on Machine Learning, Vienna, Austria, pp. 1\u201310 (2024)"},{"key":"10500_CR59","unstructured":"Grover A, Al-Shedivat M, Gupta J, Burda Y, Edwards H. Learning policy representations in multiagent systems. In: Proceedings of the International Conference on Machine Learning, Stockholm, Sweden, pp. 1802\u20131811 (2018)"},{"key":"10500_CR60","unstructured":"He JZ-Y, Erickson Z, Brown DS, Raghunathan A, Dragan A. Learning representations that enable generalization in assistive tasks. In: Proceedings of the Conference on Robot Learning, Atlanta, GA, USA, pp. 2105\u20132114 (2023)"},{"key":"10500_CR61","unstructured":"Pinto L, Davidson J, Sukthankar R, Gupta A. Robust adversarial reinforcement learning. In: Proceedings of the International Conference on Machine Learning, Sydney, Australia, pp. 2817\u20132826 (2017)"},{"issue":"12","key":"10500_CR62","doi-asserted-by":"publisher","first-page":"528","DOI":"10.1016\/j.tics.2004.10.001","volume":"8","author":"AM Leslie","year":"2004","unstructured":"Leslie AM, Friedman O, German TP. Core mechanisms in \u2018theory of mind\u2019. Trends Cognitive Sci. 2004;8(12):528\u201333.","journal-title":"Trends Cognitive Sci."},{"issue":"6","key":"10500_CR63","doi-asserted-by":"publisher","first-page":"728","DOI":"10.1080\/17405629.2018.1435413","volume":"15","author":"HM Wellman","year":"2018","unstructured":"Wellman HM. Theory of mind: the state of the art. Eur J Develop Psychol. 2018;15(6):728\u201355.","journal-title":"Eur J Develop Psychol."},{"key":"10500_CR64","doi-asserted-by":"crossref","unstructured":"Chen S, Andrejczuk E, Cao Z, Zhang J. AATEAM: achieving the ad hoc teamwork by employing the attention mechanism. In: Proceedings of the AAAI Conference on Artificial Intelligence, New York City, NY, USA, pp. 7095\u20137102 (2020)","DOI":"10.1609\/aaai.v34i05.6196"},{"key":"10500_CR65","doi-asserted-by":"crossref","unstructured":"Mirsky R, Carlucho I, Rahman A, Fosong E, Macke W, Sridharan M, Stone P, Albrecht SV. A survey of ad hoc teamwork research. In: European Conference on Multi-Agent Systems, Bucharest, Romania, pp. 275\u2013293 (2022)","DOI":"10.1007\/978-3-031-20614-6_16"},{"key":"10500_CR66","unstructured":"Bansal S, Xu J, Morales M, Streater J, Howard A Jr C. Cognitive bias for human-AI ad hoc teamwork. In: Advances in Neural Information Processing Systems, Vancouver, BC, Canada, pp. 1\u20136 (2024)"},{"key":"10500_CR67","unstructured":"Sarkar B, Shih A, Sadigh D. Diverse conventions for human-AI collaboration. In: Advances in Neural Information Processing Systems, New Orleans, LA, USA, pp. 23115\u201323139 (2023)"},{"key":"10500_CR68","unstructured":"Raileanu R, Denton E, Szlam A, Fergus R. Modeling others using oneself in multi-agent reinforcement learning. In: Proceedings of the International Conference on Machine Learning, Stockholm, Sweden, pp. 4257\u20134266 (2018)"},{"key":"10500_CR69","unstructured":"Nguyen D, Le H, Do K, Gupta S, Venkatesh S, Tran T. Diversifying training pool predictability for zero-shot coordination: a theory of mind approach. In: Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, Jeju Island, South Korea, pp. 166\u2013174 (2024)"},{"key":"10500_CR70","unstructured":"Muglich D, Zintgraf LM, Witt CASD, Whiteson S, Foerster J. Generalized beliefs for cooperative AI. In: Proceedings of the International Conference on Machine Learning, Baltimore, MD, USA, pp. 16062\u201316082 (2022)"},{"key":"10500_CR71","unstructured":"Yu C, Gao J, Liu W, Xu B, Tang H, Yang J, Wang Y, Wu Y. Learning zero-shot cooperation with humans, assuming humans are biased. In: The International Conference on Learning Representations, Kigali, Rwanda, pp. 1\u201331 (2023)"},{"key":"10500_CR72","unstructured":"Xie A, Losey D, Tolsma R, Finn C, Sadigh D. Learning latent representations to influence multi-agent interaction. In: Proceedings of the Conference on Robot Learning, Virtual, pp. 575\u2013588 (2021)"},{"key":"10500_CR73","unstructured":"Liang Y, Chen D, Gupta A, Du SS, Jaques N. Learning to cooperate with humans using generative agents. In: Advances in Neural Information Processing Systems, Vancouver, BC, Canada, pp. 1\u201321 (2024)"},{"key":"10500_CR74","doi-asserted-by":"crossref","unstructured":"Li X, Zhang T, Liu C, Meng L, Xu B. Long short-term reasoning network with theory of mind for efficient multi-agent cooperation. In: International Joint Conference on Neural Networks, Yokohama, Japan, pp. 1\u20138 (2024)","DOI":"10.1109\/IJCNN60899.2024.10650244"},{"key":"10500_CR75","unstructured":"Yu G, Kasumba R, Ho C-J, Yeoh W. On the utility of accounting for human beliefs about AI intention in human-AI collaboration. arXiv:. (2024)"},{"key":"10500_CR76","unstructured":"Wang RE, Wu SA, Evans JA, Tenenbaum JB, Parkes DC, Kleiman-Weiner M. Too many cooks: coordinating multi-agent collaboration through inverse planning. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems, Auckland, New Zealand, pp. 2032\u20132034 (2020)"},{"key":"10500_CR77","unstructured":"Wang C, Chen Z, Liu H. On the utility of external agent intention predictor for human-AI coordination. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, Auckland, New Zealand, pp. 2546\u20132548 (2024)"},{"key":"10500_CR78","unstructured":"Hu H, Foerster JN. Simplified action decoder for deep multi-agent reinforcement learning. arXiv:. (2021)"},{"issue":"1","key":"10500_CR79","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1016\/j.neuron.2015.09.027","volume":"88","author":"SE Petersen","year":"2015","unstructured":"Petersen SE, Sporns O. Brain networks and cognitive architectures. Neuron. 2015;88(1):207\u201319.","journal-title":"Neuron."},{"key":"10500_CR80","unstructured":"Yan X, Guo J, Lou X, Wang J, Zhang H, Du Y. An efficient end-to-end training approach for zero-shot human-AI coordination. In: Advances in Neural Information Processing Systems, New Orleans, LA, USA, pp. 2636\u20132658 (2023)"},{"key":"10500_CR81","doi-asserted-by":"crossref","unstructured":"Su E, Raffe W, Mathieson L, Wang Y. Better understanding of humans for cooperative AI through clustering. In: IEEE Conference on Games, Milan, Italy, pp. 1\u20138 (2024)","DOI":"10.1109\/CoG60054.2024.10645647"},{"key":"10500_CR82","unstructured":"Gao Y, Liu F, Wang L, Zheng D, Lian Z, Wang W, Yang W, Li S, Wang X, Chen W, Dai J, FU Q, Wei Y, Huang L, Liu W. Enhancing human experience in human-agent collaboration: a human-centered modeling approach based on positive human gain. In: The International Conference on Learning Representations, Vienna, Austria, pp. 1\u201329 (2024)"},{"key":"10500_CR83","unstructured":"Kazantzidis I, Norman T, Du Y, Freeman C. How to train your agent: active learning from human preferences and justifications in safety-critical environments. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, Virtual, pp. 1654\u20131656 (2022)"},{"key":"10500_CR84","unstructured":"Lou X, Guo J, Zhang J, Wang J, Huang K, Du Y. PECAN: leveraging policy ensemble for context-aware zero-shot human-AI coordination. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, London, UK, pp. 679\u2013688 (2023)"},{"key":"10500_CR85","unstructured":"Bansal S, Xu J, Morales M, Streater J, Howard A, Isbell CL. Reinforcement learning with cognitive bias for human-AI ad hoc teamwork. In: Coordination and Cooperation for Multi-Agent Reinforcement Learning Methods Workshop, Amherst, MA, USA, pp. 1\u201311 (2024)"},{"key":"10500_CR86","unstructured":"Ghosh A, Tschiatschek S, Mahdavi H, Singla A. Towards deployment of robust cooperative AI agents: an algorithmic framework for learning adaptive policies. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems, Auckland, New Zealand, pp. 447\u2013455 (2020)"},{"key":"10500_CR87","unstructured":"Muller S, Rohr A, Trimpe S. Local policy search with Bayesian optimization. In: Advances in Neural Information Processing Systems, Virtual, pp. 20708\u201320720 (2021)"},{"key":"10500_CR88","unstructured":"Zintgraf L, Devlin S, Ciosek K, Whiteson S, Hofmann K. Deep interactive Bayesian reinforcement learning via meta-learning. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems, Virtual, pp. 1712\u20131714 (2021)"},{"issue":"7","key":"10500_CR89","doi-asserted-by":"publisher","first-page":"4763","DOI":"10.1109\/TPAMI.2024.3357847","volume":"46","author":"A Vettoruzzo","year":"2024","unstructured":"Vettoruzzo A, Bouguelia M-R, Vanschoren J, Rognvaldsson T, Santosh K. Advances and challenges in meta-learning: a technical review. IEEE Trans Pattern Anal Mach Intell. 2024;46(7):4763\u201379.","journal-title":"IEEE Trans Pattern Anal Mach Intell."},{"key":"10500_CR90","unstructured":"Knott P, Carroll M, Devlin S, Ciosek K, Hofmann K, Dragan A, Shah R. Evaluating the robustness of collaborative agents. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems, Virtual, pp. 1560\u20131562 (2021)"},{"issue":"2","key":"10500_CR91","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1109\/TG.2022.3169168","volume":"15","author":"R Canaan","year":"2023","unstructured":"Canaan R, Gao X, Togelius J, Nealen A, Menzel S. Generating and adapting to diverse ad hoc partners in Hanabi. IEEE Trans Games. 2023;15(2):228\u201341.","journal-title":"IEEE Trans Games."},{"key":"10500_CR92","unstructured":"Xue K, Wang Y, Guan C, Yuan L, Fu H, Fu Q, Qian C, Yu Y. Heterogeneous multi-agent zero-shot coordination by coevolution. IEEE Transactions on Evolutionary Computation, 1\u201315 (2024). Early Access"},{"key":"10500_CR93","unstructured":"Natarajan M, Xue C, Waveren S, Feigh K, Gombolay M. Mixed-initiative human-robot teaming under suboptimality with online Bayesian adaptation. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems, Auckland, New Zealand, pp. 1454\u20131462 (2024)"},{"key":"10500_CR94","unstructured":"Yang M, Carroll M, Dragan A. Optimal behavior prior: data-efficient human models for improved human-AI collaboration. In: Advances in Neural Information Processing Systems, New Orleans, LA, USA, pp. 1\u201317 (2022)"},{"issue":"1","key":"10500_CR95","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10458-023-09607-8","volume":"38","author":"C Zhu","year":"2024","unstructured":"Zhu C, Dastani M, Wang S. A survey of multi-agent deep reinforcement learning with communication. Autonomous Agents Multi-Agent Syst. 2024;38(1):1\u20134.","journal-title":"Autonomous Agents Multi-Agent Syst."},{"key":"10500_CR96","unstructured":"Ding H, Jia C, Guan C, Chen F, Yuan L, Zhang Z, Yu Y. Coordination scheme probing for generalizable multi-agent reinforcement learning. In: International Conference on Learning Representations, Kigali, Rwanda, pp. 1\u201319 (2023)"},{"key":"10500_CR97","unstructured":"Wang WZ, Shih A, Xie A, Sadigh D. Influencing towards stable multi-agent interactions. In: Proceedings of the Conference on Robot Learning, Annapolis, MD, USA, pp. 1132\u20131143 (2022)"},{"issue":"2","key":"10500_CR98","doi-asserted-by":"publisher","first-page":"470","DOI":"10.1109\/TG.2023.3302694","volume":"16","author":"X Lou","year":"2024","unstructured":"Lou X, Zhang J, Du Y, Yu C, He Z, Huang K. Leveraging joint-action embedding in multiagent reinforcement learning for cooperative games. IEEE Trans Games. 2024;16(2):470\u201382.","journal-title":"IEEE Trans Games."},{"key":"10500_CR99","unstructured":"Villin V, Dimitrakakis C, Buening TK. A minimax-bayes approach to ad hoc teamwork. In: European Workshop on Reinforcement Learning, Toulouse, France, pp. 1\u201316 (2024)"},{"key":"10500_CR100","unstructured":"Kim DK, Liu M, Riemer MD, Sun C, Abdulhai M, Habibi G, Lopez-Cot S, Tesauro G, How JP. A policy gradient algorithm for learning to learn in multiagent reinforcement learning. In: Proceedings of the International Conference on Machine Learning, Virtual, pp. 5541\u20135550 (2021)"},{"key":"10500_CR101","unstructured":"Strouse D, McKee K, Botvinick M, Hughes E, Everett R. Collaborating with humans without human data. In: Advances in Neural Information Processing Systems, Virtual, pp. 14502\u201314515 (2021)"},{"key":"10500_CR102","first-page":"1","volume":"01","author":"A Rahman","year":"2023","unstructured":"Rahman A, Fosong E, Carlucho I, Albrecht SV. Generating teammates for training robust ad hoc teamwork agents via best-response diversity. Trans Mach Learn Res. 2023;01:1\u201327.","journal-title":"Trans Mach Learn Res."},{"key":"10500_CR103","unstructured":"Charakorn R, Manoonpong P, Dilokthanakul N. Learning to cooperate with unseen agents through meta-reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and MultiAgent Systems, Virtual, pp. 1478\u20131479 (2021)"},{"key":"10500_CR104","unstructured":"Mahajan A, Rashid T, Samvelyan M, Whiteson S. MAVEN: Multi-agent variational exploration. In: Advances in Neural Information Processing Systems, Vancouver, BC, Canada, pp. 1\u201312 (2019)"},{"key":"10500_CR105","unstructured":"Gu P, Zhao M, Hao J, An B. Online ad hoc teamwork under partial observability. In: International Conference on Learning Representations, Virtual, pp. 1\u201317 (2022)"},{"key":"10500_CR106","unstructured":"Rahman MA, Hopner N, Christianos F, Albrecht SV. Towards open ad hoc teamwork using graph-based policy learning. In: Proceedings of the International Conference on Machine Learning, Virtual, pp. 8776\u20138786 (2021)"},{"key":"10500_CR107","doi-asserted-by":"publisher","first-page":"101800","DOI":"10.1016\/j.aei.2022.101800","volume":"54","author":"B Huang","year":"2022","unstructured":"Huang B, Jin Y. Reward shaping in multiagent reinforcement learning for self-organizing systems in assembly tasks. Adv Eng Inf. 2022;54:101800\u201311.","journal-title":"Adv Eng Inf."},{"key":"10500_CR108","unstructured":"Zhou Z, Fu W, Zhang B, Wu Y. Continuously discovering novel strategies via reward-switching policy optimization. In: International Conference on Learning Representations, Virtual, pp. 1\u201330 (2022)"},{"key":"10500_CR109","unstructured":"Li Y, Zhang S, Sun J, Du Y, Wen Y, Wang X, Pan W. Cooperative open-ended learning framework for zero-shot coordination. In: Proceedings of the International Conference on Machine Learning, Honolulu, HI, USA, pp. 20470\u201320484 (2023)"},{"key":"10500_CR110","unstructured":"Tang Z, Yu C, Chen B, Xu H, Wang X, Fang F, Du SS, Wang Y, Wu Y. Discovering diverse multi-agent strategic behavior via reward randomization. In: International Conference on Learning Representations, Virtual, pp. 1\u201326 (2021)"},{"key":"10500_CR111","unstructured":"Zahavy T, O\u2019Donoghue B, Barreto A, Mnih V, Flennerhag S, Singh S. Discovering diverse nearly optimal policies with successor features. arXiv:. (2022)"},{"key":"10500_CR112","unstructured":"Parker-Holder J, Pacchiano A, Choromanski KM, Roberts SJ. Effective diversity in population based reinforcement learning. In: Advances in Neural Information Processing Systems, Virtual, pp. 18050\u201318062 (2020)"},{"key":"10500_CR113","unstructured":"Muglich D, Witt CS, Pol E, Whiteson S, Foerster J. Equivariant networks for zero-shot coordination. In: Advances in Neural Information Processing Systems, New Orleans, LA, USA, pp. 6410\u20136423 (2022)"},{"key":"10500_CR114","unstructured":"Hao X, Nakisa B, Rastgoo MN, Dazeley R, Pang G. IReCa: intrinsic reward-enhanced context-aware reinforcement learning for human-AI coordination. arXiv:. (2024)"},{"key":"10500_CR115","unstructured":"Lerer A, Peysakhovich A. Maintaining cooperation in complex social dilemmas using deep reinforcement learning. arXiv:. (2018)"},{"key":"10500_CR116","doi-asserted-by":"crossref","unstructured":"Zhao R, Song J, Yuan Y, Hu H, Gao Y, Wu Y, Sun Z, Yang W. Maximum entropy population-based training for zero-shot human-AI coordination. In: Proceedings of the AAAI Conference on Artificial Intelligence, Washington, D.C., USA, pp. 6145\u20136153 (2023)","DOI":"10.1609\/aaai.v37i5.25758"},{"key":"10500_CR117","unstructured":"Sun H, Peng Z, Dai B, Guo J, Lin D, Zhou B. Novel policy seeking with constrained optimization. arXiv:. (2022)"},{"key":"10500_CR118","unstructured":"Erlebach H, Cook J. RACCOON: regret-based adaptive curricula for cooperation. In: Coordination and Cooperation for Multi-Agent Reinforcement Learning Methods Workshop, Amherst, MA, USA, pp. 1\u201311 (2024)"},{"issue":"48","key":"10500_CR119","first-page":"1","volume":"24","author":"L Benzinger","year":"2023","unstructured":"Benzinger L, Ursin F, Balke W-T, Kacprowski T, Salloch S. Should artificial intelligence be used to support clinical ethical decision-making? A systematic review of reasons. BMC Med Ethics. 2023;24(48):1\u20139.","journal-title":"BMC Med Ethics."},{"issue":"1","key":"10500_CR120","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1109\/MITS.2019.2953556","volume":"14","author":"H Wang","year":"2022","unstructured":"Wang H, Khajepour A, Cao D, Liu T. Ethical decision making in autonomous vehicles: challenges and research progress. IEEE Intell Transport Syst Mag. 2022;14(1):6\u201317.","journal-title":"IEEE Intell Transport Syst Mag."},{"issue":"3","key":"10500_CR121","first-page":"119","volume":"3","author":"AL Hunkenschroer","year":"2023","unstructured":"Hunkenschroer AL, Kriebitz A. Is AI recruiting (un)ethical? A human rights perspective on the use of AI for hiring. AI Ethics. 2023;3(3):119\u2013213.","journal-title":"AI Ethics."},{"issue":"1","key":"10500_CR122","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/s43681-021-00043-6","volume":"1","author":"A Wynsberghe","year":"2021","unstructured":"Wynsberghe A. Sustainable AI: AI for sustainability and the sustainability of AI. AI Ethics. 2021;1(1):213\u20138.","journal-title":"AI Ethics."},{"key":"10500_CR123","doi-asserted-by":"crossref","unstructured":"Nikolaidis S, Shah J. Human-robot cross-training: computational formulation, modeling and evaluation of a human team training strategy. In: ACM\/IEEE International Conference on Human-Robot Interaction, Tokyo, Japan, pp. 33\u201340 (2013)","DOI":"10.1109\/HRI.2013.6483499"},{"key":"10500_CR124","unstructured":"Mutlu B, Terrell A, Huang C-M. Coordination mechanisms in human-robot collaboration. In: ACM\/IEEE International Conference on Human-Robot Interaction, Tokyo, Japan, pp. 1\u20136 (2013)"}],"container-title":["Cognitive Computation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-025-10500-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12559-025-10500-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-025-10500-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:07:33Z","timestamp":1761376053000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12559-025-10500-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,16]]},"references-count":124,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["10500"],"URL":"https:\/\/doi.org\/10.1007\/s12559-025-10500-7","relation":{},"ISSN":["1866-9956","1866-9964"],"issn-type":[{"value":"1866-9956","type":"print"},{"value":"1866-9964","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,16]]},"assertion":[{"value":"24 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This manuscript does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"The authors declare no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"146"}}