{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T10:37:19Z","timestamp":1764153439519,"version":"3.46.0"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T00:00:00Z","timestamp":1761696000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T00:00:00Z","timestamp":1761696000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11633-025-1547-3","type":"journal-article","created":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T12:45:04Z","timestamp":1761741904000},"page":"1088-1101","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Theory of Mind Inspired Large Reasoning Language Model Improved Multi-agent Reinforcement Learning Algorithm for Robust and Adaptive Partner Modelling"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-9517-7030","authenticated-orcid":false,"given":"Xiyun","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5111-9891","authenticated-orcid":false,"given":"Tielin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Chenghao","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Shuang","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1111-1529","authenticated-orcid":false,"given":"Bo","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,29]]},"reference":[{"issue":"7782","key":"1547_CR1","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"O. Vinyals, I. Babuschkin, W. M. Czarnecki, M. Mathieu, A. Dudzik, J. Chung, D. H. Choi, R. Powell, T. Ewalds, P. Georgiev, J. Oh, D. Horgan, M. Kroiss, I. Danihelka, A. Huang, L. Sifre, T. Cai, J. P. Agapiou, M. Jaderberg, A. S. Vezhnevets, R. Leblond, T. Pohlen, V. Dalibard, D. Budden, Y. Sulsky, J. Molloy, T. L. Paine, C. Gulcehre, Z. Wang, T. Pfaff, Y. Wu, R. Ring, D. Yogatama, D. W\u00fcnsch, K. Mckinney, O. Smith, T. Schaul, T. Lillicrap, K. Kavukcuoglu, D. Hassabis, C. Apps, D. Silver. Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature, vol. 575, no. 7782, pp. 350\u2013354, 2019. DOI: https:\/\/doi.org\/10.1038\/s41586-019-1724-z.","journal-title":"Nature"},{"key":"1547_CR2","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"C Yu","year":"2022","unstructured":"C. Yu, A. Velu, E. Vinitsky, J. Gao, Y. Wang, A. Bayen, Y. Wu. The surprising effectiveness of PPO in cooperative multi-agent games. In Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 1787, 2022."},{"issue":"2","key":"1547_CR3","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1007\/s11633-022-1383-7","volume":"20","author":"L Meng","year":"2023","unstructured":"L. Meng, M. Wen, C. Le, X. Li, D. Xing, W. Zhang, Y. Wen, H. Zhang, J. Wang, Y. Yang, B. Xu. Offline pretrained multi-agent decision transformer. Machine Intelligence Research, vol. 20, no. 2, pp. 233\u2013248, 2023. DOI: https:\/\/doi.org\/10.1007\/s11633-022-1383-7.","journal-title":"Machine Intelligence Research"},{"issue":"1","key":"1547_CR4","volume":"21","year":"2020","unstructured":"T. Rashid, M. Samvelyan, C. S. De Witt, G. Farquhar, J. Foerster, S. Whiteson. Monotonic value function factorisation for deep multi-agent reinforcement learning. The Journal of Machine Learning Research, vol. 21, no. 1, Article number 178, 2020.","journal-title":"The Journal of Machine Learning Research"},{"key":"1547_CR5","doi-asserted-by":"publisher","first-page":"2974","DOI":"10.1609\/aaai.v32i1.11794","volume-title":"Proceedings of the 32nd AAAI Conference on Artificial Intelligence","author":"J Foerster","year":"2018","unstructured":"J. Foerster, G. Farquhar, T. Afouras, N. Nardelli, S. Whiteson. Counterfactual multi-agent policy gradients. In Proceedings of the 32nd AAAI Conference on Artificial Intelligence, New Orleans, USA, pp. 2974\u20132982, 2018. DOI: https:\/\/doi.org\/10.1609\/aaai.v32i1.11794."},{"key":"1547_CR6","first-page":"5887","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"K Son","year":"2019","unstructured":"K. Son, D. Kim, W. J. Kang, D. E. Hostallero, Y. Yi. QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 5887\u20135896, 2019."},{"key":"1547_CR7","volume-title":"Value-decomposition networks for cooperative multi-agent learning","author":"P Sunehag","year":"2017","unstructured":"P. Sunehag, G. Lever, A. Gruslys, W. M. Czarnecki, V. Zambaldi, M. Jaderberg, M. Lanctot, N. Sonnerat, J. Z. Leibo, K. Tuyls, T. Graepel. Value-decomposition networks for cooperative multi-agent learning, [Online], Available: https:\/\/arxiv.org\/abs\/1706.05296, 2017."},{"key":"1547_CR8","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.artint.2018.01.002","volume":"258","author":"S V Albrecht","year":"2018","unstructured":"S. V. Albrecht, P. Stone. Autonomous agents modelling other agents: A comprehensive survey and open problems. Artificial Intelligence, vol. 258, pp. 66\u201395, 2018. DOI: https:\/\/doi.org\/10.1016\/j.artint.2018.01.002.","journal-title":"Artificial Intelligence"},{"key":"1547_CR9","doi-asserted-by":"publisher","first-page":"602","DOI":"10.24963\/ijcai.2019\/85","volume-title":"Proceedings of the 28th International Joint Conference on Artificial Intelligence","author":"Z Tian","year":"2019","unstructured":"Z. Tian, Y. Wen, Z. Gong, F. Punakkath, S. Zou, J. Wang. A regularized opponent model with maximum entropy objective. In Proceedings of the 28th International Joint Conference on Artificial Intelligence, Macao, China, pp. 602\u2013608, 2019. DOI: https:\/\/doi.org\/10.24963\/ijcai.2019\/85."},{"key":"1547_CR10","first-page":"1804","volume-title":"Proceedings of the 33rd International Conference on Machine Learning","author":"H He","year":"2016","unstructured":"H. He, J. Boyd-Graber, K. Kwok, H. Daum\u00e9 III. Opponent modeling in deep reinforcement learning. In Proceedings of the 33rd International Conference on Machine Learning, New York City, USA, pp. 1804\u20131813, 2016."},{"key":"1547_CR11","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"Y Wen","year":"2019","unstructured":"Y. Wen, Y. Yang, R. Luo, J. Wang, W. Pan. Probabilistic recursive reasoning for multi-agent reinforcement learning. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1547_CR12","doi-asserted-by":"publisher","DOI":"10.3389\/fnins.2023.1219405","volume-title":"Mixture of personality improved spiking actor network for efficient multi-agent cooperation","author":"X Li","year":"2023","unstructured":"X. Li, Z. Ni, J. Ruan, L. Meng, J. Shi, T. Zhang, B. Xu. Mixture of personality improved spiking actor network for efficient multi-agent cooperation, [Online], Available: https:\/\/arxiv.org\/abs\/2305.05898, 2023."},{"key":"1547_CR13","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"A Shih","year":"2021","unstructured":"A. Shih, A. Sawhney, J. Kondic, S. Ermon, D. Sadigh. On the critical role of conventions in adaptive human-AI collaboration. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1547_CR14","volume-title":"GPT-4 technical report","author":"OpenAI.","year":"2023","unstructured":"OpenAI. GPT-4 technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2303.08774, 2023."},{"key":"1547_CR15","doi-asserted-by":"publisher","first-page":"1339","DOI":"10.18653\/v1\/2023.emnlp-main.85","volume-title":"Proceedings of Conference on Empirical Methods in Natural Language Processing","author":"C Qin","year":"2023","unstructured":"C. Qin, A. Zhang, Z. Zhang, J. Chen, M. Yasunaga, D. Yang. Is ChatGPT a general-purpose natural language processing task solver? In Proceedings of Conference on Empirical Methods in Natural Language Processing, Singapore, pp. 1339\u20131384, 2023. DOI: https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.85."},{"key":"1547_CR16","volume-title":"Sparks of artificial general intelligence: Early experiments with GPT-4","author":"S Bubeck","year":"2023","unstructured":"S. Bubeck, V. Chandrasekaran, R. Eldan, J. Gehrke, E. Horvitz, E. Kamar, P. Lee, Y. T. Lee, Y. Li, S. Lundberg, H. Nori, H. Palangi, M. T. Ribeiro, Y. Zhang. Sparks of artificial general intelligence: Early experiments with GPT-4, [Online], Available: https:\/\/arxiv.org\/abs\/2303.12712, 2023."},{"key":"1547_CR17","volume-title":"PaLM 2 technical report","author":"R Anil","year":"2023","unstructured":"R. Anil et al. PaLM 2 technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2305.10403, 2023."},{"issue":"7992","key":"1547_CR18","doi-asserted-by":"publisher","first-page":"570","DOI":"10.1038\/s41586-023-06792-0","volume":"624","author":"D A Boiko","year":"2023","unstructured":"D. A. Boiko, R. MacKnight, B. Kline, G. Gomes. Autonomous chemical research with large language models. Nature, vol. 624, no. 7992, pp. 570\u2013578, 2023. DOI: https:\/\/doi.org\/10.1038\/s41586-023-06792-0.","journal-title":"Nature"},{"key":"1547_CR19","volume-title":"Evaluating large language models trained on code","author":"M Chen","year":"2021","unstructured":"M. Chen, J. Tworek, H. Jun, Q. Yuan, H. P. de Oliveira Pinto, J. Kaplan, H. Edwards, Y. Burda, N. Joseph, G. Brockman, A. Ray, R. Puri, G. Krueger, M. Petrov, H. Khlaaf, G. Sastry, P. Mishkin, B. Chan, S. Gray, N. Ryder, M. Pavlov, A. Power, L. Kaiser, M. Bavarian, C. Winter, P. Tillet, F. P. Such, D. Cummings, M. Plappert, F. Chantzis, E. Barnes, A. Herbert-Voss, W. H. Guss, A. Nichol, A. Paino, N. Tezak, J. Tang, I. Babuschkin, S. Balaji, S. Jain, W. Saunders, C. Hesse, A. N. Carr, J. Leike, J. Achiam, V. Misra, E. Morikawa, A. Radford, M. Knight, M. Brundage, M. Murati, K. Mayer, P. Welinder, B. McGrew, D. Amodei, S. Mc-Candlish, I. Sutskever, W. Zaremba. Evaluating large language models trained on code, [Online], Available: https:\/\/arxiv.org\/abs\/2107.03374, 2021."},{"key":"1547_CR20","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"E Nijkamp","year":"2023","unstructured":"E. Nijkamp, B. Pang, H. Hayashi, L. Tu, H. Wang, Y. Zhou, S. Savarese, C. Xiong. CodeGen: An open large language model for code with multi-turn program synthesis. In Proceedings of the 11th International Conference on Learning Representations, Kigali, Rwanda, 2023."},{"issue":"6","key":"1547_CR21","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-024-40231-1","volume":"18","year":"2024","unstructured":"L. Wang, C. Ma, X. Feng, Z. Zhang, H. Yang, J. Zhang, Z. Chen, J. Tang, X. Chen, Y. Lin, W. X. Zhao, Z. Wei, J. Wen. A survey on large language model based autonomous agents. Frontiers of Computer Science, vol. 18, no. 6, Article number 186345, 2024. DOI: https:\/\/doi.org\/10.1007\/s11704-024-40231-1.","journal-title":"Frontiers of Computer Science"},{"key":"1547_CR22","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"N Shinn","year":"2023","unstructured":"N. Shinn, F. Cassano, A. Gopinath, K. Narasimhan, S. Yao. Reflexion: Language agents with verbal reinforcement learning. In Proceedings of the 37th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 377, 2023."},{"key":"1547_CR23","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"S Yao","year":"2023","unstructured":"S. Yao, J. Zhao, D. Yu, N. Du, I. Shafran, K. Narasimhan, Y. Cao. ReAct: Synergizing reasoning and acting in language models. In Proceedings of the 11th International Conference on Learning Representations, Kigali, Rwanda, 2023."},{"key":"1547_CR24","unstructured":"G. Wang, Y. Xie, Y. Jiang, A. Mandlekar, C. Xiao, Y. Zhu, L. Fan, A. Anandkumar. Voyager: An open-ended embodied agent with large language models. Transactions on Machine Learning Research, vol. 2024, 2024."},{"key":"1547_CR25","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"T B Brown","year":"2020","unstructured":"T. B. Brown, B. Mann, N. Ryder, M. Subbiah, J. Kaplan, P. Dhariwal, A. Neelakantan, P. Shyam, G. Sastry, A. Askell, S. Agarwal, A. Herbert-Voss, G. Krueger, T. Henighan, R. Child, A. Ramesh, D. M. Ziegler, J. Wu, C. Winter, C. Hesse, M. Chen, E. Sigler, M. Litwin, S. Gray, B. Chess, J. Clark, C. Berner, S. McCandlish, A. Radford, I. Sutskever, D. Amodei. Language models are few-shot learners. In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 159, 2020."},{"key":"1547_CR26","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"J Wei","year":"2022","unstructured":"J. Wei, X. Wang, D. Schuurmans, M. Bosma, B. Ichter, F. Xia, E. H. Chi, Q. V. Le, D. Zhou. Chain-of-thought prompting elicits reasoning in large language models. In Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 1800, 2022."},{"issue":"3","key":"1547_CR27","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1111\/1468-0262.00125","volume":"68","author":"N Feltovich","year":"2000","unstructured":"N. Feltovich. Reinforcement-based VS. belief-based learning models in experimental asymmetric-information games. Econometrica, vol. 68, no. 3, pp. 605\u2013641, 2000. DOI: https:\/\/doi.org\/10.1111\/1468-0262.00125.","journal-title":"Econometrica"},{"issue":"S3","key":"1547_CR28","doi-asserted-by":"publisher","first-page":"14055","DOI":"10.1073\/pnas.1213532110","volume":"110","author":"D Von Winterfeldt","year":"2013","unstructured":"D. Von Winterfeldt. Bridging the gap between science and decision making. Proceedings of the National Academy of Sciences of the United States of America, vol. 110, no. S3, pp. 14055\u201314061, 2013. DOI: https:\/\/doi.org\/10.1073\/pnas.1213532110.","journal-title":"Proceedings of the National Academy of Sciences of the United States of America"},{"issue":"6","key":"1547_CR29","doi-asserted-by":"publisher","first-page":"990","DOI":"10.1016\/j.conb.2012.05.010","volume":"22","author":"H Seo","year":"2012","unstructured":"H. Seo, D. Lee. Neural basis of learning and preference during social decision-making. Current Opinion in Neurobiology, vol. 22, no. 6, pp. 990\u2013995, 2012. DOI: https:\/\/doi.org\/10.1016\/j.conb.2012.05.010.","journal-title":"Current Opinion in Neurobiology"},{"issue":"2","key":"1547_CR30","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1016\/S1364-6613(02)00025-6","volume":"7","author":"H L Gallagher","year":"2003","unstructured":"H. L. Gallagher, C. D. Frith. Functional imaging of \u201ctheory of mind\u201d. Trends in cognitive sciences, vol. 7, no. 2, pp. 77\u201383, 2003. DOI: https:\/\/doi.org\/10.1016\/S1364-6613(02)00025-6.","journal-title":"Trends in cognitive sciences"},{"issue":"17","key":"1547_CR31","doi-asserted-by":"publisher","first-page":"R644","DOI":"10.1016\/j.cub.2005.08.041","volume":"15","author":"C Frith","year":"2005","unstructured":"C. Frith, U. Frith. Theory of mind. Current Biology, vol. 15, no. 17, pp. R644\u2013R646, 2005. DOI: https:\/\/doi.org\/10.1016\/j.cub.2005.08.041.","journal-title":"Current Biology"},{"issue":"4","key":"1547_CR32","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1037\/0033-295X.94.4.412","volume":"94","author":"A M Leslie","year":"1987","unstructured":"A. M. Leslie. Pretense and representation: The origins of \u201ctheory of mind\u201d. Psychological Review, vol. 94, no. 4, pp. 412\u2013426, 1987. DOI: https:\/\/doi.org\/10.1037\/0033-295X.94.4.412.","journal-title":"Psychological Review"},{"key":"1547_CR33","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1016\/j.dcn.2014.01.002","volume":"9","author":"C E V Mahy","year":"2014","unstructured":"C. E. V. Mahy, L. J. Moses, J. H. Pfeifer. How and where: Theory-of-mind in the brain. Developmental Cognitive Neuroscience, vol. 9, pp. 68\u201381, 2014. DOI: https:\/\/doi.org\/10.1016\/j.dcn.2014.01.002.","journal-title":"Developmental Cognitive Neuroscience"},{"issue":"68","key":"1547_CR34","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abm4183","volume":"7","year":"2022","unstructured":"L. Yuan, X. Gao, Z. Zheng, M. Edmonds, Y. N. Wu, F. Rossano, H. Lu, Y. Zhu, S. C. Zhu. In situ bidirectional human-robot value alignment. Science Robotics, vol. 7, no. 68, Article number 4183, 2022. DOI: https:\/\/doi.org\/10.1126\/scirobotics.abm4183.","journal-title":"Science Robotics"},{"key":"1547_CR35","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"Y Wang","year":"2022","unstructured":"Y. Wang, F. Zhong, J. Xu, Y. Wang. ToM2C: Target-oriented multi-agent communication and cooperation with theory of mind. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1547_CR36","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10650244","volume-title":"Proceedings of International Joint Conference on Neural Networks","author":"X Li","year":"2024","unstructured":"X. Li, T. Zhang, C. Liu, L. Meng, B. Xu. Long shortterm reasoning network with theory of mind for efficient multi-agent cooperation. In Proceedings of International Joint Conference on Neural Networks, Yokohama, Japan, 2024. DOI: https:\/\/doi.org\/10.1109\/IJCNN60899.2024.10650244."},{"issue":"4","key":"1547_CR37","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395","volume":"12","year":"2017","unstructured":"A. Tampuu, T. Matiisen, D. Kodelja, I. Kuzovkin, K. Korjus, J. Aru, J. Aru, R. Vicente. Multiagent cooperation and competition with deep reinforcement learning. PLoS One, vol. 12, no. 4, Article number e0172395, 2017. DOI: https:\/\/doi.org\/10.1371\/journal.pone.0172395.","journal-title":"PLoS One"},{"key":"1547_CR38","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892077","volume-title":"Proceedings of International Joint Conference on Neural Networks","author":"Z Wu","year":"2022","unstructured":"Z. Wu, K. Li, H. Xu, Y. Zang, B. An, J. Xing. L2E: Learning to exploit your opponent. In Proceedings of International Joint Conference on Neural Networks, Padua, Italy, 2022. DOI: https:\/\/doi.org\/10.1109\/IJCNN55064.2022.9892077."},{"key":"1547_CR39","first-page":"10498","volume-title":"Large language models still can\u2019t plan (A benchmark for LLMs on planning and reasoning about change)","author":"K Valmeekam","year":"2022","unstructured":"K. Valmeekam, A. Olmo, S. Sreedharan, S. Kambhampati. Large language models still can\u2019t plan (A benchmark for LLMs on planning and reasoning about change), [Online], Available: https:\/\/arxiv.org\/abs\/2206. 10498, 2022."},{"key":"1547_CR40","doi-asserted-by":"publisher","first-page":"1536","DOI":"10.18653\/v1\/2020.findings-emnlp.139","volume-title":"Proceedings of Findings of the Association for Computational Linguistics","author":"Z Feng","year":"2020","unstructured":"Z. Feng, D. Guo, D. Tang, N. Duan, X. Feng, M. Gong, L. Shou, B. Qin, T. Liu, D. Jiang, M. Zhou. CodeBERT: A pre-trained model for programming and natural languages. In Proceedings of Findings of the Association for Computational Linguistics, pp. 1536\u20131547, 2020. DOI: https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.139."},{"key":"1547_CR41","doi-asserted-by":"publisher","first-page":"13960","DOI":"10.18653\/v1\/2023.acl-long.780","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","author":"M Sclar","year":"2023","unstructured":"M. Sclar, S. Kumar, P. West, A. Suhr, Y. Choi, Y. Tsvetkov. Minding language models\u2019 (lack of) theory of mind: A plug-and-play multi-character belief tracker. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics, Toronto, Canada, pp. 13960\u201313980, 2023. DOI: https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.780."},{"issue":"4","key":"1547_CR42","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1038\/nrn1884","volume":"7","author":"D M Amodio","year":"2006","unstructured":"D. M. Amodio, C. D. Frith. Meeting of minds: The medial frontal cortex and social cognition. Nature Reviews Neuroscience, vol. 7, no. 4, pp. 268\u2013277, 2006. DOI: https:\/\/doi.org\/10.1038\/nrn1884.","journal-title":"Nature Reviews Neuroscience"},{"issue":"3","key":"1547_CR43","doi-asserted-by":"publisher","first-page":"435","DOI":"10.1007\/BF03395673","volume":"59","author":"H D Schlinger","year":"2009","unstructured":"H. D. Schlinger. Theory of mind: An overview and behavioral perspective. The Psychological Record, vol. 59, no. 3, pp. 435\u2013448, 2009. DOI: https:\/\/doi.org\/10.1007\/BF03395673.","journal-title":"The Psychological Record"},{"key":"1547_CR44","first-page":"4218","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"N Rabinowitz","year":"2018","unstructured":"N. Rabinowitz, F. Perbet, F. Song, C. Zhang, S. M. Ali Eslami, M. Botvinick. Machine theory of mind. In Proceedings of the 35th International Conference on Machine Learning, Stockholm, Sweden, pp. 4218\u20134227, 2018."},{"issue":"4","key":"1547_CR45","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1007\/s43154-020-00019-0","volume":"1","author":"A Tabrez","year":"2020","unstructured":"A. Tabrez, M. B. Luebbers, B. Hayes. A survey of mental modeling techniques in human\u2014robot teaming. Current Robotics Reports, vol. 1, no. 4, pp. 259\u2013267, 2020. DOI: https:\/\/doi.org\/10.1007\/s43154-020-00019-0.","journal-title":"Current Robotics Reports"},{"key":"1547_CR46","volume-title":"Thinking, Fast and Slow","author":"K Daniel","year":"2013","unstructured":"K. Daniel. Thinking, Fast and Slow, USA: Farrar, Straus and Giroux, 2013."},{"issue":"2","key":"1547_CR47","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1097\/ACM.0000000000000121","volume":"89","author":"P Croskerry","year":"2014","unstructured":"P. Croskerry, D. A. Petrie, J. B. Reilly, G. Tait. Deciding about fast and slow decisions. Academic Medicine, vol. 89, no. 2, pp. 197\u2013200, 2014. DOI: https:\/\/doi.org\/10.1097\/ACM.0000000000000121.","journal-title":"Academic Medicine"},{"issue":"3","key":"1547_CR48","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1016\/j.biopsycho.2012.03.009","volume":"90","author":"R L E P Reniers","year":"2012","unstructured":"R. L. E. P. Reniers, R. Corcoran, B. A. Vollm, A. Mashru, R. Howard, P. F. Liddle. Moral decision-making, tom, empathy and the default mode network. Biological Psychology, vol. 90, no. 3, pp. 202\u2013210, 2012. DOI: https:\/\/doi.org\/10.1016\/j.biopsycho.2012.03.009.","journal-title":"Biological Psychology"},{"issue":"7","key":"1547_CR49","doi-asserted-by":"publisher","first-page":"1776","DOI":"10.1093\/brain\/awv134","volume":"138","author":"H F Kim","year":"2015","unstructured":"H. F. Kim, O. Hikosaka. Parallel basal ganglia circuits for voluntary and automatic behaviour to reach rewards. Brain, vol. 138, no. 7, pp. 1776\u20131800, 2015. DOI: https:\/\/doi.org\/10.1093\/brain\/awv134.","journal-title":"Brain"},{"issue":"12","key":"1547_CR50","doi-asserted-by":"publisher","first-page":"719","DOI":"10.1038\/nrn4038","volume":"16","author":"M Jahanshahi","year":"2015","unstructured":"M. Jahanshahi, I. Obeso, J. C. Rothwell, J. A. Obeso. A fronto-striato-subthalamic-pallidal network for goal-directed and habitual inhibition. Nature Reviews Neuroscience, vol. 16, no. 12, pp. 719\u2013732, 2015. DOI: https:\/\/doi.org\/10.1038\/nrn4038.","journal-title":"Nature Reviews Neuroscience"},{"issue":"9","key":"1547_CR51","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1038\/nrn2667","volume":"10","author":"D Badre","year":"2009","unstructured":"D. Badre, M. D\u2019Esposito. Is the rostro-caudal axis of the frontal lobe hierarchical? Nature Reviews Neuroscience, vol. 10, no. 9, pp. 659\u2013669, 2009. DOI: https:\/\/doi.org\/10.1038\/nrn2667.","journal-title":"Nature Reviews Neuroscience"},{"key":"1547_CR52","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1146\/annurev.neuro.24.1.167","volume":"24","author":"E K Miller","year":"2001","unstructured":"E. K. Miller, J. D. Cohen. An integrative theory of prefrontal cortex function. Annual Review of Neuroscience, vol. 24, pp. 167\u2013202, 2001. DOI: https:\/\/doi.org\/10.1146\/annurev.neuro.24.1.167.","journal-title":"Annual Review of Neuroscience"},{"issue":"2","key":"1547_CR53","doi-asserted-by":"publisher","first-page":"988","DOI":"10.1006\/nimg.2002.1156","volume":"17","author":"J Rowe","year":"2002","unstructured":"J. Rowe, K. Friston, R. Frackowiak, R. Passingham. Attention to action: Specific modulation of corticocortical interactions in humans. NeuroImage, vol. 17, no. 2, pp. 988\u2013998, 2002. DOI: https:\/\/doi.org\/10.1006\/nimg.2002.1156.","journal-title":"NeuroImage"},{"issue":"4","key":"1547_CR54","doi-asserted-by":"publisher","first-page":"736","DOI":"10.1016\/j.neuron.2012.12.032","volume":"77","author":"M Wang","year":"2013","unstructured":"M. Wang, Y. Yang, C. J. Wang, N. J. Gamo, L. E. Jin, J. A. Mazer, J. H. Morrison, X. J. Wang, A. F. T. Arnsten. NMDA receptors subserve persistent neuronal firing during working memory in dorsolateral prefrontal cortex. Neuron, vol. 77, no. 4, pp. 736\u2013749, 2013. DOI: https:\/\/doi.org\/10.1016\/j.neuron.2012.12.032.","journal-title":"Neuron"},{"issue":"9118","key":"1547_CR55","doi-asserted-by":"publisher","first-page":"1801","DOI":"10.1016\/S0140-6736(97)11225-9","volume":"351","author":"P Brown","year":"1998","unstructured":"P. Brown, C. Marsden. What do the basal ganglia do?. The Lancet, vol. 351, no. 9118, pp. 1801\u20131804, 1998. DOI: https:\/\/doi.org\/10.1016\/S0140-6736(97)11225-9.","journal-title":"The Lancet"},{"issue":"12","key":"1547_CR56","doi-asserted-by":"publisher","first-page":"2334","DOI":"10.1162\/jocn_a_00307","volume":"24","author":"I Opris","year":"2012","unstructured":"I. Opris, R. E. Hampson, G. A. Gerhardt, T. W. Berger, S. A. Deadwyler. Columnar processing in primate pFC: Evidence for executive control microcircuits. Journal of cognitive neuroscience, vol. 24, no. 12, pp. 2334\u20132347, 2012. DOI: https:\/\/doi.org\/10.1162\/jocn_a_00307.","journal-title":"Journal of cognitive neuroscience"},{"issue":"6191","key":"1547_CR57","doi-asserted-by":"publisher","first-page":"1481","DOI":"10.1126\/science.1252254","volume":"344","author":"M Donoso","year":"2014","unstructured":"M. Donoso, A. G. E. Collins, E. Koechlin. Foundations of human reasoning in the prefrontal cortex. Science, vol. 344, no. 6191, pp. 1481\u20131486, 2014. DOI: https:\/\/doi.org\/10.1126\/science.1252254.","journal-title":"Science"},{"issue":"3","key":"1547_CR58","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1093\/cercor\/bhh126","volume":"15","author":"[ S A Bunge","year":"2005","unstructured":"[58] S. A. Bunge, C. Wendelken, D. Badre, A. D. Wagner. Analogical reasoning and prefrontal cortex: Evidence for separable retrieval and integration mechanisms. Cerebral Cortex, vol. 15, no. 3, pp. 239\u2013249, 2005. DOI: https:\/\/doi.org\/10.1093\/cercor\/bhh126.","journal-title":"Cerebral Cortex"},{"key":"1547_CR59","volume-title":"Theory of mind may have spontaneously emerged in large language models","author":"M Kosinski","year":"2023","unstructured":"M. Kosinski. Theory of mind may have spontaneously emerged in large language models, [Online], Available: https:\/\/arxiv.org\/abs\/2302.02083, 2023."},{"issue":"3","key":"1547_CR60","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1111\/1467-8624.00304","volume":"72","author":"H M Wellman","year":"2001","unstructured":"H. M. Wellman, D. Cross, J. Watson. Meta-analysis of theory-of-mind development: The truth about false belief. Child Development, vol. 72, no. 3, pp. 655\u2013684, 2001. DOI: https:\/\/doi.org\/10.1111\/1467-8624.00304.","journal-title":"Child Development"},{"issue":"6","key":"1547_CR61","doi-asserted-by":"publisher","first-page":"685","DOI":"10.1111\/1460-6984.12240","volume":"51","author":"T Korkiakangas","year":"2016","unstructured":"T. Korkiakangas, K. Dindar, A. Laitila, E. K\u00e4rn\u00e4. The Sally-Anne test: An interactional analysis of a dyadic assessment. International Journal of Language & Communication Disorders, vol. 51, no. 6, pp. 685\u2013702, 2016. DOI: https:\/\/doi.org\/10.1111\/1460-6984.12240.","journal-title":"International Journal of Language & Communication Disorders"},{"issue":"3","key":"1547_CR62","doi-asserted-by":"publisher","first-page":"1378","DOI":"10.1016\/j.neuroimage.2007.01.042","volume":"35","author":"M Sommer","year":"2007","unstructured":"M. Sommer, K. D\u00f6hnel, B. Sodian, J. Meinhardt, C. Thoermer, G. Hajak. Neural correlates of true and false belief reasoning. NeuroImage, vol. 35, no. 3, pp. 1378\u20131384, 2007. DOI: https:\/\/doi.org\/10.1016\/j.neuroimage.2007.01.042.","journal-title":"NeuroImage"},{"issue":"2","key":"1547_CR63","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1016\/j.euroneuro.2015.12.013","volume":"26","author":"A Maat","year":"2016","unstructured":"A. Maat, N. E. M. van Haren, C. F. Bartholomeusz, R. S. Kahn, W. Cahn. Emotion recognition and theory of mind are related to gray matter volume of the prefrontal cortex in schizophrenia. European Neuropsychopharmacology, vol. 26, no. 2, pp. 255\u2013264, 2016. DOI: https:\/\/doi.org\/10.1016\/j.euroneuro.2015.12.013.","journal-title":"European Neuropsychopharmacology"},{"key":"1547_CR64","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1016\/j.neubiorev.2014.02.005","volume":"42","author":"P Yuan","year":"2014","unstructured":"P. Yuan, N. Raz. Prefrontal cortex and executive functions in healthy adults: A meta-analysis of structural neuroimaging studies. Neuroscience & Biobehavioral Reviews, vol. 42, pp. 180\u2013192, 2014. DOI: https:\/\/doi.org\/10.1016\/j.neubiorev.2014.02.005.","journal-title":"Neuroscience & Biobehavioral Reviews"},{"issue":"6","key":"1547_CR65","doi-asserted-by":"publisher","first-page":"410","DOI":"10.1038\/nrn2648","volume":"10","author":"A F T Arnsten","year":"2009","unstructured":"A. F. T. Arnsten. Stress signalling pathways that impair prefrontal cortex structure and function. Nature Reviews Neuroscience, vol. 10, no. 6, pp. 410\u2013422, 2009. DOI: https:\/\/doi.org\/10.1038\/nrn2648.","journal-title":"Nature Reviews Neuroscience"},{"key":"1547_CR66","doi-asserted-by":"publisher","unstructured":"S. Alvarado, M. Tajerian, M. Millecamps, M. Suderman, L. S. Stone, M. Szyf. Peripheral nerve injury is accompanied by chronic transcriptome-wide changes in the mouse prefrontal cortex. Molecular Pain, vol. 9, Article number 21, 2013. DOI: https:\/\/doi.org\/10.1186\/1744-8069-9-21.","DOI":"10.1186\/1744-8069-9-21"}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-025-1547-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-025-1547-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-025-1547-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T10:03:01Z","timestamp":1764151381000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-025-1547-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,29]]},"references-count":66,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["1547"],"URL":"https:\/\/doi.org\/10.1007\/s11633-025-1547-3","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"type":"print","value":"2731-538X"},{"type":"electronic","value":"2731-5398"}],"subject":[],"published":{"date-parts":[[2025,10,29]]},"assertion":[{"value":"31 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declared that they have no conflicts of interest to this work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of interest"}}]}}