{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T15:56:51Z","timestamp":1781107011293,"version":"3.54.1"},"reference-count":51,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004761","name":"Hainan Province Natural Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004761","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007129","name":"Shandong Province Natural Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Robotics and Autonomous Systems"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.robot.2026.105530","type":"journal-article","created":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:07:45Z","timestamp":1778756865000},"page":"105530","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["MA2MB: Multi-agent mutual-advising model-based reinforcement learning for pursuit and evasion games"],"prefix":"10.1016","volume":"203","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6529-4015","authenticated-orcid":false,"given":"Baolin","family":"Zhao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qi","family":"Guo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiandong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rachid","family":"Hedjam","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2952-6642","authenticated-orcid":false,"given":"Guoqiang","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.robot.2026.105530_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2025.105216","article-title":"FG-PE: Factor-graph approach for multi-robot pursuit\u2013evasion","volume":"195","author":"Esfahani","year":"2026","journal-title":"Robot. Auton. Syst."},{"issue":"3","key":"10.1016\/j.robot.2026.105530_b2","doi-asserted-by":"crossref","first-page":"3130","DOI":"10.1109\/TNNLS.2022.3203977","article-title":"Safety-aware pursuit-evasion games in unknown environments using Gaussian processes and finite-time convergent reinforcement learning","volume":"35","author":"Kokolakis","year":"2024","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"5","key":"10.1016\/j.robot.2026.105530_b3","doi-asserted-by":"crossref","first-page":"1911","DOI":"10.1109\/TAC.2019.2926554","article-title":"Solutions for multiagent pursuit-evasion games on communication graphs: Finite-time capture and asymptotic behaviors","volume":"65","author":"Lopez","year":"2020","journal-title":"IEEE Trans. Autom. Control"},{"key":"10.1016\/j.robot.2026.105530_b4","first-page":"11327","article-title":"Mingling foresight with imagination: Model-based cooperative multi-agent reinforcement learning","volume":"35","author":"Xu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.robot.2026.105530_b5","first-page":"11586","article-title":"Solving large-scale pursuit-evasion games using pre-trained strategies","volume":"vol. 37","author":"Li","year":"2023"},{"issue":"1","key":"10.1016\/j.robot.2026.105530_b6","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/TSMC.2023.3347044","article-title":"Pursuit-evasion games of marine surface vessels using neural network-based control","volume":"55","author":"Guo","year":"2025","journal-title":"IEEE Trans. Syst. Man Cybern.: Syst."},{"issue":"4","key":"10.1016\/j.robot.2026.105530_b7","doi-asserted-by":"crossref","first-page":"5895","DOI":"10.1109\/TASE.2023.3319510","article-title":"Multi-USV cooperative chasing strategy based on obstacles assistance and deep reinforcement learning","volume":"21","author":"Gan","year":"2024","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"issue":"5","key":"10.1016\/j.robot.2026.105530_b8","doi-asserted-by":"crossref","first-page":"5171","DOI":"10.1109\/TIE.2022.3187577","article-title":"Multiplayer obstacle avoidance pursuit-evasion games with adaptive parameter estimation","volume":"70","author":"Cheng","year":"2022","journal-title":"IEEE Trans. Ind. Electron."},{"issue":"2","key":"10.1016\/j.robot.2026.105530_b9","doi-asserted-by":"crossref","first-page":"2747","DOI":"10.1109\/TNNLS.2024.3351631","article-title":"Deep reinforcement learning for Nash equilibrium of differential games","volume":"36","author":"Li","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"3","key":"10.1016\/j.robot.2026.105530_b10","doi-asserted-by":"crossref","first-page":"1405","DOI":"10.1109\/TCYB.2019.2958548","article-title":"Cooperative pursuit with multi-pursuer and one faster free-moving evader","volume":"52","author":"Fang","year":"2022","journal-title":"IEEE Trans. Cybern."},{"issue":"12","key":"10.1016\/j.robot.2026.105530_b11","doi-asserted-by":"crossref","first-page":"13512","DOI":"10.1109\/TCYB.2021.3112572","article-title":"Distributed pursuit of an evader with collision and obstacle avoidance","volume":"52","author":"Tian","year":"2021","journal-title":"IEEE Trans. Cybern."},{"issue":"5","key":"10.1016\/j.robot.2026.105530_b12","doi-asserted-by":"crossref","first-page":"5224","DOI":"10.1109\/TIE.2023.3283684","article-title":"Distributed optimal solutions for multiagent pursuit-evasion games for capture and formation control","volume":"71","author":"Zhou","year":"2023","journal-title":"IEEE Trans. Ind. Electron."},{"key":"10.1016\/j.robot.2026.105530_b13","unstructured":"Y. Deng, Z. Wang, Y. Zhang, Improving multi-agent reinforcement learning with stable prefix policy, in: Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. Jeju, IJCAI, 2024, pp. 49\u201357."},{"key":"10.1016\/j.robot.2026.105530_b14","unstructured":"Y. Yu, Q. Yin, J. Zhang, P. Xu, K. Huang, ADMN: agent-driven modular network for dynamic parameter sharing in cooperative multi-agent reinforcement learning, in: Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, IJCAI, 2024, pp. 302\u2013310."},{"issue":"1","key":"10.1016\/j.robot.2026.105530_b15","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1109\/TPAMI.2023.3322426","article-title":"Fear-neuro-inspired reinforcement learning for safe autonomous driving","volume":"46","author":"He","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"4","key":"10.1016\/j.robot.2026.105530_b16","doi-asserted-by":"crossref","first-page":"3800","DOI":"10.1109\/TNSE.2024.3386678","article-title":"Distributed model-free optimal control for multiagent pursuit-evasion differential games","volume":"11","author":"Zhang","year":"2024","journal-title":"IEEE Trans. Netw. Sci. Eng."},{"issue":"10","key":"10.1016\/j.robot.2026.105530_b17","doi-asserted-by":"crossref","first-page":"7900","DOI":"10.1109\/TNNLS.2022.3146976","article-title":"Game of drones: Multi-UAV pursuit-evasion game with online motion planning by deep reinforcement learning","volume":"34","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"5","key":"10.1016\/j.robot.2026.105530_b18","doi-asserted-by":"crossref","first-page":"2788","DOI":"10.1109\/TPAMI.2023.3335263","article-title":"Model-based reinforcement learning with isolated imaginations","volume":"46","author":"Pan","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.robot.2026.105530_b19","first-page":"10435","article-title":"Models as agents: Optimizing multi-step predictions of interactive local models in model-based multi-agent reinforcement learning","volume":"vol. 37","author":"Wu","year":"2023"},{"key":"10.1016\/j.robot.2026.105530_b20","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2025.105019","article-title":"Distributed cooperative pursuit with encirclement guarantee via robust model predictive control","volume":"192","author":"Wang","year":"2025","journal-title":"Robot. Auton. Syst."},{"key":"10.1016\/j.robot.2026.105530_b21","series-title":"COMBO: Compositional world models for embodied multi-agent cooperation","author":"Zhang","year":"2024"},{"key":"10.1016\/j.robot.2026.105530_b22","series-title":"ADESSE: Advice explanations in complex repeated decision-making environments","author":"Schleibaum","year":"2024"},{"issue":"4","key":"10.1016\/j.robot.2026.105530_b23","doi-asserted-by":"crossref","first-page":"2918","DOI":"10.1109\/TASE.2021.3097286","article-title":"A dynamic game framework for rational and persistent robot deception with an application to deceptive pursuit-evasion","volume":"19","author":"Huang","year":"2021","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"issue":"9","key":"10.1016\/j.robot.2026.105530_b24","doi-asserted-by":"crossref","first-page":"4939","DOI":"10.1109\/TAC.2022.3168430","article-title":"Multiplayer pursuit-evasion differential games with malicious pursuers","volume":"67","author":"Xu","year":"2022","journal-title":"IEEE Trans. Autom. Control"},{"issue":"4","key":"10.1016\/j.robot.2026.105530_b25","doi-asserted-by":"crossref","first-page":"3971","DOI":"10.1109\/TAES.2023.3235873","article-title":"Nonzero-sum pursuit-evasion game control for spacecraft systems: A Q-learning method","volume":"59","author":"Zheng","year":"2023","journal-title":"IEEE Trans. Aerosp. Electron. Syst."},{"key":"10.1016\/j.robot.2026.105530_b26","series-title":"2023 IEEE International Conference on Robotics and Automation","first-page":"1386","article-title":"Mixed observable RRT: Multi-agent mission-planning in partially observable environments","author":"Johansson","year":"2023"},{"issue":"4","key":"10.1016\/j.robot.2026.105530_b27","doi-asserted-by":"crossref","first-page":"1313","DOI":"10.1109\/TRO.2020.3047521","article-title":"Game-theoretic planning for self-driving cars in multivehicle competitive scenarios","volume":"37","author":"Wang","year":"2021","journal-title":"IEEE Trans. Robot."},{"issue":"5","key":"10.1016\/j.robot.2026.105530_b28","doi-asserted-by":"crossref","first-page":"2827","DOI":"10.1109\/TAC.2024.3485433","article-title":"Distributed task allocation with minimum makespan for heterogeneous multiplayer pursuit-evasion games","volume":"70","author":"Li","year":"2025","journal-title":"IEEE Trans. Autom. Control"},{"key":"10.1016\/j.robot.2026.105530_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2024.104890","article-title":"Hierarchical probabilistic graphical models for multi-UAV cooperative pursuit in dynamic environments","volume":"185","author":"Huang","year":"2025","journal-title":"Robot. Auton. Syst."},{"key":"10.1016\/j.robot.2026.105530_b30","doi-asserted-by":"crossref","unstructured":"X. Guo, D. Shi, W. Fan, Scalable communication for multi-agent reinforcement learning via transformer-based email mechanism, in: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI, 2023, pp. 126\u2013134.","DOI":"10.24963\/ijcai.2023\/15"},{"key":"10.1016\/j.robot.2026.105530_b31","doi-asserted-by":"crossref","unstructured":"B. Zhang, L. Li, Z. Xu, D. Li, G. Fan, Inducing Stackelberg equilibrium through spatio-temporal sequential decision-making in multi-agent reinforcement learning, in: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI, 2023, pp. 353\u2013361.","DOI":"10.24963\/ijcai.2023\/40"},{"key":"10.1016\/j.robot.2026.105530_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.robot.2025.105163","article-title":"Multi-agent reinforcement learning for zero-shot coverage path planning with dynamic UAV networks","volume":"195","author":"Carvalho","year":"2026","journal-title":"Robot. Auton. Syst."},{"key":"10.1016\/j.robot.2026.105530_b33","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"vol. 30","author":"Lowe","year":"2017"},{"key":"10.1016\/j.robot.2026.105530_b34","series-title":"AMS-DRL: Learningmulti-pursuit evasion for safe targeted navigation of drones","author":"Xiao","year":"2023"},{"key":"10.1016\/j.robot.2026.105530_b35","doi-asserted-by":"crossref","unstructured":"Q. Zhang, C. Lu, A. Garg, J. Foerster, Centralized Model and Exploration Policy for Multi-Agent RL, in: Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems, 2022, pp. 1500\u20131508.","DOI":"10.65109\/ODCQ7385"},{"key":"10.1016\/j.robot.2026.105530_b36","doi-asserted-by":"crossref","unstructured":"V. Egorov, A. Shpilman, Scalable Multi-Agent Model-Based Reinforcement Learning, in: Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems, 2022, pp. 381\u2013390.","DOI":"10.65109\/ZYEH4087"},{"key":"10.1016\/j.robot.2026.105530_b37","series-title":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","first-page":"3384","article-title":"Model-based multi-agent policy optimization with adaptive opponent-wise rollouts","author":"Zhang","year":"2021"},{"key":"10.1016\/j.robot.2026.105530_b38","first-page":"28208","article-title":"Model-based opponent modeling","volume":"35","author":"Yu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.robot.2026.105530_b39","series-title":"Efficient multi-agent reinforcement learning by planning","author":"Liu","year":"2024"},{"issue":"1","key":"10.1016\/j.robot.2026.105530_b40","doi-asserted-by":"crossref","first-page":"1044","DOI":"10.1109\/TNNLS.2023.3329808","article-title":"Plug-and-play model-agnostic counterfactual policy synthesis for deep reinforcement learning based recommendation","volume":"36","author":"Wang","year":"2023","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.robot.2026.105530_b41","first-page":"19580","article-title":"Efficient model-based multi-agent reinforcement learning via optimistic equilibrium computation","volume":"vol. 162","author":"P. G. Sessa","year":"2022"},{"key":"10.1016\/j.robot.2026.105530_b42","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume":"35","author":"Yu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"178","key":"10.1016\/j.robot.2026.105530_b43","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"issue":"12","key":"10.1016\/j.robot.2026.105530_b44","doi-asserted-by":"crossref","first-page":"7403","DOI":"10.1109\/TSMC.2023.3296773","article-title":"Ask-AC: An initiative advisor-in-the-loop actor-critic framework","volume":"53","author":"Liu","year":"2023","journal-title":"IEEE Trans. Syst. Man Cybern.: Syst."},{"key":"10.1016\/j.robot.2026.105530_b45","series-title":"Learning to advise and learning from advice in cooperative multi-agent reinforcement learning","author":"Jin","year":"2022"},{"issue":"10","key":"10.1016\/j.robot.2026.105530_b46","doi-asserted-by":"crossref","first-page":"7934","DOI":"10.1109\/TNNLS.2022.3147221","article-title":"Model-based self-advising for multi-agent learning","volume":"34","author":"Ye","year":"2023","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.robot.2026.105530_b47","first-page":"17299","article-title":"Cautiously-optimistic knowledge sharing for cooperative multi-agent reinforcement learning","volume":"vol. 38","author":"Ba","year":"2024"},{"key":"10.1016\/j.robot.2026.105530_b48","first-page":"6653","article-title":"An enhanced advising model in teacher-student framework using state categorization","volume":"vol. 35","author":"Anand","year":"2021"},{"key":"10.1016\/j.robot.2026.105530_b49","series-title":"Is centralized training with decentralized execution framework centralized enough for MARL?","author":"Zhou","year":"2023"},{"key":"10.1016\/j.robot.2026.105530_b50","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.123887","article-title":"A GNN-based teacher\u2013student framework with multi-advice","volume":"250","author":"Lei","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.robot.2026.105530_b51","series-title":"Learning to communicate through imagination with model-based deep multi-agent reinforcement learning","author":"Pretorius","year":"2020"}],"container-title":["Robotics and Autonomous Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0921889026002034?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0921889026002034?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T14:58:44Z","timestamp":1781103524000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0921889026002034"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":51,"alternative-id":["S0921889026002034"],"URL":"https:\/\/doi.org\/10.1016\/j.robot.2026.105530","relation":{},"ISSN":["0921-8890"],"issn-type":[{"value":"0921-8890","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"MA2MB: Multi-agent mutual-advising model-based reinforcement learning for pursuit and evasion games","name":"articletitle","label":"Article Title"},{"value":"Robotics and Autonomous Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.robot.2026.105530","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"105530"}}