{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T01:13:20Z","timestamp":1778116400366,"version":"3.51.4"},"reference-count":59,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100013076","name":"National Major Science and Technology Projects of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013076","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.eswa.2026.131779","type":"journal-article","created":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T07:48:32Z","timestamp":1772005712000},"page":"131779","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Embedded mean field reinforcement learning for perimeter-defense game"],"prefix":"10.1016","volume":"316","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1094-6927","authenticated-orcid":false,"given":"Li","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3354-8625","authenticated-orcid":false,"given":"Xin","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1272-3154","authenticated-orcid":false,"given":"Xuxin","family":"Lv","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9905-4908","authenticated-orcid":false,"given":"Gangzheng","family":"Ai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2998-8828","authenticated-orcid":false,"given":"Wenjun","family":"Wu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"9","key":"10.1016\/j.eswa.2026.131779_bib0001","doi-asserted-by":"crossref","first-page":"1363","DOI":"10.1177\/02783649241237544","article-title":"The role of heterogeneity in autonomous perimeter defense problems","volume":"43","author":"Adler","year":"2024","journal-title":"The International Journal of Robotics Research"},{"issue":"2","key":"10.1016\/j.eswa.2026.131779_bib0002","doi-asserted-by":"crossref","first-page":"129","DOI":"10.13111\/2066-8201.2018.10.2.12","article-title":"Modeling of target tracking system for homing missiles and air defense systems","volume":"10","author":"Alqudsi","year":"2018","journal-title":"INCAS Bulletin"},{"key":"10.1016\/j.eswa.2026.131779_bib0003","doi-asserted-by":"crossref","first-page":"1439","DOI":"10.1109\/TRO.2024.3351556","article-title":"Multivehicle perimeter defense in conical environments","volume":"40","author":"Bajaj","year":"2024","journal-title":"IEEE Transactions on Robotics"},{"key":"10.1016\/j.eswa.2026.131779_bib0004","series-title":"2021 American control conference (ACC)","first-page":"3196","article-title":"Competitive perimeter defense on a line","author":"Bajaj","year":"2021"},{"key":"10.1016\/j.eswa.2026.131779_bib0005","series-title":"2015 international conference on advanced robotics (ICAR)","first-page":"164","article-title":"An approach to multi-agent pursuit evasion games using reinforcement learning","author":"Bilgin","year":"2015"},{"issue":"2","key":"10.1016\/j.eswa.2026.131779_bib0006","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MCS.2025.3534477","article-title":"Unsupervised representation learning in deep reinforcement learning: A review","volume":"45","author":"Botteghi","year":"2025","journal-title":"IEEE Control Systems"},{"issue":"9","key":"10.1016\/j.eswa.2026.131779_bib0007","doi-asserted-by":"crossref","first-page":"5417","DOI":"10.1109\/TSMC.2023.3270444","article-title":"A hierarchical deep reinforcement learning framework for 6-DOF UCAV air-to-air combat","volume":"53","author":"Chai","year":"2023","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics: Systems"},{"key":"10.1016\/j.eswa.2026.131779_bib0008","series-title":"International conference on machine learning","first-page":"941","article-title":"Learning action representations for reinforcement learning","author":"Chandak","year":"2019"},{"issue":"1","key":"10.1016\/j.eswa.2026.131779_bib0009","doi-asserted-by":"crossref","first-page":"698","DOI":"10.1109\/TAES.2019.2920219","article-title":"Wind compensation framework for unpowered aircraft using online waypoint correction","volume":"56","author":"Cho","year":"2019","journal-title":"IEEE Transactions on Aerospace and Electronic Systems"},{"key":"10.1016\/j.eswa.2026.131779_bib0010","series-title":"2022 14th international conference on wireless communications and signal processing (WCSP)","first-page":"910","article-title":"Mean-field multi-agent reinforcement learning for adaptive anti-jamming channel selection in UAV communications","author":"Du","year":"2022"},{"key":"10.1016\/j.eswa.2026.131779_bib0011","unstructured":"Dulac-Arnold, G., Evans, R., van Hasselt, H., Sunehag, P., Lillicrap, T., Hunt, J., Mann, T., Weber, T., Degris, T., & Coppin, B. (2015). Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv: 1512.07679."},{"issue":"9","key":"10.1016\/j.eswa.2026.131779_bib0012","doi-asserted-by":"crossref","first-page":"13368","DOI":"10.1109\/TVT.2024.3394235","article-title":"Age of information minimization using multi-agent uavs based on ai-enhanced mean field resource allocation","volume":"73","author":"Emami","year":"2024","journal-title":"IEEE Transactions on Vehicular Technology"},{"key":"10.1016\/j.eswa.2026.131779_bib0013","unstructured":"Fraccaro, M., Kamronn, S., Paquet, U., & Winther, O. (2017). A disentangled recognition and nonlinear dynamics model for unsupervised learning. Proceedings of the 31st International Conference on Neural Information Processing Systems, 30, 3604\u20133613."},{"issue":"23","key":"10.1016\/j.eswa.2026.131779_bib0014","doi-asserted-by":"crossref","first-page":"5113","DOI":"10.3390\/app9235113","article-title":"Attitude control in ascent phase of missile considering actuator non-linearity and wind disturbance","volume":"9","author":"Fu","year":"2019","journal-title":"Applied Sciences"},{"key":"10.1016\/j.eswa.2026.131779_bib0015","series-title":"International conference on machine learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.eswa.2026.131779_bib0016","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2023.103905","article-title":"Safe multi-agent reinforcement learning for multi-robot control","volume":"319","author":"Gu","year":"2023","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.eswa.2026.131779_bib0017","unstructured":"Ha, D., & Schmidhuber, J. (2018a). Recurrent world models facilitate policy evolution. Proceedings of the 32nd International Conference on Neural Information Processing Systems, 31, 2455\u20132467."},{"key":"10.1016\/j.eswa.2026.131779_bib0018","unstructured":"Ha, D., & Schmidhuber, J. (2018b). World models. arXiv preprint arXiv: 1803.10122."},{"issue":"3","key":"10.1016\/j.eswa.2026.131779_bib0019","doi-asserted-by":"crossref","first-page":"1555","DOI":"10.1109\/TAES.2017.2773272","article-title":"Consensus-based two-stage salvo attack guidance","volume":"54","author":"He","year":"2017","journal-title":"IEEE Transactions on Aerospace and Electronic Systems"},{"key":"10.1016\/j.eswa.2026.131779_bib0020","series-title":"Breakthroughs in statistics: Methodology and distribution","first-page":"492","article-title":"Robust estimation of a location parameter","author":"Huber","year":"1992"},{"issue":"12","key":"10.1016\/j.eswa.2026.131779_bib0021","doi-asserted-by":"crossref","first-page":"3559","DOI":"10.1007\/s12555-024-0533-9","article-title":"Nonlinear autopilot for improving guidance performance of dual-controlled missiles with lateral thrust regulation","volume":"22","author":"Jeong","year":"2024","journal-title":"International Journal of Control, Automation and Systems"},{"key":"10.1016\/j.eswa.2026.131779_bib0022","doi-asserted-by":"crossref","first-page":"499","DOI":"10.1007\/s10846-019-01090-2","article-title":"Strategies for patrolling missions with multiple UAVs","volume":"99","author":"Kappel","year":"2020","journal-title":"Journal of Intelligent & Robotic Systems"},{"issue":"6","key":"10.1016\/j.eswa.2026.131779_bib0023","doi-asserted-by":"crossref","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","article-title":"Deep reinforcement learning for autonomous driving: A survey","volume":"23","author":"Kiran","year":"2021","journal-title":"IEEE transactions on Intelligent Transportation Systems"},{"issue":"1-2","key":"10.1016\/j.eswa.2026.131779_bib0024","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1002\/nav.3800020109","article-title":"The hungarian method for the assignment problem","volume":"2","author":"Kuhn","year":"1955","journal-title":"Naval Research Logistics Quarterly"},{"key":"10.1016\/j.eswa.2026.131779_bib0025","unstructured":"Lee, E. S., Loianno, G., Jayaraman, D., & Kumar, V. (2022a). Vision-based perimeter defense via multiview pose estimation. arXiv preprint arXiv: 2209.12136."},{"key":"10.1016\/j.eswa.2026.131779_bib0026","series-title":"2020 59th IEEE conference on decision and control (CDC)","first-page":"1530","article-title":"Perimeter-defense game between aerial defender and ground intruder","author":"Lee","year":"2020"},{"key":"10.1016\/j.eswa.2026.131779_bib0027","series-title":"2021 IEEE international symposium on safety, security, and rescue robotics (SSRR)","first-page":"184","article-title":"Defending a perimeter from a ground intruder using an aerial defender: Theory and practice","author":"Lee","year":"2021"},{"key":"10.1016\/j.eswa.2026.131779_bib0028","unstructured":"Lee, E. S., Zhou, L., Ribeiro, A., & Kumar, V. (2022b). Learning decentralized strategies for a perimeter defense game with graph neural networks. arXiv preprint arXiv: 2211.01757."},{"issue":"12","key":"10.1016\/j.eswa.2026.131779_bib0029","doi-asserted-by":"crossref","first-page":"15594","DOI":"10.1109\/TVT.2020.3043851","article-title":"Downlink transmit power control in ultra-dense UAV network based on mean field game and deep reinforcement learning","volume":"69","author":"Li","year":"2020","journal-title":"IEEE Transactions on Vehicular Technology"},{"key":"10.1016\/j.eswa.2026.131779_bib0030","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2015). Continuous control with deep reinforcement learning. arXiv preprint arXiv: 1509.02971."},{"key":"10.1016\/j.eswa.2026.131779_bib0031","doi-asserted-by":"crossref","unstructured":"Liu, J., Zhong, Y., Hu, S., Fu, H., Fu, Q., Chang, X., & Yang, Y. (2023). Maximum entropy heterogeneous-agent reinforcement learning. arXiv preprint arXiv: 2306.10715.","DOI":"10.1007\/978-3-031-28394-9"},{"key":"10.1016\/j.eswa.2026.131779_bib0032","unstructured":"Lowe, R., Wu, Y. I., Tamar, A., Harb, J., Pieter Abbeel, O., & Mordatch, I. (2017). Multi-agent actor-critic for mixed cooperative-competitive environments. Proceedings of the 31st International Conference on Neural Information Processing Systems, 30, 6382\u20136393."},{"key":"10.1016\/j.eswa.2026.131779_bib0033","unstructured":"Mahsereci, M., Balles, L., Lassner, C., & Hennig, P. (2017). Early stopping without a validation set. arXiv preprint arXiv: 1703.09580."},{"key":"10.1016\/j.eswa.2026.131779_bib0034","series-title":"2013 International conference on advanced computer science and information systems (ICACSIS)","first-page":"161","article-title":"Simulation of intelligent unmanned aerial vehicle (UAV) for military surveillance","author":"Ma\u2019Sum","year":"2013"},{"key":"10.1016\/j.eswa.2026.131779_bib0035","doi-asserted-by":"crossref","unstructured":"Van der Pol, E., Kipf, T., Oliehoek, F. A., & Welling, M. (2020). Plannable approximations to mdp homomorphisms: Equivariance under actions. arXiv preprint arXiv: 2002.11963.","DOI":"10.65109\/DAIE3353"},{"key":"10.1016\/j.eswa.2026.131779_bib0036","series-title":"Neural networks: Tricks of the trade","first-page":"55","article-title":"Early stopping-but when?","author":"Prechelt","year":"2002"},{"key":"10.1016\/j.eswa.2026.131779_bib0037","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.125980","article-title":"A velocity-domain MAPPO approach for perimeter defensive confrontation by USV groups","volume":"265","author":"Pu","year":"2025","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131779_bib0038","unstructured":"Shishika, D., & Kumar, V. (2019). Perimeter-defense game on arbitrary convex shapes. arXiv preprint arXiv: 1909.03989."},{"key":"10.1016\/j.eswa.2026.131779_bib0039","series-title":"2018 IEEE conference on decision and control (CDC)","first-page":"2093","article-title":"Local-game decomposition for multiplayer perimeter-defense problem","author":"Shishika","year":"2018"},{"key":"10.1016\/j.eswa.2026.131779_bib0040","series-title":"Decision and game theory for security: 11th international conference, gamesec 2020, college Park, MD, USA, October 28\u201330, 2020, proceedings 11","first-page":"472","article-title":"A review of multi agent perimeter defense games","author":"Shishika","year":"2020"},{"key":"10.1016\/j.eswa.2026.131779_bib0041","series-title":"2019 IEEE 58th conference on decision and control (CDC)","first-page":"7325","article-title":"Team composition for perimeter defense with patrollers and defenders","author":"Shishika","year":"2019"},{"issue":"2","key":"10.1016\/j.eswa.2026.131779_bib0042","doi-asserted-by":"crossref","first-page":"2738","DOI":"10.1109\/LRA.2020.2972818","article-title":"Cooperative team strategies for multi-player perimeter-defense games","volume":"5","author":"Shishika","year":"2020","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.eswa.2026.131779_bib0043","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1109\/OJVT.2020.2990072","article-title":"Dynamic virtual resource allocation for 5G and beyond network slicing","volume":"1","author":"Song","year":"2020","journal-title":"IEEE Open Journal of Vehicular Technology"},{"issue":"7","key":"10.1016\/j.eswa.2026.131779_bib0044","doi-asserted-by":"crossref","first-page":"5620","DOI":"10.1109\/TWC.2025.3548127","article-title":"Dynamic trajectory and power control in ultra-dense UAV networks: A mean-field reinforcement learning approach","volume":"24","author":"Song","year":"2025","journal-title":"IEEE Transactions on Wireless Communications"},{"key":"10.1016\/j.eswa.2026.131779_bib0045","doi-asserted-by":"crossref","unstructured":"Song, H., Feng, M., Zhou, W., & Li, H. (2023). MA2CL: Masked attentive contrastive learning for multi-agent reinforcement learning. arXiv preprint arXiv: 2306.02006.","DOI":"10.24963\/ijcai.2023\/470"},{"key":"10.1016\/j.eswa.2026.131779_bib0046","unstructured":"Subramanian, S. G., Poupart, P., Taylor, M. E., & Hegde, N. (2020). Multi type mean field reinforcement learning. arXiv preprint arXiv: 2002.02513."},{"issue":"5","key":"10.1016\/j.eswa.2026.131779_bib0047","doi-asserted-by":"crossref","first-page":"1260","DOI":"10.18186\/thermal.1377200","article-title":"The effects of different wing configurations on missile aerodynamics","volume":"9","author":"\u015eumnu","year":"2023","journal-title":"Journal of Thermal Engineering"},{"issue":"3","key":"10.1016\/j.eswa.2026.131779_bib0048","doi-asserted-by":"crossref","first-page":"686","DOI":"10.2514\/1.59139","article-title":"Intercept angle missile guidance under time varying acceleration bounds","volume":"36","author":"Taub","year":"2013","journal-title":"Journal of Guidance, Control, and Dynamics"},{"key":"10.1016\/j.eswa.2026.131779_bib0049","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, \u0141., & Polosukhin, I. (2017). Attention is all you need. Proceedings of the 31st International Conference on Neural Information Processing Systems, 30, 6000\u20136010."},{"key":"10.1016\/j.eswa.2026.131779_bib0050","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1016\/j.neucom.2020.06.031","article-title":"Cooperative control for multi-player pursuit-evasion games with reinforcement learning","volume":"412","author":"Wang","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.eswa.2026.131779_bib0051","series-title":"2018 IEEE global communications conference (GLOBECOM)","first-page":"1","article-title":"Traffic-aware adaptive deployment for UAV-aided communication networks","author":"Wang","year":"2018"},{"issue":"9","key":"10.1016\/j.eswa.2026.131779_bib0052","doi-asserted-by":"crossref","first-page":"4531","DOI":"10.1109\/TWC.2019.2926279","article-title":"Adaptive deployment for UAV-aided communication networks","volume":"18","author":"Wang","year":"2019","journal-title":"IEEE Transactions on Wireless Communications"},{"key":"10.1016\/j.eswa.2026.131779_bib0053","series-title":"International conference on database systems for advanced applications","first-page":"301","article-title":"Weighted mean-field multi-agent reinforcement learning via reward attribution decomposition","author":"Wu","year":"2022"},{"issue":"10","key":"10.1016\/j.eswa.2026.131779_bib0054","doi-asserted-by":"crossref","first-page":"17462","DOI":"10.1109\/TITS.2024.3484764","article-title":"Joint resource allocation for V2X communications with multi-type mean-field reinforcement learning","volume":"26","author":"Xu","year":"2024","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"10.1016\/j.eswa.2026.131779_bib0055","series-title":"International conference on machine learning","first-page":"5571","article-title":"Mean field multi-agent reinforcement learning","author":"Yang","year":"2018"},{"key":"10.1016\/j.eswa.2026.131779_bib0056","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"11744","article-title":"Hierarchical mean-field deep reinforcement learning for large-scale multiagent systems","volume":"vol. 37","author":"Yu","year":"2023"},{"issue":"2","key":"10.1016\/j.eswa.2026.131779_bib0057","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1109\/MWC.007.2400124","article-title":"Decision transformers for wireless communications: A new paradigm of resource management","volume":"32","author":"Zhang","year":"2025","journal-title":"IEEE Wireless Communications"},{"issue":"10","key":"10.1016\/j.eswa.2026.131779_bib0058","doi-asserted-by":"crossref","first-page":"7900","DOI":"10.1109\/TNNLS.2022.3146976","article-title":"Game of drones: Multi-UAV pursuit-evasion game with online motion planning by deep reinforcement learning","volume":"34","author":"Zhang","year":"2022","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"32","key":"10.1016\/j.eswa.2026.131779_bib0059","first-page":"1","article-title":"Heterogeneous-agent reinforcement learning","volume":"25","author":"Zhong","year":"2024","journal-title":"Journal of Machine Learning Research"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426006925?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426006925?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T00:51:02Z","timestamp":1778115062000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426006925"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":59,"alternative-id":["S0957417426006925"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131779","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Embedded mean field reinforcement learning for perimeter-defense game","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131779","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"131779"}}