{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T15:48:05Z","timestamp":1779205685905,"version":"3.51.4"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","license":[{"start":{"date-parts":[[2022,6,21]],"date-time":"2022-06-21T00:00:00Z","timestamp":1655769600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,6,21]],"date-time":"2022-06-21T00:00:00Z","timestamp":1655769600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Postgraduate Scientific Research Innovation Project of Hunan Province","award":["CX20210030"],"award-info":[{"award-number":["CX20210030"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"DOI":"10.1007\/s10489-022-03840-6","type":"journal-article","created":{"date-parts":[[2022,6,21]],"date-time":"2022-06-21T22:02:33Z","timestamp":1655848953000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Weighted mean field reinforcement learning for large-scale UAV swarm confrontation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1189-0726","authenticated-orcid":false,"given":"Baolai","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xianzhong","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,21]]},"reference":[{"issue":"1","key":"3840_CR1","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1109\/JIOT.2021.3085673","volume":"9","author":"L Zhou","year":"2022","unstructured":"Zhou L, Leng S, Liu Q, Wang Q (2022) Intelligent uav swarm cooperation for multiple targets tracking. IEEE Internet Things J 9(1):743\u2013754. https:\/\/doi.org\/10.1109\/JIOT.2021.3085673","journal-title":"IEEE Internet Things J"},{"key":"3840_CR2","doi-asserted-by":"publisher","first-page":"104112","DOI":"10.1016\/j.engappai.2020.104112","volume":"98","author":"Z Sun","year":"2021","unstructured":"Sun Z, Piao H, Yang Z, Zhao Y, Zhan G, Zhou D, Meng G, Chen H, Chen X, Qu B et al (2021) Multi-agent hierarchical policy gradient for air combat tactics emergence via self-play. Eng Appl Artif Intell 98:104112. https:\/\/doi.org\/10.1016\/j.engappai.2020.104112","journal-title":"Eng Appl Artif Intell"},{"issue":"7540","key":"3840_CR3","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK (2015) Ostrovski, G., others : Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"3840_CR4","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: Proceedings of The 33rd international conference on machine learning, vol 48. PMLR, pp 1928\u20131937"},{"key":"3840_CR5","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Proceedings of the 31st international conference on neural information processing systems, vol 30. MIT Press, pp 6382\u20136393"},{"issue":"4","key":"3840_CR6","doi-asserted-by":"publisher","first-page":"0172395","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu A, Matiisen T, Kodelja D, Kuzovkin I, Korjus K, Aru J, Aru J, Vicente R (2017) Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4):0172395. https:\/\/doi.org\/10.1371\/journal.pone.0172395","journal-title":"PLoS ONE"},{"key":"3840_CR7","doi-asserted-by":"publisher","unstructured":"Gupta JK, Egorov M, Kochenderfer M (2017) Cooperative multi-agent control using deep reinforcement learning. In: International conference on autonomous agents and multiagent systems, vol 10642. Springer, pp 66\u201383. https:\/\/doi.org\/10.1007\/978-3-319-71682-4_5","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"3840_CR8","unstructured":"Yang Y, Luo R, Li M, Zhou M, Zhang W, Wang J (2018) Mean field multi-agent reinforcement learning. In: International conference on machine learning, vol 80. PMLR, pp 5571\u20135580"},{"key":"3840_CR9","doi-asserted-by":"crossref","unstructured":"Hasselt HV, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In: Proceedings of the Thirtieth AAAI conference on artificial intelligence, vol 30. AAAI Press, pp 2094\u20132100","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"3840_CR10","doi-asserted-by":"publisher","unstructured":"Konda VR, Tsitsiklis JN (2000) Actor-critic algorithms. In: Advances in neural information processing systems, vol 12. MIT press, pp 1008\u20131014. https:\/\/doi.org\/10.1137\/S0363012901385691","DOI":"10.1137\/S0363012901385691"},{"key":"3840_CR11","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1016\/j.isatra.2019.08.018","volume":"97","author":"S Shao","year":"2020","unstructured":"Shao S, Peng Y, He C, Du Y (2020) Efficient path planning for uav formation via comprehensively improved particle swarm optimization. ISA Trans 97:415\u2013430. https:\/\/doi.org\/10.1016\/j.isatra.2019.08.018","journal-title":"ISA Trans"},{"key":"3840_CR12","doi-asserted-by":"publisher","first-page":"7350","DOI":"10.1007\/s10489-020-02082-8","volume":"51","author":"W He","year":"2021","unstructured":"He W, qi X, Liu L (2021) A novel hybrid particle swarm optimization for multi-uav cooperate path planning. Appl Intell 51:7350\u20137364. https:\/\/doi.org\/10.1007\/s10489-020-02082-8","journal-title":"Appl Intell"},{"key":"3840_CR13","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1016\/j.comcom.2020.04.050","volume":"162","author":"C Xu","year":"2020","unstructured":"Xu C, Xu M, Yin C (2020) Optimized multi-uav cooperative path planning under the complex confrontation environment. Comput Commun 162:196\u2013203. https:\/\/doi.org\/10.1016\/j.comcom.2020.04.050","journal-title":"Comput Commun"},{"key":"3840_CR14","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1016\/j.ins.2018.06.061","volume":"509","author":"H Qiu","year":"2020","unstructured":"Qiu H, Duan H (2020) A multi-objective pigeon-inspired optimization approach to uav distributed flocking among obstacles. Inf Sci 509:515\u2013529. https:\/\/doi.org\/10.1016\/j.ins.2018.06.061","journal-title":"Inf Sci"},{"key":"3840_CR15","doi-asserted-by":"publisher","unstructured":"Luo L, Wang X, Ma J, Ong Y-S (2021) Grpavoid: Multigroup collision-avoidance control and optimization for uav swarm. IEEE Trans Cybern, 1\u201314. https:\/\/doi.org\/10.1109\/TCYB.2021.3132044","DOI":"10.1109\/TCYB.2021.3132044"},{"key":"3840_CR16","doi-asserted-by":"publisher","first-page":"105201","DOI":"10.1016\/j.knosys.2019.105201","volume":"196","author":"X Wu","year":"2020","unstructured":"Wu X, Chen H, Chen C, Zhong M, Xie S, Guo Y, Fujita H (2020) The autonomous navigation and obstacle avoidance for usvs with anoa deep reinforcement learning method. Knowl-Based Syst 196:105201. https:\/\/doi.org\/10.1016\/j.knosys.2019.105201","journal-title":"Knowl-Based Syst"},{"issue":"2","key":"3840_CR17","doi-asserted-by":"publisher","first-page":"1260","DOI":"10.1109\/TII.2021.3094207","volume":"18","author":"C Yan","year":"2022","unstructured":"Yan C, Wang C, Xiang X, Lan Z, Jiang Y (2022) Deep reinforcement learning of collision-free flocking policies for multiple fixed-wing uavs using local situation maps. IEEE Trans on Industr Inform 18(2):1260\u20131270. https:\/\/doi.org\/10.1109\/TII.2021.3094207","journal-title":"IEEE Trans on Industr Inform"},{"issue":"7782","key":"3840_CR18","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM, Mathieu M, Dudzik A, Chung J, Choi DH, Powell R, Ewalds T, Georgiev P et al (2019) Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575 (7782):350\u2013354. https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"issue":"7839","key":"3840_CR19","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser J, Antonoglou I, Hubert T, Simonyan K, Sifre L, Schmitt S, Guez A, Lockhart E, Hassabis D, Graepel T et al (2020) Mastering atari, go, chess and shogi by planning with a learned model. Nature 588(7839):604\u2013609. https:\/\/doi.org\/10.1038\/s41586-020-03051-4","journal-title":"Nature"},{"key":"3840_CR20","doi-asserted-by":"publisher","unstructured":"Kiran BR, Sobh I, Talpaert V, Mannion P, Sallab AAA, Yogamani S, P\u00e9rez P (2021) Deep reinforcement learning for autonomous driving: a survey. IEEE Trans Intell Transp Syst, 1\u201318. https:\/\/doi.org\/10.1109\/TITS.2021.3054625","DOI":"10.1109\/TITS.2021.3054625"},{"issue":"10","key":"3840_CR21","doi-asserted-by":"publisher","first-page":"3884","DOI":"10.1109\/TSMC.2018.2870983","volume":"50","author":"X Xu","year":"2018","unstructured":"Xu X, Zuo L, Li X, Qian L, Ren J, Sun Z (2018) A reinforcement learning approach to autonomous decision making of intelligent vehicles on highways. IEEE Transactions on Systems, Man, and Cybernetics: Systems 50(10):3884\u20133897. https:\/\/doi.org\/10.1109\/TSMC.2018.2870983","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics: Systems"},{"key":"3840_CR22","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/j.future.2021.04.018","volume":"123","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Zhou Y, Lu H, Fujita H (2021) Cooperative multi-agent actor\u2013critic control of traffic network flow based on edge computing. Futur Gener Comput Syst 123:128\u2013141. https:\/\/doi.org\/10.1016\/j.future.2021.04.018","journal-title":"Futur Gener Comput Syst"},{"issue":"1","key":"3840_CR23","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1109\/TCYB.2020.3015811","volume":"51","author":"X Wang","year":"2021","unstructured":"Wang X, Ke L, Qiao Z, Chai X (2021) Large-scale traffic signal control using a novel multiagent reinforcement learning. IEEE Trans Cybern 51(1):174\u2013187. https:\/\/doi.org\/10.1109\/TCYB.2020.3015811","journal-title":"IEEE Trans Cybern"},{"key":"3840_CR24","unstructured":"Foerster J, Nardelli N, Farquhar G, Afouras T, Torr PH, Kohli P, Whiteson S (2017) Stabilising experience replay for deep multi-agent reinforcement learning. In: International conference on machine learning, vol 70. PMLR, pp 1146\u20131155"},{"key":"3840_CR25","doi-asserted-by":"publisher","first-page":"5793","DOI":"10.1007\/s10489-020-02065-9","volume":"51","author":"H Jiang","year":"2021","unstructured":"Jiang H, Shi D, Xue C, Wang Y, Wang G, Zhang Y (2021) Multi-agent deep reinforcement learning with type-based hierarchical group communication. Appl Intell 51:5793\u20135808. https:\/\/doi.org\/10.1007\/s10489-020-02065-9","journal-title":"Appl Intell"},{"key":"3840_CR26","doi-asserted-by":"publisher","unstructured":"Jiang H, Shi D, Xue C, Wang Y, Wang G, Zhang Y (2020) Ghgc: Goal-based hierarchical group communication in multi-agent reinforcement learning. In: 2020 IEEE international conference on systems, man, and cybernetics (SMC). IEEE, pp 3507\u20133514. https:\/\/doi.org\/10.1109\/SMC42975.2020.9282974","DOI":"10.1109\/SMC42975.2020.9282974"},{"key":"3840_CR27","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.artint.2018.01.002","volume":"258","author":"SV Albrecht","year":"2018","unstructured":"Albrecht SV, Stone P (2018) Autonomous agents modelling other agents: a comprehensive survey and open problems. Artif Intell 258:66\u201395. https:\/\/doi.org\/10.1016\/j.artint.2018.01.002","journal-title":"Artif Intell"},{"key":"3840_CR28","unstructured":"He H, Boyd-Graber J, Kwok K, Daum\u00e9 H III (2016) Opponent modeling in deep reinforcement learning. In: International conference on machine learning, vol 48. PMLR, pp 1804\u20131813"},{"key":"3840_CR29","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.ins.2019.12.084","volume":"517","author":"P Gao","year":"2020","unstructured":"Gao P, Zhang Q, Wang F, Xiao L, Fujita H, Zhang Y (2020) Learning reinforced attentional representation for end-to-end visual tracking. Inf Sci 517:52\u201367. https:\/\/doi.org\/10.1016\/j.ins.2019.12.084","journal-title":"Inf Sci"},{"key":"3840_CR30","unstructured":"Iqbal S, Sha F (2019) Actor-attention-critic for multi-agent reinforcement learning. In: International conference on machine learning, vol 97. PMLR, pp 2961\u20132970"},{"key":"3840_CR31","unstructured":"Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms. In: International conference on machine learning, vol 32. PMLR, pp 387\u2013395"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03840-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03840-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03840-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,8]],"date-time":"2023-02-08T23:33:19Z","timestamp":1675899199000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03840-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,21]]},"references-count":31,"alternative-id":["3840"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03840-6","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,21]]},"assertion":[{"value":"1 June 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}]}}