{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T16:56:30Z","timestamp":1781283390452,"version":"3.54.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"20","license":[{"start":{"date-parts":[[2024,7,29]],"date-time":"2024-07-29T00:00:00Z","timestamp":1722211200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,29]],"date-time":"2024-07-29T00:00:00Z","timestamp":1722211200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61573087"],"award-info":[{"award-number":["61573087"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the National Key Research and Development Program of China","award":["2020AAA0109203"],"award-info":[{"award-number":["2020AAA0109203"]}]},{"name":"Fundamental Research Funds for Liaoning Natural Science Foundation","award":["2019-MS-120"],"award-info":[{"award-number":["2019-MS-120"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["N2104026"],"award-info":[{"award-number":["N2104026"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007847","name":"Natural Science Foundation of Jilin Province","doi-asserted-by":"publisher","award":["52074064"],"award-info":[{"award-number":["52074064"]}],"id":[{"id":"10.13039\/100007847","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s10489-024-05674-w","type":"journal-article","created":{"date-parts":[[2024,7,29]],"date-time":"2024-07-29T07:46:41Z","timestamp":1722239201000},"page":"9892-9905","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Research on heterogeneous multi-UAV collaborative decision-making method based on improved PPO"],"prefix":"10.1007","volume":"54","author":[{"given":"Lin","family":"Xu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xinmiao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dong","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Beihong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aixue","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,7,29]]},"reference":[{"key":"5674_CR1","first-page":"1","volume":"47","author":"S Wei","year":"2021","unstructured":"Wei S, Yang-He F, Guang-Quan C, Hong-Lan H, Jin-Cai H, Zhong L, Wei H (2021) Research on multi-aircraft cooperative air combat method based on deep reinforcement learning. Acta Automatica Sinica 47:1\u201314","journal-title":"Acta Automatica Sinica"},{"key":"5674_CR2","unstructured":"Burgin GH, Owens A (1975) An adaptive maneuvering logic computer program for the simulation of one-to-one air-to-air combat. vol 2: Program description"},{"key":"5674_CR3","unstructured":"Goodrich KH (1993) A high-fidelity, six-degree-of-freedom batch simulation environment for tactical guidance research and evaluation. National Aeronautics and Space Administration, Office of Management, vol 4440"},{"issue":"1","key":"5674_CR4","first-page":"2167","volume":"6","author":"N Ernest","year":"2016","unstructured":"Ernest N, Carroll D, Schumacher C, Clark M, Cohen K, Lee G (2016) Genetic fuzzy based artificial intelligence for unmanned combat aerial vehicle control in simulated air combat missions. J Def Manag 6(1):2167\u20130374","journal-title":"J Def Manag"},{"issue":"7540","key":"5674_CR5","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"7587","key":"5674_CR6","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M et al (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"issue":"4","key":"5674_CR7","doi-asserted-by":"publisher","first-page":"2393","DOI":"10.1109\/TII.2019.2936167","volume":"16","author":"H Shi","year":"2019","unstructured":"Shi H, Shi L, Xu M, Hwang K-S (2019) End-to-end navigation strategy with deep reinforcement learning for mobile robots. IEEE Trans Ind Inf 16(4):2393\u20132402","journal-title":"IEEE Trans Ind Inf"},{"key":"5674_CR8","doi-asserted-by":"crossref","unstructured":"Tong G, Jiang N, Biyue L, Xi Z, Ya W, Wenbo D (2021) Uav navigation in high dynamic environments: a deep reinforcement learning approach. Chin J Aeronaut 34(2):479\u2013489","DOI":"10.1016\/j.cja.2020.05.011"},{"key":"5674_CR9","doi-asserted-by":"crossref","unstructured":"Zhao X, Xia L, Tang J, Yin D (2019) deep reinforcement learning for search, recommendation, and online advertising: a survey by Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM SIGWEB Newsletter, no. Spring, pp 1\u201315","DOI":"10.1145\/3320496.3320500"},{"key":"5674_CR10","doi-asserted-by":"publisher","first-page":"106170","DOI":"10.1016\/j.knosys.2020.106170","volume":"205","author":"F Liu","year":"2020","unstructured":"Liu F, Tang R, Li X, Zhang W, Ye Y, Chen H, Guo H, Zhang Y, He X (2020) State representation modeling for deep reinforcement learning based recommendation. Knowl-Based Syst 205:106170","journal-title":"Knowl-Based Syst"},{"issue":"6443","key":"5674_CR11","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1126\/science.aau6249","volume":"364","author":"M Jaderberg","year":"2019","unstructured":"Jaderberg M, Czarnecki WM, Dunning I, Marris L, Lever G, Castaneda AG, Beattie C, Rabinowitz NC, Morcos AS, Ruderman A et al (2019) Human-level performance in 3d multiplayer games with population-based reinforcement learning. Science 364(6443):859\u2013865","journal-title":"Science"},{"key":"5674_CR12","unstructured":"Rashid T, Samvelyan M, Schroeder C, Farquhar G, Foerster J, Whiteson S (2018) Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning. In: International conference on machine learning. PMLR, pp 4295\u20134304"},{"key":"5674_CR13","doi-asserted-by":"crossref","unstructured":"Sun X, Qiu J (2021) Two-stage volt\/var control in active distribution networks with multi-agent deep reinforcement learning method. IEEE Transactions on Smart Grid","DOI":"10.1109\/TSG.2021.3052998"},{"key":"5674_CR14","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.neucom.2020.01.079","volume":"390","author":"D Sim\u00f5es","year":"2020","unstructured":"Sim\u00f5es D, Lau N, Reis LP (2020) Multi-agent actor centralized-critic with communication. Neurocomputing 390:40\u201356","journal-title":"Neurocomputing"},{"issue":"2","key":"5674_CR15","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1016\/j.dt.2020.11.014","volume":"17","author":"B Li","year":"2021","unstructured":"Li B, Yang Z-P, Chen D-Q, Liang S-Y, Ma H (2021) Maneuvering target tracking of uav based on mn-ddpg and transfer learning. Def Technol 17(2):457\u2013466","journal-title":"Def Technol"},{"key":"5674_CR16","doi-asserted-by":"crossref","unstructured":"Wang C, Wang J, Shen Y, Zhang X (2019) Autonomous navigation of uavs in large-scale complex environments: a deep reinforcement learning approach. IEEE Trans Veh Technol 68(3):2124\u20132136","DOI":"10.1109\/TVT.2018.2890773"},{"key":"5674_CR17","doi-asserted-by":"crossref","unstructured":"Chen H, Liu Q, Zhong S (2021) Self-guided deep deterministic policy gradient with multi-actor. Neural Computing and Applications, pp 1\u201310","DOI":"10.1007\/s00521-021-05738-9"},{"issue":"27","key":"5674_CR18","first-page":"7","volume":"35","author":"S Sirota","year":"2019","unstructured":"Sirota S (2019) Darpa announces alphadogfight trials with simulated, al-controlled uavs. Inside the Pentagon 35(27):7\u20137","journal-title":"Inside the Pentagon"},{"key":"5674_CR19","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv:1707.06347"},{"issue":"6","key":"5674_CR20","first-page":"731","volume":"41","author":"M Lu","year":"2019","unstructured":"Lu M, Chengju L, Limin L, Binchen X, Qijun C (2019) Adaptive walking control algorithm of biped robot based on am-rppo. Robot 41(6):731\u2013741","journal-title":"Robot"},{"key":"5674_CR21","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT press"},{"key":"5674_CR22","unstructured":"Babaeizadeh M, Frosio I, Tyree S, Clemons J, Kautz J (2016) Reinforcement learning through asynchronous advantage actor-critic on a gpu. arXiv:1611.06256"},{"key":"5674_CR23","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: International conference on machine learning. PMLR, pp 1889\u20131897"},{"key":"5674_CR24","unstructured":"Yu C, Velu A, Vinitsky E, Wang Y, Bayen A, Wu Y (2021) The surprising effectiveness of mappo in cooperative, multi-agent games. arXiv:2103.01955"},{"key":"5674_CR25","doi-asserted-by":"crossref","unstructured":"Wierstra D, Foerster A, Peters J, Schmidhuber J (2007) Solving deep memory pomdps with recurrent policy gradients. In: International conference on artificial neural networks","DOI":"10.1007\/978-3-540-74690-4_71"},{"key":"5674_CR26","unstructured":"Hausknecht M, Stone P (2015) Deep recurrent q-learning for partially observable mdps. Computer Science"},{"key":"5674_CR27","doi-asserted-by":"crossref","unstructured":"Tariq M, Naeem F, Ali M, Poor HV (2020) Vulnerability assessment of 6g-enabled smart grid cyber-physical systems. IEEE Internet of Things Journal","DOI":"10.1109\/JIOT.2020.3042090"},{"issue":"07","key":"5674_CR28","doi-asserted-by":"publisher","first-page":"10663","DOI":"10.1609\/aaai.v34i07.6693","volume":"34","author":"M Choi","year":"2020","unstructured":"Choi M, Kim H, Han B, Xu N, Lee KM (2020) Channel attention is all you need for video frame interpolation. Proceedings of the AAAI Conference on Artificial Intelligence 34(07):10663\u201310671","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"5674_CR29","doi-asserted-by":"crossref","unstructured":"Ali M, Karimipour H, Tariq M (2021) Integration of blockchain and federated learning for internet of things: recent advances and future challenges. Comput & Secur 5:102355","DOI":"10.1016\/j.cose.2021.102355"},{"key":"5674_CR30","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. arXiv:1706.02275"},{"key":"5674_CR31","unstructured":"Rashid T, Samvelyan M, Schroeder C, Farquhar G, Foerster J, Whiteson S (2018) Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning. In: International conference on machine learning, pp 4295\u20134304"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05674-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05674-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05674-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,15]],"date-time":"2024-08-15T13:25:36Z","timestamp":1723728336000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05674-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,29]]},"references-count":31,"journal-issue":{"issue":"20","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["5674"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05674-w","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,29]]},"assertion":[{"value":"7 July 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 July 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest"}}]}}