{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T21:25:49Z","timestamp":1771104349872,"version":"3.50.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T00:00:00Z","timestamp":1750982400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T00:00:00Z","timestamp":1750982400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906211"],"award-info":[{"award-number":["61906211"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72201275"],"award-info":[{"award-number":["72201275"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s40747-025-01992-9","type":"journal-article","created":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T02:45:29Z","timestamp":1750992329000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["An air combat maneuver decision-making approach using coupled reward in deep reinforcement learning"],"prefix":"10.1007","volume":"11","author":[{"given":"Jian","family":"Yang","sequence":"first","affiliation":[]},{"given":"Liangpei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jiale","family":"Han","sequence":"additional","affiliation":[]},{"given":"Changdi","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3982-7860","authenticated-orcid":false,"given":"Yinlong","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Zhu Liang","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Guoli","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,27]]},"reference":[{"key":"1992_CR1","unstructured":"Jiandong Z, Dinghan W, Qiming Y et al. (2023) Multi-dimensional decisionmaking for uav air combat based on hierarchical reinforcement learning. Acta Armamentarii 44(06):1547\u20131563. https:\/\/doi.org\/10.12382\/bgxb.2022. 0711"},{"key":"1992_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.ast.2022.107857","volume":"129","author":"J Xu","year":"2022","unstructured":"Xu J, Zhang J, Yang L, Liu C (2022) Autonomous decision-making for dogfights based on a tactical pursuit point approach. Aerosp Sci Technol 129:107857. https:\/\/doi.org\/10.1016\/j.ast.2022.107857","journal-title":"Aerosp Sci Technol"},{"issue":"9","key":"1992_CR3","doi-asserted-by":"publisher","first-page":"1697","DOI":"10.1016\/j.dt.2021.09.014","volume":"18","author":"Y-F Li","year":"2022","unstructured":"Li Y-F, Shi J-P, Jiang W, Zhang W-G, Lyu Y-X (2022) Autonomous maneuver decision-making for a ucav in short-range aerial combat based on an ms-ddqn algorithm. Defence Technol 18(9):1697\u20131714. https:\/\/doi.org\/10.1016\/j.dt.2021.09.014","journal-title":"Defence Technol"},{"key":"1992_CR4","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1016\/j.ast.2017.11.014","volume":"72","author":"H Shin","year":"2018","unstructured":"Shin H, Lee J, Kim H, Shim DH (2018) An autonomous aerial combat framework for two-on-two engagements based on basic fighter maneuvers. Aerosp Sci Technol 72:305\u2013315. https:\/\/doi.org\/10.1016\/j.ast.2017.11.014","journal-title":"Aerosp Sci Technol"},{"key":"1992_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.futures.2021.102848","volume":"134","author":"J Jordan","year":"2021","unstructured":"Jordan J (2021) The future of unmanned combat aerial vehicles: an analysis using the three horizons framework. Futures 134:102848. https:\/\/doi.org\/10.1016\/j.futures.2021.102848","journal-title":"Futures"},{"key":"1992_CR6","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-023-05058-6","author":"L Jia","year":"2023","unstructured":"Jia L, Cai C, Wang X, Ding Z, Xu J, Wu K, Liu J (2023) Multi-intent autonomous decision-making for air combat with deep reinforcement learning. Appl Intell. https:\/\/doi.org\/10.1007\/s10489-023-05058-6","journal-title":"Appl Intell"},{"issue":"4, SI","key":"1992_CR7","doi-asserted-by":"publisher","first-page":"2425","DOI":"10.1007\/s00500-018-03689-3","volume":"24","author":"Y Feng","year":"2020","unstructured":"Feng Y, Dai L, Gao J, Cheng G (2020) Uncertain pursuit-evasion game. Soft Comput 24(4, SI):2425\u20132429. https:\/\/doi.org\/10.1007\/s00500-018-03689-3","journal-title":"Soft Comput"},{"key":"1992_CR8","doi-asserted-by":"publisher","DOI":"10.2514\/6.1987-2393","author":"F Austin","year":"1987","unstructured":"Austin F, Carbone G, Falco M, Hinz H, Lewis M (1987) Automated maneuvering decisions for air-to-air combat. AIAA J. https:\/\/doi.org\/10.2514\/6.1987-2393","journal-title":"AIAA J"},{"issue":"8","key":"1992_CR9","doi-asserted-by":"publisher","DOI":"10.7527\/S1000-6893.2021.25799","volume":"42","author":"Z Sun","year":"2021","unstructured":"Sun Z, Yang S, Piao Y, Bai C, Ge J (2021) A survey of air combat artificial intelligence. Acta Aeronautica et Astronautica Sinica 42(8):525799. https:\/\/doi.org\/10.7527\/S1000-6893.2021.25799","journal-title":"Acta Aeronautica et Astronautica Sinica"},{"key":"1992_CR10","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1016\/j.neunet.2022.03.037","volume":"152","author":"Y Matsuo","year":"2022","unstructured":"Matsuo Y, LeCun Y, Sahani M, Precup D, Silver D, Sugiyama M, Uchibe E, Morimoto J (2022) Deep learning, reinforcement learning, and world models. Neural Netw 152:267\u2013275. https:\/\/doi.org\/10.1016\/j.neunet.2022.03.037","journal-title":"Neural Netw"},{"issue":"9","key":"1992_CR11","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen TT, Nguyen ND, Nahavandi S (2020) Deep reinforcement learning for multiagent systems: A review of challenges, solutions, and applications. IEEE Trans Cybern 50(9):3826\u20133839. https:\/\/doi.org\/10.1109\/TCYB.2020.2977374","journal-title":"IEEE Trans Cybern"},{"issue":"7587","key":"1992_CR12","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, Dieleman S, Grewe D, Nham J, Kalchbrenner N, Sutskever I, Lillicrap T, Leach M, Kavukcuoglu K, Graepel T, Hassabis D (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484. https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"issue":"12","key":"1992_CR13","doi-asserted-by":"publisher","first-page":"33","DOI":"10.11918\/202005108","volume":"53","author":"Y Li","year":"2021","unstructured":"Li Y, Shi J, Zhang W, Jiang W (2021) Maneuver decision of ucav in air combat based on deep reinforcement learning. J Harbin Inst Technol 53(12):33\u201341. https:\/\/doi.org\/10.11918\/202005108","journal-title":"J Harbin Inst Technol"},{"issue":"6","key":"1992_CR14","doi-asserted-by":"publisher","first-page":"1547","DOI":"10.12382\/bgxb.2022.0711","volume":"44","author":"J Zhang","year":"2023","unstructured":"Zhang J, Wang D, Yang Q, Shi G, Lu Y, Zhang Y (2023) Multi-dimensional decision-making for uav air combat based on hierarchical reinforcement learning. Binggong Xuebao\/Acta Armamentarii 44(6):1547\u20131563. https:\/\/doi.org\/10.12382\/bgxb.2022.0711","journal-title":"Binggong Xuebao\/Acta Armamentarii"},{"issue":"1","key":"1992_CR15","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1051\/jnwpu\/20224010047","volume":"40","author":"F Xiaowei","year":"2022","unstructured":"Xiaowei F, Zhe X, Hui W (2022) Generalization strategy design of uavs pursuit evasion game based on ddpg. Xibei Gongye Daxue Xuebao\/J Northwestern Polytech Univ 40(1):47\u201355. https:\/\/doi.org\/10.1051\/jnwpu\/20224010047","journal-title":"Xibei Gongye Daxue Xuebao\/J Northwestern Polytech Univ"},{"key":"1992_CR16","doi-asserted-by":"publisher","unstructured":"Chai J, Chen W, Zhu Y, Yao Z-X, Zhao D (2023) A hierarchical deep reinforcement learning framework for 6-dof ucav air-to-air combat. IEEE Transactions on Systems, Man, and Cybernetics: Systems .https:\/\/doi.org\/10.1109\/TSMC.2023.3270444","DOI":"10.1109\/TSMC.2023.3270444"},{"key":"1992_CR17","doi-asserted-by":"publisher","unstructured":"Mao Y, Li Q, He J, Xia Z, Fei A (2022) Construction method of air combat agent based on reinforcement learning. In: Proceedings of 2022 10th China Conference on Command and Control. Lecture Notes in Electrical Engineering (949), pp. 98\u2013110. https:\/\/doi.org\/10.1007\/978-981-19-6052-9_10","DOI":"10.1007\/978-981-19-6052-9_10"},{"key":"1992_CR18","doi-asserted-by":"publisher","unstructured":"He Y, Yang D, Zhang M, Li Y (2023) Curriculum-rl based air combat decision-making. In: Yan L, Duan H, Deng Y (eds) Advances in guidance, navigation and control: Proceedings of 2022 International Conference on Guidance, Navigation and Control. Lecture Notes in Electrical Engineering (845), pp. 4611\u201321. https:\/\/doi.org\/10.1007\/978-981-19-6613-2_447","DOI":"10.1007\/978-981-19-6613-2_447"},{"key":"1992_CR19","doi-asserted-by":"publisher","unstructured":"Wang J, Zhu L, Yang H, Ji Y, Wang X (2023) 2v2 air combat confrontation strategy based on reinforcement learning. In: Fu W, Gu M, Niu Y (eds) Proceedings of 2022 international conference on autonomous unmanned systems, ICAUS 2022. Lecture Notes in Electrical Engineering, vol. 1010, pp. 1368\u20131377. https:\/\/doi.org\/10.1007\/978-981-99-0479-2_125","DOI":"10.1007\/978-981-99-0479-2_125"},{"issue":"8","key":"1992_CR20","doi-asserted-by":"publisher","first-page":"2874","DOI":"10.1007\/s10489-019-01417-4","volume":"49","author":"Y Yuan","year":"2019","unstructured":"Yuan Y, Yu ZL, Gu Z, Deng X, Li Y (2019) A novel multi-step reinforcement learning method for solving reward hacking. Appl Intell 49(8):2874\u20132888. https:\/\/doi.org\/10.1007\/s10489-019-01417-4","journal-title":"Appl Intell"},{"key":"1992_CR21","doi-asserted-by":"publisher","unstructured":"Amin K, Jiang N, Singh S (2017) Repeated inverse reinforcement learning. In: Guyon I, Luxburg U, Bengio S, Wallach H, Fergus R, Vishwanathan S, Garnett R (eds) Advances in neural information processing systems 30 (NIPS 2017). Advances in Neural Information Processing Systems, vol. 30. https:\/\/doi.org\/10.48550\/arXiv.1705.05427","DOI":"10.48550\/arXiv.1705.05427"},{"issue":"9","key":"1992_CR22","doi-asserted-by":"publisher","first-page":"2849","DOI":"10.12382\/bgxb.2022.0669","volume":"44","author":"Z Li","year":"2023","unstructured":"Li Z, Li B, Bai S, Meng B (2023) Uav autonomous air combat decision-making based on am-sac. Binggong Xuebao\/Acta Armamentarii 44(9):2849\u20132858. https:\/\/doi.org\/10.12382\/bgxb.2022.0669","journal-title":"Binggong Xuebao\/Acta Armamentarii"},{"key":"1992_CR23","doi-asserted-by":"publisher","DOI":"10.12382\/bgxb.2024.0978","author":"W Yu","year":"2024","unstructured":"Yu W, Yuanpeng L, Zhongyu G, Shuo L, Tianjun R (2024) Hierarchical decision-making for uav air combat based on ddqn-d3pg. Acta Armamentarii. https:\/\/doi.org\/10.12382\/bgxb.2024.0978","journal-title":"Acta Armamentarii"},{"issue":"1","key":"1992_CR24","doi-asserted-by":"publisher","first-page":"86","DOI":"10.21629\/JSEE.2018.01.09","volume":"29","author":"H Changqiang","year":"2018","unstructured":"Changqiang H, Kangsheng D, Hanqiao H, Shangqin T, Zhuoran Z (2018) Autonomous air combat maneuver decision using bayesian inference and moving horizon optimization. J Syst Eng Electron 29(1):86\u201397. https:\/\/doi.org\/10.21629\/JSEE.2018.01.09","journal-title":"J Syst Eng Electron"},{"key":"1992_CR25","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1561\/2200000071","volume":"11","author":"V Francois-Lavet","year":"2018","unstructured":"Francois-Lavet V, Henderson P, Islam R, Bellemare MG, Pineau J (2018) An introduction to deep reinforcement learning. Found Trends Mach Learn 11:219\u2013354. https:\/\/doi.org\/10.1561\/2200000071","journal-title":"Found Trends Mach Learn"},{"key":"1992_CR26","doi-asserted-by":"publisher","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy J, Krause A (eds) International Conference on machine learning, VOL 80. Proceedings of Machine Learning Research, vol. 80. https:\/\/doi.org\/10.48550\/arXiv.1801.01290","DOI":"10.48550\/arXiv.1801.01290"},{"key":"1992_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.104767","volume":"111","author":"D Hu","year":"2022","unstructured":"Hu D, Yang R, Zhang Y, Yue L, Yan M, Zuo J, Zhao X (2022) Aerial combat maneuvering policy learning based on confrontation demonstrations and dynamic quality replay. Eng Appl Artif Intell 111:104767. https:\/\/doi.org\/10.1016\/j.engappai.2022.104767","journal-title":"Eng Appl Artif Intell"},{"key":"1992_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2020\/7180639","volume":"2020","author":"Z Wang","year":"2020","unstructured":"Wang Z, Li H, Wu H, Wu Z (2020) Improving maneuver strategy in air combat by alternate freeze games with a deep reinforcement learning algorithm. Math Probl Eng 2020:1\u201317. https:\/\/doi.org\/10.1155\/2020\/7180639","journal-title":"Math Probl Eng"},{"key":"1992_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107753","author":"X Hou","year":"2022","unstructured":"Hou X, Guo Z, Wang X, Qian T, Zhang J, Qi S, Xiao J (2022) Parallel learner: a practical deep reinforcement learning framework for multi-scenario games. Knowl-Based Syst. https:\/\/doi.org\/10.1016\/j.knosys.2021.107753","journal-title":"Knowl-Based Syst"},{"key":"1992_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110613","author":"Y Li","year":"2023","unstructured":"Li Y, Ren J, Zhang T, Fang Y, Chen F (2023) Mer: Modular element randomization for robust generalizable policy in deep reinforcement learning. Knowl-Based Syst. https:\/\/doi.org\/10.1016\/j.knosys.2023.110613","journal-title":"Knowl-Based Syst"},{"key":"1992_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112000","author":"B Wang","year":"2024","unstructured":"Wang B, Gao X, Xie T (2024) An evolutionary multi-agent reinforcement learning algorithm for multi-uav air combat. Knowl-Based Syst. https:\/\/doi.org\/10.1016\/j.knosys.2024.112000","journal-title":"Knowl-Based Syst"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01992-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-01992-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01992-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T19:04:52Z","timestamp":1752779092000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-01992-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,27]]},"references-count":31,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1992"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-01992-9","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,27]]},"assertion":[{"value":"10 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors do not have any conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}}],"article-number":"364"}}