{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T22:04:45Z","timestamp":1769810685300,"version":"3.49.0"},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Project of National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62106283"],"award-info":[{"award-number":["62106283"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s40747-025-02128-9","type":"journal-article","created":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T05:48:37Z","timestamp":1764049717000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hierarchical reinforcement learning with opponent modeling for command and control system"],"prefix":"10.1007","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4411-7633","authenticated-orcid":false,"given":"Tengda","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiang","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minrui","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangyu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,25]]},"reference":[{"issue":"6","key":"2128_CR1","doi-asserted-by":"publisher","first-page":"1835","DOI":"10.1109\/TSMCB.2012.2231673","volume":"43","author":"ZR Bogdanowicz","year":"2013","unstructured":"Bogdanowicz ZR, Tolano A, Patel K et al (2013) Optimization of weapon-target pairings based on kill probabilities. IEEE Trans Cybern 43(6):1835\u20131844","journal-title":"IEEE Trans Cybern"},{"issue":"3","key":"2128_CR2","doi-asserted-by":"publisher","first-page":"3499","DOI":"10.32604\/cmc.2023.041253","volume":"76","author":"TD Li","year":"2023","unstructured":"Li TD, Wang G, Fu Q et al (2023) An intelligent algorithm for solving weapon-target assignment problem: DDPG-DNPE algorithm. CMC-Comput Mater Continua 76(3):3499\u20133522","journal-title":"CMC-Comput Mater Continua"},{"issue":"7","key":"2128_CR3","first-page":"732","volume":"15","author":"S Xiao","year":"2024","unstructured":"Xiao S (2024) Unmanned aerial vehicles following photography path planning technology based on kinematic and adaptive models. Int J Adv Comput Sci Appl 15(7):732\u2013742","journal-title":"Int J Adv Comput Sci Appl"},{"issue":"8","key":"2128_CR4","doi-asserted-by":"publisher","first-page":"3857","DOI":"10.1109\/TCSI.2024.3371492","volume":"7","author":"ZB Du","year":"2024","unstructured":"Du ZB, Xie XP, Qu ZF et al (2024) Dynamic event-triggered consensus control for interval type-2 fuzzy multi-agent systems. IEEE Trans Circuits Syst I-Regul Pap 7(8):3857\u20133866","journal-title":"IEEE Trans Circuits Syst I-Regul Pap"},{"issue":"3","key":"2128_CR5","doi-asserted-by":"publisher","first-page":"7131","DOI":"10.3934\/math.2024348","volume":"9","author":"SM Alshammari","year":"2024","unstructured":"Alshammari SM, Alganmi NA, Ba-Aoum MH et al (2024) Hybrid arithmetic optimization algorithm with deep learning model for secure unmanned aerial vehicle networks. Aims Math 9(3):7131\u20137151","journal-title":"Aims Math"},{"issue":"3","key":"2128_CR6","doi-asserted-by":"publisher","first-page":"1034","DOI":"10.1007\/s42405-024-00707-7","volume":"25","author":"C Zhang","year":"2024","unstructured":"Zhang C, Tao CY, Xu YL et al (2024) Autonomous defense of unmanned aerial vehicles against missile attacks using a GRU-based PPO algorithm. Int J Aeronaut Space Sci 25(3):1034\u20131049","journal-title":"Int J Aeronaut Space Sci"},{"issue":"2","key":"2128_CR7","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1109\/TITS.2020.3024655","volume":"23","author":"S Aradi","year":"2022","unstructured":"Aradi S (2022) Survey of deep reinforcement learning for motion planning of autonomous vehicles. IEEE Trans Intell Transp Syst 23(2):740\u2013759","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"2128_CR8","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.trc.2019.08.011","volume":"107","author":"YJ Ye","year":"2019","unstructured":"Ye YJ, Zhang XH, Sun J (2019) Automated vehicle\u2019s behavior decision making using deep reinforcement learning and high-fidelity simulation environment. Transp Res Part C-Emerg Technol 107:155\u2013170","journal-title":"Transp Res Part C-Emerg Technol"},{"issue":"10","key":"2128_CR9","doi-asserted-by":"publisher","first-page":"12098","DOI":"10.1109\/TPAMI.2023.3283537","volume":"45","author":"TM Hu","year":"2023","unstructured":"Hu TM, Luo B, Yang CH et al (2023) MO-MIX: multi-objective multi-agent cooperative decision-making with deep reinforcement learning. IEEE Trans Pattern Anal Mach Intell 45(10):12098\u201312112","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2128_CR10","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.ins.2023.03.070","volume":"634","author":"CE Hrabia","year":"2023","unstructured":"Hrabia CE, Lehmann PM, Albayrak S (2023) Self-triggered finite-time control for discrete-time Markov jump systems. Inf Sci 634:101\u2013121","journal-title":"Inf Sci"},{"key":"2128_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2022.102412","volume":"78","author":"Y Zhang","year":"2022","unstructured":"Zhang Y, Zhu HH, Tang DB et al (2022) Dynamic job shop scheduling based on deep reinforcement learning for multi-agent manufacturing systems. Robot Comput-Integr Manuf 78:102412","journal-title":"Robot Comput-Integr Manuf"},{"key":"2128_CR12","doi-asserted-by":"publisher","first-page":"29076","DOI":"10.1007\/s10489-023-05058-6","volume":"53","author":"LY Jia","year":"2023","unstructured":"Jia LY, Cai CT, Wang XM et al (2023) Multi-intent autonomous decision-making for air combat with deep reinforcement learning. Appl Intell 53:29076\u201329093","journal-title":"Appl Intell"},{"key":"2128_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2021.109794","volume":"239","author":"J Xu","year":"2021","unstructured":"Xu J, Huang F, Wu D et al (2021) Deep reinforcement learning based multi-AUVs cooperative decision-making for attack-defense confrontation missions. Ocean Eng 239:109794","journal-title":"Ocean Eng"},{"issue":"6","key":"2128_CR14","doi-asserted-by":"publisher","first-page":"2233","DOI":"10.3390\/s21062233","volume":"21","author":"K Li","year":"2021","unstructured":"Li K, Zhang K, Zhang ZC et al (2021) A UAV Maneuver decision-making algorithm for autonomous airdrop based on deep reinforcement learning. Sensors 21(6):2233","journal-title":"Sensors"},{"issue":"2","key":"2128_CR15","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1109\/TIV.2019.2955905","volume":"5","author":"CJ Hoel","year":"2020","unstructured":"Hoel CJ, Driggs-Campbell K, Wolff K et al (2020) Combining planning and deep reinforcement learning in tactical decision making for autonomous driving. IEEE Trans Intell Veh 5(2):294\u2013305","journal-title":"IEEE Trans Intell Veh"},{"issue":"1","key":"2128_CR16","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1109\/TSG.2021.3124465","volume":"13","author":"TY Chen","year":"2022","unstructured":"Chen TY, Bu SR, Liu X et al (2022) Peer-to-peer energy trading and energy conversion in interconnected multi-energy microgrids using multi-agent deep reinforcement learning. IEEE Trans Smart Grid 13(1):715\u2013727","journal-title":"IEEE Trans Smart Grid"},{"issue":"7","key":"2128_CR17","doi-asserted-by":"publisher","DOI":"10.3390\/app13074569","volume":"13","author":"S Yu","year":"2023","unstructured":"Yu S, Zhu W, Wang Y (2023) Research on wargame decision-making method based on multi-agent deep deterministic policy gradient. Appl Sci 13(7):4569","journal-title":"Appl Sci"},{"issue":"4","key":"2128_CR18","doi-asserted-by":"publisher","first-page":"2974","DOI":"10.1109\/TETCI.2024.3369636","volume":"8","author":"XY Tan","year":"2024","unstructured":"Tan XY, Qu C, Xiong JW et al (2024) Model-based off-policy deep reinforcement learning with model-embedding. IEEE Trans Emerg Top Comput Intell 8(4):2974\u20132986","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"issue":"9","key":"2128_CR19","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen TT, Nguyen ND, Nahavandi S (2020) Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications. IEEE Trans Cybern 50(9):3826\u20133839","journal-title":"IEEE Trans Cybern"},{"issue":"6","key":"2128_CR20","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2022","unstructured":"Kiran BR, Sobh I, Talpaert V et al (2022) Deep reinforcement learning for autonomous driving: a survey. IEEE Trans Intell Transp Syst 23(6):4909\u20134926","journal-title":"IEEE Trans Intell Transp Syst"},{"issue":"2","key":"2128_CR21","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1007\/s10462-021-09996-w","volume":"55","author":"S Gronauer","year":"2022","unstructured":"Gronauer S, Diepold K (2022) Multi-agent deep reinforcement learning: a survey. Artif Intell Rev 55(2):895\u2013943","journal-title":"Artif Intell Rev"},{"issue":"2","key":"2128_CR22","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/s10458-006-7035-4","volume":"13","author":"M Ghavamzadeh","year":"2006","unstructured":"Ghavamzadeh M, Mahadevan S, Makar R (2006) Hierarchical multi-agent reinforcement learning. Auton Agent Multi-Agent Syst 13(2):197\u2013229","journal-title":"Auton Agent Multi-Agent Syst"},{"issue":"8","key":"2128_CR23","doi-asserted-by":"publisher","first-page":"672","DOI":"10.1080\/01969722.2019.1677335","volume":"50","author":"M Ossenkopf","year":"2019","unstructured":"Ossenkopf M, Jorgensen M, Geihs K (2019) When does communication learning need hierarchical multi-agent deep reinforcement learning. Cybern Syst 50(8):672\u2013692","journal-title":"Cybern Syst"},{"issue":"9","key":"2128_CR24","first-page":"35","volume":"13","author":"GE Setyawan","year":"2022","unstructured":"Setyawan GE, Hartono P, Sawada H (2022) Cooperative multi-robot hierarchical reinforcement learning. Int J Adv Comput Sci Appl 13(9):35\u201344","journal-title":"Int J Adv Comput Sci Appl"},{"issue":"4","key":"2128_CR25","doi-asserted-by":"publisher","first-page":"4152","DOI":"10.1109\/TPAMI.2022.3192418","volume":"45","author":"TR Zhang","year":"2023","unstructured":"Zhang TR, Guo SQ, Tan T et al (2023) Adjacency constraint for efficient hierarchical reinforcement learning. IEEE Trans Pattern Anal Mach Intell 45(4):4152\u20134166","journal-title":"IEEE Trans Pattern Anal Mach Intell"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-02128-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-02128-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-02128-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T11:48:05Z","timestamp":1769773685000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-02128-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,25]]},"references-count":25,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["2128"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-02128-9","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,25]]},"assertion":[{"value":"16 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"20"}}