{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T04:13:23Z","timestamp":1769314403235,"version":"3.49.0"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T00:00:00Z","timestamp":1769212800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T00:00:00Z","timestamp":1769212800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Research and Development Program of China","award":["JCKY2018607C019"],"award-info":[{"award-number":["JCKY2018607C019"]}]},{"name":"Key Laboratory Fund of UAV of Northwestern Polytechnical University","award":["2021JCJ-QLB07101"],"award-info":[{"award-number":["2021JCJ-QLB07101"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-026-08239-5","type":"journal-article","created":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T14:57:24Z","timestamp":1769266644000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Causal reinforcement learning for unmanned aerial vehicle pursuit-evasion games with sparse rewards"],"prefix":"10.1007","volume":"82","author":[{"given":"Lei","family":"Lei","sequence":"first","affiliation":[]},{"given":"Zhaoxin","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Chengfu","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Huaimin","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,24]]},"reference":[{"issue":"5","key":"8239_CR1","doi-asserted-by":"publisher","first-page":"7056","DOI":"10.1109\/TII.2024.3363084","volume":"20","author":"N Sun","year":"2024","unstructured":"Sun N, Zhao J, Shi Q, Liu C, Liu P (2024) Moving target tracking by unmanned aerial vehicle: a survey and taxonomy. IEEE Trans Ind Inform 20(5):7056\u20137068. https:\/\/doi.org\/10.1109\/TII.2024.3363084","journal-title":"IEEE Trans Ind Inform"},{"key":"8239_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1017\/S1049023X22000139","volume":"31","author":"DG Barten","year":"2022","unstructured":"Barten DG, Tin D, De Cauwer H, Ciottone RG, Ciottone GR (2022) A counter-terrorism medicine analysis of drone attacks. Prehosp Disaster Med 31:1\u20135. https:\/\/doi.org\/10.1017\/S1049023X22000139","journal-title":"Prehosp Disaster Med"},{"issue":"6","key":"8239_CR3","doi-asserted-by":"publisher","first-page":"4906","DOI":"10.1109\/JIOT.2020.3030240","volume":"8","author":"Y Wu","year":"2021","unstructured":"Wu Y, Wu S, Hu X (2021) Cooperative path planning of UAVs & UGVs or a persistent surveillance task in urban environment. IEEE Internet Things J 8(6):4906\u20134919. https:\/\/doi.org\/10.1109\/JIOT.2020.3030240","journal-title":"IEEE Internet Things J"},{"key":"8239_CR4","doi-asserted-by":"publisher","unstructured":"Schmidt LM, Brosig J, Plinge A, Eskofier BM and Mutschler C (2022) An introduction to multiagent reinforcement learning and review of its application to autonomous mobility. 
In: 2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC), pp 1342-1349. https:\/\/doi.org\/10.1109\/ITSC55140.2022.9922205","DOI":"10.1109\/ITSC55140.2022.9922205"},{"key":"8239_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2021.12.025","volume":"475","author":"J Selvakumar","year":"2022","unstructured":"Selvakumar J, Bakolas E (2022) Min\u2013Max Q-learning for multi-player pursuit-evasion games. Neurocomputing 475:1\u201314. https:\/\/doi.org\/10.1016\/j.neucom.2021.12.025","journal-title":"Neurocomputing"},{"issue":"4","key":"8239_CR6","doi-asserted-by":"publisher","first-page":"1911","DOI":"10.1109\/TASE.2020.2980423","volume":"17","author":"P Shen","year":"2020","unstructured":"Shen P, Zhang X, Fang Y, Yuan M (2020) Real-time acceleration continuous path-constrained trajectory planning with built-in tradeoff between cruise and time-optimal motions. IEEE Trans Autom Sci Eng 17(4):1911\u20131924. https:\/\/doi.org\/10.1109\/TASE.2020.2980423","journal-title":"IEEE Trans Autom Sci Eng"},{"issue":"7540","key":"8239_CR7","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"issue":"7676","key":"8239_CR8","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T (2017) Mastering the game of Go without human knowledge. Nature 550(7676):354\u2013359. https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"issue":"7782","key":"8239_CR9","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O, Babuschkin I, Czarnecki WM, Mathieu M, Dudzik A, Chung J (2019) Grandmaster level in StarCraft II using multiagent reinforcement learning. Nature 575(7782):350\u2013354. https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"key":"8239_CR10","doi-asserted-by":"publisher","unstructured":"Riedmiller M, Hafner R, Lampe T, Neunert M, Degrave J, Wiele T, et al (2018) Learning by playing solving sparse reward tasks from scratch. In: 2018 35th International Conference on Machine Learning (ICML), pp 4344-4353. https:\/\/doi.org\/10.48550\/arXiv.1802.10567","DOI":"10.48550\/arXiv.1802.10567"},{"key":"8239_CR11","doi-asserted-by":"publisher","unstructured":"Booth S, Knox WB, Shah J, Niekum S, Stone P, Allievi A (2023) The perils of trial-and-error reward design: Misdesign through overfitting and invalid task specifications. In: 2023 37th AAAI Conference on Artificial Intelligence, vol 37, no 5, pp 5920\u20135929. https:\/\/doi.org\/10.1609\/aaai.v37i5.25733","DOI":"10.1609\/aaai.v37i5.25733"},{"key":"8239_CR12","doi-asserted-by":"publisher","unstructured":"Vasan G, Wang Y, Shahriar F, Bergstra J, Jagersand M, Mahmood R (2024) Revisiting constant negative rewards for goal-reaching tasks in robot learning. 
https:\/\/doi.org\/10.48550\/arXiv.2407.00324","DOI":"10.48550\/arXiv.2407.00324"},{"key":"8239_CR13","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1016\/j.neucom.2020.08.024","volume":"s418","author":"J Li","year":"2020","unstructured":"Li J, Shi X, Li J, Zhang X, Wang J (2020) Random curiosity-driven exploration in deep reinforcement learning. Neurocomputing s418:139\u2013147. https:\/\/doi.org\/10.1016\/j.neucom.2020.08.024","journal-title":"Neurocomputing"},{"key":"8239_CR14","doi-asserted-by":"publisher","unstructured":"Wang Z, Hu J, Stone P (2023) ELDEN: Exploration via local dependencies. In: 2023 37th International Conference on Neural Information Processing Systems (NIPS), pp 15456-15474. https:\/\/doi.org\/10.48550\/arXiv.2310.08702","DOI":"10.48550\/arXiv.2310.08702"},{"key":"8239_CR15","doi-asserted-by":"publisher","unstructured":"Ji T, Liang Y, Zeng Y (2024) ACE: Off-policy actor-critic with causality-aware entropy regularization. https:\/\/doi.org\/10.48550\/arXiv.2402.14528","DOI":"10.48550\/arXiv.2402.14528"},{"key":"8239_CR16","doi-asserted-by":"publisher","unstructured":"Andrychowicz M, Wolski F, Ray A, Schneider J, Fong R, Welinder P (2017) Hindsight experience replay. In: 2017 31st International Conference on Neural Information Processing Systems (NIPS), pp 5048\u20135058. https:\/\/doi.org\/10.48550\/arXiv.1707.01495","DOI":"10.48550\/arXiv.1707.01495"},{"key":"8239_CR17","doi-asserted-by":"publisher","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T and Tassa Y (2016) Continuous control with deep reinforcement learning. In: 2016 4th International Conference on Learning Representations (ICLR), pp 1\u201314. https:\/\/doi.org\/10.48550\/arXiv.1509.02971","DOI":"10.48550\/arXiv.1509.02971"},{"issue":"3","key":"8239_CR18","doi-asserted-by":"publisher","first-page":"1405","DOI":"10.1109\/TCYB.2019.2958548","volume":"52","author":"X Fang","year":"2022","unstructured":"Fang X, Wang C, Xie L, Chen J (2022) Cooperative pursuit with multi-pursuer and one faster free-moving evader. IEEE Trans Cybern 52(3):1405\u20131414. https:\/\/doi.org\/10.1109\/TCYB.2019.2958548","journal-title":"IEEE Trans Cybern"},{"key":"8239_CR19","unstructured":"Tran HV (2021) Hamilton\u2013Jacobi equations: theory and application. American Mathematical Society, Providence, USA"},{"issue":"10","key":"8239_CR20","doi-asserted-by":"publisher","first-page":"4432","DOI":"10.1109\/TNNLS.2020.3017762","volume":"32","author":"Y Yuan","year":"2021","unstructured":"Yuan Y, Zhang P, Li X (2021) Synchronous fault-tolerant near-optimal control for discrete-time nonlinear PE game. IEEE Trans Neural Netw Learn Syst 32(10):4432\u20134444. https:\/\/doi.org\/10.1109\/TNNLS.2020.3017762","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"3","key":"8239_CR21","doi-asserted-by":"publisher","first-page":"1958","DOI":"10.1109\/TSMC.2022.3210022","volume":"53","author":"T Pan","year":"2023","unstructured":"Pan T, Yuan Y (2023) A region-based relay pursuit scheme for a pursuit-evasion game with a single evader and multiple pursuers. IEEE Trans Syst Man Cybern Syst 53(3):1958\u20131969. 
https:\/\/doi.org\/10.1109\/TSMC.2022.3210022","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"issue":"1","key":"8239_CR22","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1007\/s40815-021-01158-y","volume":"24","author":"A Wu","year":"2022","unstructured":"Wu A, Yang R, Liang X, Zhang J, Qi D, Wang N (2022) Visual range maneuver decision of unmanned combat aerial vehicle based on fuzzy reasoning. Int J Fuzzy Syst 24(1):519\u2013536. https:\/\/doi.org\/10.1007\/s40815-021-01158-y","journal-title":"Int J Fuzzy Syst"},{"key":"8239_CR23","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.automatica.2016.04.012","volume":"71","author":"J Chen","year":"2016","unstructured":"Chen J, Zha W, Peng Z, Gu D (2016) Multi-player pursuit-evasion games with one superior evader. Automatica 71:24\u201332. https:\/\/doi.org\/10.1016\/j.automatica.2016.04.012","journal-title":"Automatica"},{"key":"8239_CR24","doi-asserted-by":"publisher","unstructured":"Zhang X, Qiu H, Wang Y, Sun L, Sun C (2024) Multi-UAV distributed expulsion based on deep reinforcement learning. In: 2024 43rd Chinese Control Conference (CCC), pp 3918\u20133925. https:\/\/doi.org\/10.23919\/CCC63176.2024.10662395","DOI":"10.23919\/CCC63176.2024.10662395"},{"key":"8239_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.oceaneng.2024.120059","volume":"317","author":"Z Wang","year":"2025","unstructured":"Wang Z, Hu Q, Wang C, Liu Y, Xie W (2025) Target tracking control for unmanned surface vehicles: an end-to-end deep reinforcement learning approach. Ocean Eng 317:120059. https:\/\/doi.org\/10.1016\/j.oceaneng.2024.120059","journal-title":"Ocean Eng"},{"key":"8239_CR26","doi-asserted-by":"publisher","unstructured":"Wang X, Wang Y, Zhou W, Zhang J (2023) Pursuit-evasion game of unmanned surface vehicles based on deep reinforcement learning. In: 2023 4th International Conference on Electronic Communication and Artificial Intelligence (ICECAI), pp 358\u2013363. https:\/\/doi.org\/10.1109\/ICECAI58670.2023.10176487","DOI":"10.1109\/ICECAI58670.2023.10176487"},{"key":"8239_CR27","doi-asserted-by":"publisher","unstructured":"Bandela SR, Sinha A and Cao Y (2025) A reinforcement learning approach to target capture in the presence of a defender. In: AIAA SCITECH 2025 Forum, p 1902. https:\/\/doi.org\/10.2514\/6.2025-1902","DOI":"10.2514\/6.2025-1902"},{"key":"8239_CR28","doi-asserted-by":"publisher","unstructured":"Niu Y, Tian Y, Wang Q (2024) Counter-encirclement of UAV in pursuit-evasion environment via improved RL. In: 2024 IEEE International Conference on Unmanned Systems (ICUS), pp 266\u2013271. https:\/\/doi.org\/10.1109\/ICUS61736.2024.10840004","DOI":"10.1109\/ICUS61736.2024.10840004"},{"key":"8239_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102440","volume":"78","author":"W Xiao","year":"2023","unstructured":"Xiao W, Yuan L, Ran T, He L, Zhang J, Cui J (2023) Multimodal fusion for autonomous navigation via deep reinforcement learning with sparse rewards and hindsight experience replay. Displays 78:102440. https:\/\/doi.org\/10.1016\/j.displa.2023.102440","journal-title":"Displays"},{"issue":"1","key":"8239_CR30","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1109\/TAI.2023.3237665","volume":"5","author":"X He","year":"2023","unstructured":"He X, Lv C (2023) Robotic control in adversarial and sparse reward environments: a robust goal-conditioned reinforcement learning approach. IEEE Trans Artif Intell 5(1):244\u2013253. 
https:\/\/doi.org\/10.1109\/TAI.2023.3237665","journal-title":"IEEE Trans Artif Intell"},{"key":"8239_CR31","doi-asserted-by":"publisher","unstructured":"Hossain J, Faridee A, Roy N, Freeman J, Gregory T, Trout TT (2024) TopoNav: Topological navigation for efficient exploration in sparse reward environments. https:\/\/doi.org\/10.48550\/arXiv.2402.04061","DOI":"10.48550\/arXiv.2402.04061"},{"issue":"7","key":"8239_CR32","doi-asserted-by":"publisher","first-page":"6180","DOI":"10.1109\/JIOT.2020.2973193","volume":"7","author":"C Wang","year":"2020","unstructured":"Wang C, Wang J, Wang J, Zhang X (2020) Deep-reinforcement-learning-based autonomous UAV navigation with sparse rewards. IEEE Internet Things J 7(7):6180\u20136190. https:\/\/doi.org\/10.1109\/JIOT.2020.2973193","journal-title":"IEEE Internet Things J"},{"key":"8239_CR33","doi-asserted-by":"publisher","unstructured":"Ororbia A and Mali A (2023) Active predictive coding: brain-inspired reinforcement learning for sparse reward robotic control problem. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp 3015\u20133021. https:\/\/doi.org\/10.1109\/ICRA48891.2023.10160530","DOI":"10.1109\/ICRA48891.2023.10160530"},{"issue":"1","key":"8239_CR34","doi-asserted-by":"publisher","DOI":"10.1088\/2631-8695\/ad1f14","volume":"6","author":"D Rupayan","year":"2024","unstructured":"Rupayan D, Angshuman K, Gunjan P (2024) A proximal policy optimization with curiosity algorithm for virtual drone navigation. Eng Res Express 6(1):015057. https:\/\/doi.org\/10.1088\/2631-8695\/ad1f14","journal-title":"Eng Res Express"},{"key":"8239_CR35","doi-asserted-by":"publisher","unstructured":"Yin H, Lin Y, Yan J, Meng Q, Festl K and Schichler L (2023) AGV path planning using curiosity-driven deep reinforcement learning. In: 2023 IEEE 19th International Conference on Automation Science and Engineering (CASE), pp 1\u20136. https:\/\/doi.org\/10.1109\/CASE56687.2023.10260579","DOI":"10.1109\/CASE56687.2023.10260579"},{"key":"8239_CR36","doi-asserted-by":"publisher","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell T (2017) Curiosity-driven exploration by self-supervised prediction. In: 2017 34th International Conference on Machine Learning (ICML), pp 2778\u20132787. https:\/\/doi.org\/10.48550\/arXiv.1705.05363","DOI":"10.48550\/arXiv.1705.05363"},{"key":"8239_CR37","doi-asserted-by":"publisher","unstructured":"Chen Y, Zhou W and Li J (2023) An improved exploration method for cooperative multi-UAV policy learning with sparse rewards. In: 2023 China Automation Congress (CAC), pp 9044\u20139049. https:\/\/doi.org\/10.1109\/CAC59555.2023.10450304","DOI":"10.1109\/CAC59555.2023.10450304"},{"key":"8239_CR38","doi-asserted-by":"publisher","unstructured":"Burda Y, Edwards H, Storkey A, Klimov O (2019) Exploration by random network distillation. In: 2019 7th International Conference on Learning Representations (ICLR), pp 1\u201317. https:\/\/doi.org\/10.48550\/arXiv.1810.12894","DOI":"10.48550\/arXiv.1810.12894"},{"issue":"2","key":"8239_CR39","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1016\/j.ijar.2006.09.005","volume":"46","author":"S Maes","year":"2007","unstructured":"Maes S, Meganck S, Manderick B (2007) Inference in multi-agent causal model. Int J Approx Reason 46(2):274\u2013299. 
https:\/\/doi.org\/10.1016\/j.ijar.2006.09.005","journal-title":"Int J Approx Reason"},{"key":"8239_CR40","doi-asserted-by":"publisher","unstructured":"Sontakke S, Mehrjou A and Itti L (2021) Causal curiosity: RL agents discovering self-supervised experiments for causal representation learning. In: 2021 38th International Conference on Machine Learning (ICML), pp 9848\u20139858. https:\/\/doi.org\/10.48550\/arXiv.2010.03110","DOI":"10.48550\/arXiv.2010.03110"},{"key":"8239_CR41","doi-asserted-by":"publisher","unstructured":"Dai T, Liu H, Arulkumaran K, Ren G, Bharath AA (2021) Diversity-based trajectory and goal selection with hindsight experience replay. In: 2021 18th Pacific Rim International Conference on Artificial Intelligence (PRICAI), pp 32\u201345. https:\/\/doi.org\/10.1007\/978-3-030-89370-5_3","DOI":"10.1007\/978-3-030-89370-5_3"},{"key":"8239_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126620","volume":"557","author":"Y Luo","year":"2023","unstructured":"Luo Y, Wang Y, Dong K, Zhang Q, Cheng E, Sun Z (2023) Relay hindsight experience replay: self-guided continual reinforcement learning for sequential object manipulation tasks with sparse rewards. Neurocomputing 557:126620. https:\/\/doi.org\/10.1016\/j.neucom.2023.126620","journal-title":"Neurocomputing"},{"key":"8239_CR43","doi-asserted-by":"publisher","unstructured":"Yang R, Fang M, Han L, Du Y, Luo F, Li X (2021) MHER: Model-based hindsight experience replay. In: 2021 35th International Conference on Neural Information Processing Systems (NIPS), pp 28\u201338. https:\/\/doi.org\/10.48550\/arXiv.2107.00306","DOI":"10.48550\/arXiv.2107.00306"},{"key":"8239_CR44","doi-asserted-by":"publisher","unstructured":"Li W, Yan W, Shi H, Li S and Zhou Y (2024) Multi-robot cooperative pursuit-evasion control: A deep reinforcement learning approach based on prioritized experience replay. In: 2024 8th International Conference on Control Engineering and Artificial Intelligence, pp 120\u2013127. https:\/\/doi.org\/10.1145\/3640824.3640843","DOI":"10.1145\/3640824.3640843"},{"key":"8239_CR45","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2023.108858","volume":"110","author":"W Yuan","year":"2023","unstructured":"Yuan W, Rui X (2023) Deep reinforcement learning-based controller for dynamic positioning of an unmanned surface vehicle. Comput Electr Eng 110:108858. https:\/\/doi.org\/10.1016\/j.compeleceng.2023.108858","journal-title":"Comput Electr Eng"},{"key":"8239_CR46","doi-asserted-by":"publisher","first-page":"5649","DOI":"10.1007\/s00521-021-06702-3","volume":"34","author":"AM Tactical","year":"2022","unstructured":"Tactical AM (2022) UAV path optimization under radar threat using deep reinforcement learning. Neural Comput Appl 34:5649\u20135664. https:\/\/doi.org\/10.1007\/s00521-021-06702-3","journal-title":"Neural Comput Appl"},{"key":"8239_CR47","doi-asserted-by":"publisher","unstructured":"Schaul T, Quan J, Antonoglou I, Silver D (2016) Prioritized experience replay. In: 2016 4th International Conference on Learning Representations (ICLR), pp 1\u201321. https:\/\/doi.org\/10.48550\/arXiv.1511.05952","DOI":"10.48550\/arXiv.1511.05952"},{"key":"8239_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.jocs.2023.102113","volume":"72","author":"X Zhao","year":"2023","unstructured":"Zhao X, Du J, Wang Z (2023) HCS-R-HER: Hierarchical reinforcement learning based on cross subtasks rainbow hindsight experience replay. J Comput Sci 72:102113. 
https:\/\/doi.org\/10.1016\/j.jocs.2023.102113","journal-title":"J Comput Sci"},{"key":"8239_CR49","doi-asserted-by":"publisher","unstructured":"Liu C, Kampen EV (2022) HER-PDQN: a reinforcement learning approach for UAV navigation with hybrid action spaces and sparse rewards. In: AIAA SciTech Forum, pp 2022\u20130793. https:\/\/doi.org\/10.2514\/6.2022-0793","DOI":"10.2514\/6.2022-0793"},{"issue":"2","key":"8239_CR50","doi-asserted-by":"publisher","first-page":"161","DOI":"10.20517\/ir.2023.10","volume":"3","author":"S Feng","year":"2023","unstructured":"Feng S, Li X, Ren L, Xu S (2023) Reinforcement learning with parameterized action space and sparse reward for UAV navigation. Intell Robot 3(2):161\u201375. https:\/\/doi.org\/10.20517\/ir.2023.10","journal-title":"Intell Robot"},{"key":"8239_CR51","volume-title":"Elements of causal inference: foundations and learning algorithms","author":"J Peters","year":"2017","unstructured":"Peters J, Janzing D, Sch\u00f6lkopf B (2017) Elements of causal inference: foundations and learning algorithms. MIT Press, Cambridge"},{"issue":"7","key":"8239_CR52","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1016\/j.cja.2021.09.008","volume":"35","author":"W Zhou","year":"2022","unstructured":"Zhou W, Li J, Liu Z, Shen L (2022) Improving multi-target cooperative tracking guidance for UAV swarms using multi-agent reinforcement learning. Chin J Aeronaut 35(7):100\u2013112. https:\/\/doi.org\/10.1016\/j.cja.2021.09.008","journal-title":"Chin J Aeronaut"},{"issue":"1","key":"8239_CR53","doi-asserted-by":"publisher","first-page":"931","DOI":"10.1109\/TVT.2021.3129504","volume":"71","author":"Z Xia","year":"2021","unstructured":"Xia Z, Du J, Wang J (2021) Multi-agent reinforcement learning aided intelligent UAV swarm for target tracking. IEEE Trans Veh Technol 71(1):931\u2013945. https:\/\/doi.org\/10.1109\/TVT.2021.3129504","journal-title":"IEEE Trans Veh Technol"},{"key":"8239_CR54","unstructured":"Cover TM, Thomas JA (2006) Elements of information theory. In: Wiley Series in Telecommunications and Signal Processing, 2nd edn. Wiley, Hoboken"},{"key":"8239_CR55","doi-asserted-by":"publisher","unstructured":"Poole B, Ozair S, Van Den Oord A, Alemi A, Tucker G (2019) On variational bounds of mutual information. In: 2019 36th International Conference on Machine Learning (ICML), pp 5171\u20135180. https:\/\/doi.org\/10.48550\/arXiv.1905.06922","DOI":"10.48550\/arXiv.1905.06922"},{"key":"8239_CR56","doi-asserted-by":"publisher","unstructured":"Seitzer M, Sch\u00f6lkopf B, Martius G (2021) Causal influence detection for improving efficiency in reinforcement learning. In: 2021 35th International Conference on Neural Information Processing Systems (NIPS), pp 22905\u201322918. https:\/\/doi.org\/10.48550\/arXiv.2106.03443","DOI":"10.48550\/arXiv.2106.03443"},{"key":"8239_CR57","doi-asserted-by":"publisher","unstructured":"Durrieu JL, Thiran J, Kelly F (2012) Lower and upper bounds for approximation of the Kullback\u2013Leibler divergence between gaussian mixture models. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 4833\u20134836. https:\/\/doi.org\/10.1109\/ICASSP.2012.6289001","DOI":"10.1109\/ICASSP.2012.6289001"},{"key":"8239_CR58","doi-asserted-by":"publisher","unstructured":"Schulman J, Wolski F, Dhariwal P (2017) Proximal policy optimization algorithms. In: 2017 5th International Conference on Learning Representations (ICLR), pp 1\u201312. 
https:\/\/doi.org\/10.48550\/arXiv.1707.06347","DOI":"10.48550\/arXiv.1707.06347"},{"key":"8239_CR59","doi-asserted-by":"publisher","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: 2018 6th International Conference on Learning Representations (ICLR), pp 551\u2013565. https:\/\/doi.org\/10.48550\/arXiv.1801.01290","DOI":"10.48550\/arXiv.1801.01290"},{"key":"8239_CR60","doi-asserted-by":"publisher","unstructured":"Barth-Maron G, Hoffman MW, Budden D et al (2018) Distributed distributional deterministic policy gradients. In: 2018 6th International Conference on Learning Representations (ICLR), pp 1023\u20131039. https:\/\/doi.org\/10.48550\/arXiv.1804.08617","DOI":"10.48550\/arXiv.1804.08617"},{"key":"8239_CR61","doi-asserted-by":"publisher","unstructured":"Chen X, Wang C, Zhou Z, Ross KW (2021) Randomized ensembled double Q-learning: learning fast without a model. In: 2021 6th International Conference on Learning Representations (ICLR), pp 1662\u20131687. https:\/\/doi.org\/10.48550\/arXiv.2101.05982","DOI":"10.48550\/arXiv.2101.05982"},{"key":"8239_CR62","doi-asserted-by":"publisher","unstructured":"Ren Z, Dong K, Zhou Y (2019) Exploration via hindsight goal generation. In: 2019 33rd International Conference on Neural Information Processing Systems (NIPS), pp 13499\u201313509. https:\/\/doi.org\/10.48550\/arXiv.1906.04279","DOI":"10.48550\/arXiv.1906.04279"},{"key":"8239_CR63","doi-asserted-by":"publisher","unstructured":"Wang Z, Bapst V, Heess N, Mnih V, Munos R, Kavukcuoglu K, et al (2017) Sample efficient actor-critic with experience replay. In: 2017 5th International Conference on Learning Representations (ICLR), pp 180\u2013199. https:\/\/doi.org\/10.48550\/arXiv.1611.01224","DOI":"10.48550\/arXiv.1611.01224"},{"key":"8239_CR64","doi-asserted-by":"publisher","unstructured":"Zhao R, Tresp V (2018) Energy-based hindsight experience prioritization. In: 2018 2nd Conference on Robot Learning, pp 113\u2013122. 
https:\/\/doi.org\/10.48550\/arXiv.1810.01363","DOI":"10.48550\/arXiv.1810.01363"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08239-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-026-08239-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08239-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T14:57:29Z","timestamp":1769266649000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-026-08239-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,24]]},"references-count":64,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2026,1]]}},"alternative-id":["8239"],"URL":"https:\/\/doi.org\/10.1007\/s11227-026-08239-5","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,24]]},"assertion":[{"value":"14 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"96"}}