{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T07:12:02Z","timestamp":1766301122535,"version":"3.37.3"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T00:00:00Z","timestamp":1735516800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T00:00:00Z","timestamp":1735516800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s10489-024-06220-4","type":"journal-article","created":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T08:14:02Z","timestamp":1735546442000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Deep reinforcement learning in real-time strategy games: a systematic literature review"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1164-0655","authenticated-orcid":false,"given":"Gabriel Caldas","family":"Barros e S\u00e1","sequence":"first","affiliation":[]},{"given":"Charles Andrye Galv\u00e3o","family":"Madeira","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,30]]},"reference":[{"unstructured":"Sutton R, Barto A (2018) Reinforcement Learning: An Introduction (2nd Edition) (MIT Press)","key":"6220_CR1"},{"unstructured":"Li Y (2018) Deep reinforcement learning. arXiv. arxiv:1810.06339","key":"6220_CR2"},{"unstructured":"Shao K, Tang Z, Zhu Y, Li N, Zhao D (2019) A survey of deep reinforcement learning in video games. arxiv:1912.10944","key":"6220_CR3"},{"doi-asserted-by":"publisher","unstructured":"Szita I (2012) Reinforcement Learning in Games, 539\u2013577 (Springer Berlin Heidelberg, Berlin, Heidelberg). https:\/\/doi.org\/10.1007\/978-3-642-27645-3_17","key":"6220_CR4","DOI":"10.1007\/978-3-642-27645-3_17"},{"doi-asserted-by":"crossref","unstructured":"Lecun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521:436\u2013444. https:\/\/www.nature.com\/articles\/nature14539","key":"6220_CR5","DOI":"10.1038\/nature14539"},{"unstructured":"Mnih V et\u00a0al (2013) Playing atari with deep reinforcement learning. NIPS Deep Learning Workshop 2013","key":"6220_CR6"},{"doi-asserted-by":"crossref","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518:529\u2013533. www.nature.com\/articles\/nature14236","key":"6220_CR7","DOI":"10.1038\/nature14236"},{"doi-asserted-by":"crossref","unstructured":"van Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with Double Q-Learning. Proceedings of the AAAI Conference on Artificial Intelligence 30. https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/10295","key":"6220_CR8","DOI":"10.1609\/aaai.v30i1.10295"},{"doi-asserted-by":"crossref","unstructured":"Silver D et al (2016) Mastering the game of go with deep neural networks and tree search. Proceedings of the AAAI Conference on Artificial Intelligence 529:484\u2013489. https:\/\/www.nature.com\/articles\/nature16961","key":"6220_CR9","DOI":"10.1038\/nature16961"},{"unstructured":"Silver D et\u00a0al (2017) Mastering the game of go without human knowledge. Nature. https:\/\/www.nature.com\/articles\/nature24270","key":"6220_CR10"},{"unstructured":"Vinyals O et\u00a0al (2017) Starcraft ii: A new challenge for reinforcement learning. arxiv:1708.04782","key":"6220_CR11"},{"doi-asserted-by":"crossref","unstructured":"Vinyals O et al (2019) Grandmaster level in starcraft ii using multi-agent reinforcement learning. Proceedings of the AAAI Conference on Artificial Intelligence 575:350\u2013354. https:\/\/www.nature.com\/articles\/s41586-019-1724-z","key":"6220_CR12","DOI":"10.1038\/s41586-019-1724-z"},{"unstructured":"Ye D et\u00a0al (2020) Towards playing full moba games with deep reinforcement learning. In: Larochelle H, Ranzato M, Hadsell R, Balcan M, Lin H (eds) Advances in Neural Information Processing Systems, vol\u00a033, 621\u2013632 (Curran Associates, Inc.). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/06d5ae105ea1bea4d800bc96491876e9-Paper.pdf","key":"6220_CR13"},{"unstructured":"Zha, D. et\u00a0al. Meila, M. & Zhang, T. (eds) Douzero: Mastering doudizhu with self-play deep reinforcement learning. (eds Meila, M. & Zhang, T.) Proceedings of the 38th International Conference on Machine Learning, Vol. 139 of Proceedings of Machine Learning Research, 12333\u201312344 (PMLR, 2021). https:\/\/proceedings.mlr.press\/v139\/zha21a.html","key":"6220_CR14"},{"doi-asserted-by":"crossref","unstructured":"Perolat J et al (2022) Mastering the game of stratego with model-free multiagent reinforcement learning. Science 378:990\u2013996. https:\/\/www.science.org\/doi\/abs\/10.1126\/science.add4679","key":"6220_CR15","DOI":"10.1126\/science.add4679"},{"doi-asserted-by":"crossref","unstructured":"Wurman PR et al (2022) Outracing champion gran turismo drivers with deep reinforcement learning. Nature 602:223\u2013228. https:\/\/www.nature.com\/articles\/s41586-021-04357-7","key":"6220_CR16","DOI":"10.1038\/s41586-021-04357-7"},{"doi-asserted-by":"crossref","unstructured":"Sethy H, Patel A, Padmanabhan V (2015) Real time strategy games: A reinforcement learning approach. Procedia Computer Science 54:257\u2013264. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S187705091501354X","key":"6220_CR17","DOI":"10.1016\/j.procs.2015.06.030"},{"doi-asserted-by":"crossref","unstructured":"Robertson G, Watson I (2014) A review of real-time strategy game ai. AI Magazine 35:75\u2013104. https:\/\/ojs.aaai.org\/aimagazine\/index.php\/aimagazine\/article\/view\/2478","key":"6220_CR18","DOI":"10.1609\/aimag.v35i4.2478"},{"doi-asserted-by":"crossref","unstructured":"Onta\u00f1\u00f3n S et\u00a0al (2015) RTS AI Problems and Techniques, 1\u201312 (Springer International Publishing, Cham). https:\/\/link.springer.com\/referenceworkentry\/10.1007\/978-3-319-08234-9_17-1","key":"6220_CR19","DOI":"10.1007\/978-3-319-08234-9_17-1"},{"key":"6220_CR20","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TCIAIG.2013.2286295","volume":"5","author":"S Onta\u00f1\u00f3n","year":"2013","unstructured":"Onta\u00f1\u00f3n S et al (2013) A survey of real-time strategy game ai research and competition in starcraft. IEEE Transactions on Computational Intelligence and AI in Games 5:293\u2013311","journal-title":"IEEE Transactions on Computational Intelligence and AI in Games"},{"unstructured":"Churchill DG (2016) Heuristic Search Techniques for Real-Time Strategy Games. Ph.D. thesis, University of Alberta","key":"6220_CR21"},{"doi-asserted-by":"crossref","unstructured":"Ashraf NM et\u00a0al (2021) A State-of-the-Art Review of Deep Reinforcement Learning Techniques for Real-Time Strategy Games, 285\u2013307 (Springer International Publishing, Cham). https:\/\/link.springer.com\/chapter\/10.1007\/978-3-030-72080-3_17","key":"6220_CR22","DOI":"10.1007\/978-3-030-72080-3_17"},{"unstructured":"Kitchenham B, Charters S (2007) Guidelines for performing systematic literature reviews in software engineering. Technical Report EBSE 2007-001, Keele University and Durham University Joint Report","key":"6220_CR23"},{"doi-asserted-by":"crossref","unstructured":"Huang S, Onta\u00f1\u00f3n S, Bamford C, Grela L (2021) Gym-\u00b5rts: Toward affordable full game real-time strategy games research with deep reinforcement learning, 1\u20138 (IEEE Press). https:\/\/ieeexplore.ieee.org\/document\/9619076","key":"6220_CR24","DOI":"10.1109\/CoG52621.2021.9619076"},{"doi-asserted-by":"crossref","unstructured":"Andersen P-A, Goodwin M, Granmo O-C (2018) Deep rts: A game environment for deep reinforcement learning in real-time strategy games, 1\u20138","key":"6220_CR25","DOI":"10.1109\/CIG.2018.8490409"},{"doi-asserted-by":"crossref","unstructured":"Ara\u00fajo MAS, Alves LPC, Madeira CAG, N\u00f3brega MM (2020) Urnai: A multi-game toolkit for experimenting deep reinforcement learning algorithms, 178\u2013187","key":"6220_CR26","DOI":"10.1109\/SBGames51465.2020.00032"},{"doi-asserted-by":"crossref","unstructured":"Ramadhan F, Suyanto S (2020) Royale heroes: A unique rts game using deep reinforcement learning-based autonomous movement, 494\u2013498","key":"6220_CR27","DOI":"10.1109\/ISRITI51436.2020.9315441"},{"unstructured":"Han L et\u00a0al (2019) Chaudhuri K, Salakhutdinov R (eds) Grid-wise control for multi-agent reinforcement learning in video game AI. In: Chaudhuri K, Salakhutdinov R (eds) Proceedings of the 36th International Conference on Machine Learning, vol 97 of Proceedings of Machine Learning Research, 2576\u20132585 (PMLR). https:\/\/proceedings.mlr.press\/v97\/han19a.html","key":"6220_CR28"},{"key":"6220_CR29","first-page":"00980","volume":"2004","author":"A Kanervisto","year":"2020","unstructured":"Kanervisto A, Scheller C, Hautam\u00e4ki V (2020) Action space shaping in deep reinforcement learning 2004:00980","journal-title":"Action space shaping in deep reinforcement learning"},{"key":"6220_CR30","volume-title":"Policy invariance under reward transformations: Theory and application to reward shaping, ICML \u201999, 278\u2013287","author":"AY Ng","year":"1999","unstructured":"Ng AY, Harada D, Russell SJ (1999) Policy invariance under reward transformations: Theory and application to reward shaping, ICML \u201999, 278\u2013287. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA"},{"doi-asserted-by":"crossref","unstructured":"Huang S, Onta\u00f1\u00f3n S (2022) A closer look at invalid action masking in policy gradient algorithms 35. https:\/\/journals.flvc.org\/FLAIRS\/article\/view\/130584","key":"6220_CR31","DOI":"10.32473\/flairs.v35i.130584"},{"doi-asserted-by":"crossref","unstructured":"Hao D, Sweetser P, Aitchison M (2020) Designing curriculum for deep reinforcement learning in starcraft ii. In: Gallagher M, Moustafa N, Lakshika E (eds) AI 2020: Advances in Artificial Intelligence, 243\u2013255 (Springer International Publishing, Cham)","key":"6220_CR32","DOI":"10.1007\/978-3-030-64984-5_19"},{"doi-asserted-by":"publisher","unstructured":"Waytowich N, Barton SL, Lawhern V, Stump E, Warnell G (2019) Grounding natural language commands to StarCraft II game states for narration-guided reinforcement learning. In: Pham T (ed) Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications, vol. 11006, 110060S. International Society for Optics and Photonics (SPIE). https:\/\/doi.org\/10.1117\/12.2519138","key":"6220_CR33","DOI":"10.1117\/12.2519138"},{"doi-asserted-by":"crossref","unstructured":"Zhang F, Yang Q, An D (2022) A leader-following paradigm based deep reinforcement learning method for multi-agent cooperation games. Neural Networks 156:1\u201312. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S089360802200346X","key":"6220_CR34","DOI":"10.1016\/j.neunet.2022.09.012"},{"doi-asserted-by":"crossref","unstructured":"yang Zhao L et\u00a0al (2022) Targeted multi-agent communication algorithm based on state control. Defence Technology. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S2214914722001490","key":"6220_CR35","DOI":"10.1016\/j.dt.2022.07.005"},{"doi-asserted-by":"crossref","unstructured":"Li Y, Fang Y, Akhtar Z (2020) Accelerating deep reinforcement learning model for game strategy. Neurocomputing 408:157\u2013168. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0925231220303337","key":"6220_CR36","DOI":"10.1016\/j.neucom.2019.06.110"},{"doi-asserted-by":"crossref","unstructured":"Zhang J, Chen J, Huang Y, Wan W, Li T (2018) Applying online expert supervision in deep actor-critic reinforcement learning. In: Lai J-H et\u00a0al (eds) Pattern Recognition and Computer Vision, 469\u2013478 (Springer International Publishing, Cham)","key":"6220_CR37","DOI":"10.1007\/978-3-030-03335-4_41"},{"doi-asserted-by":"crossref","unstructured":"Wang H et\u00a0al (2020) Large scale deep reinforcement learning in war-games, 1693\u20131699","key":"6220_CR38","DOI":"10.1109\/BIBM49941.2020.9313387"},{"doi-asserted-by":"crossref","unstructured":"Li C, Wei X, Zhao Y, Geng X (2020) An effective maximum entropy exploration approach for deceptive game in reinforcement learning. Neurocomputing 403:98\u2013108. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0925231220306536","key":"6220_CR39","DOI":"10.1016\/j.neucom.2020.04.068"},{"doi-asserted-by":"crossref","unstructured":"Hu C (2020) A confrontation decision-making method with deep reinforcement learning and knowledge transfer for multi-agent system. Symmetry 12. https:\/\/www.mdpi.com\/2073-8994\/12\/4\/631","key":"6220_CR40","DOI":"10.3390\/sym12040631"},{"unstructured":"Kelly R, Churchill D (2020) Transfer learning between rts combat scenarios using component-action deep reinforcement learning. https:\/\/ceur-ws.org\/Vol-2862\/","key":"6220_CR41"},{"doi-asserted-by":"crossref","unstructured":"Lee D et\u00a0al (2018) Modular architecture for starcraft ii with deep reinforcement learning, AIIDE\u201918 (AAAI Press)","key":"6220_CR42","DOI":"10.1609\/aiide.v14i1.13033"},{"doi-asserted-by":"crossref","unstructured":"Chen L, LIU T, Liu Y-t (2020) Research on the starcraft ii decision method based on hierarchical reinforcement learning 582\u2013586","key":"6220_CR43","DOI":"10.1109\/CCDC49329.2020.9164140"},{"doi-asserted-by":"crossref","unstructured":"Xu, S. et\u00a0al. Macro action selection with deep reinforcement learning in starcraft. Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment15, 94\u201399 (2019). https:\/\/ojs.aaai.org\/index.php\/AIIDE\/article\/view\/5230","key":"6220_CR44","DOI":"10.1609\/aiide.v15i1.5230"},{"doi-asserted-by":"crossref","unstructured":"Liu T, Zheng Z, Li H, Bian K, Song L (2019) Playing card-based rts games with deep reinforcement learning, pp 4540\u20134546 (Int Joint Conf Artif Intell Org). https:\/\/www.ijcai.org\/proceedings\/2019\/631","key":"6220_CR45","DOI":"10.24963\/ijcai.2019\/631"},{"doi-asserted-by":"crossref","unstructured":"Hu H, Wang Q (2020) Implementation on benchmark of sc2le environment with advantage actor \u2013 critic method, pp 362\u2013366","key":"6220_CR46","DOI":"10.1109\/ICUAS48674.2020.9214032"},{"doi-asserted-by":"crossref","unstructured":"Hao D, Sweetser P, Aitchison M (2022) Curriculum generation and sequencing for deep reinforcement learning in starcraft ii, ACSW \u201922:1\u201311 (Association for Computing Machinery, New York, NY, USA). https:\/\/dl.acm.org\/doi\/10.1145\/3511616.3513093","key":"6220_CR47","DOI":"10.1145\/3511616.3513093"},{"doi-asserted-by":"crossref","unstructured":"Harris A, Liu S (2021) Maidrl: Semi-centralized multi-agent reinforcement learning using agent influence, pp 01\u201308","key":"6220_CR48","DOI":"10.1109\/CoG52621.2021.9619002"},{"doi-asserted-by":"crossref","unstructured":"Nipu AS, Liu S, Harris A (2022) Maidcrl: Semi-centralized multi-agent influence dense-cnn reinforcement learning, pp 512\u2013515","key":"6220_CR49","DOI":"10.1109\/CoG51982.2022.9893711"},{"doi-asserted-by":"crossref","unstructured":"Sun Y, Yuan B, Zhang Y et\u00a0al (2021) Research on action strategies and simulations of drl and mcts-based intelligent round game. Int J Control Autom Syst 19:2984\u20132998. https:\/\/link.springer.com\/article\/10.1007\/s12555-020-0277-0","key":"6220_CR50","DOI":"10.1007\/s12555-020-0277-0"},{"key":"6220_CR51","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1109\/THMS.2022.3225867","volume":"53","author":"Y Sun","year":"2023","unstructured":"Sun Y et al (2023) Intelligent decision-making and human language communication based on deep reinforcement learning in a wargame environment. IEEE Transactions on human-machine systems 53:201\u2013214","journal-title":"IEEE Transactions on human-machine systems"},{"doi-asserted-by":"crossref","unstructured":"Andersen P-A, Goodwin M, Granmo O-C (2021) Increasing sample efficiency in deep reinforcement learning using generative environment modelling. Exp Syst 38. https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1111\/exsy.12537","key":"6220_CR52","DOI":"10.1111\/exsy.12537"},{"unstructured":"Fu Y, Liang X, Ma Y, Huang K, Li Y (2021) Coordinating multi-agent deep reinforcement learning in wargame, ACAI \u201920 (Association for Computing Machinery, New York, NY, USA). https:\/\/dl.acm.org\/doi\/10.1145\/3446132.3446137","key":"6220_CR53"},{"doi-asserted-by":"crossref","unstructured":"Boron J, Darken C (2020) Developing combat behavior through reinforcement learning in wargames and simulations, pp 728\u2013731","key":"6220_CR54","DOI":"10.1109\/CoG47356.2020.9231609"},{"doi-asserted-by":"crossref","unstructured":"Huang W, Yin Q, Zhang J, Huang K (2021) Learning macromanagement in starcraft by deep reinforcement learning. Sensors 21. https:\/\/www.mdpi.com\/1424-8220\/21\/10\/3332","key":"6220_CR55","DOI":"10.3390\/s21103332"},{"key":"6220_CR56","first-page":"04043","volume":"1902","author":"M Samvelyan","year":"2019","unstructured":"Samvelyan M et al (2019) The starcraft multi-agent challenge 1902:04043","journal-title":"The starcraft multi-agent challenge"},{"unstructured":"Rashid T et\u00a0al (2018) Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. Proc Mach Learn Res. https:\/\/proceedings.mlr.press\/v80\/rashid18a.html","key":"6220_CR57"},{"doi-asserted-by":"crossref","unstructured":"Yun WJ, Yi S, Kim J (2021) Multi-agent deep reinforcement learning using attentive graph neural architectures for real-time strategy games pp 2967\u20132972","key":"6220_CR58","DOI":"10.1109\/SMC52423.2021.9658625"},{"key":"6220_CR59","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MCI.2019.2919363","volume":"14","author":"NA Barriga","year":"2019","unstructured":"Barriga NA, Stanescu M, Besoain F, Buro M (2019) Improving rts game ai by supervised policy learning, tactical search, and deep reinforcement learning. IEEE Comput Intell Mag 14:8\u201318","journal-title":"IEEE Comput Intell Mag"},{"doi-asserted-by":"crossref","unstructured":"Zhou Y et\u00a0al (2020) Towards a distributed framework for multi-agent reinforcement learning research pp 1\u20139","key":"6220_CR60","DOI":"10.1109\/HPEC43674.2020.9286212"},{"doi-asserted-by":"crossref","unstructured":"Shen X, Yin C, Hou X (2019) Self-attention for deep reinforcement learning, ICMAI \u201919, 71\u201375 (Association for Computing Machinery, New York, NY, USA). https:\/\/dl.acm.org\/doi\/10.1145\/3325730.3325743","key":"6220_CR61","DOI":"10.1145\/3325730.3325743"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-06220-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-06220-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-06220-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T14:56:06Z","timestamp":1738335366000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-06220-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,30]]},"references-count":61,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["6220"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-06220-4","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2024,12,30]]},"assertion":[{"value":"21 December 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 December 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"This is a review paper and we do not use any data.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"243"}}