{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T14:26:59Z","timestamp":1762352819497},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Comput. Intell. Mag."],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1109\/mci.2019.2919363","type":"journal-article","created":{"date-parts":[[2019,7,16]],"date-time":"2019-07-16T20:06:43Z","timestamp":1563307603000},"page":"8-18","source":"Crossref","is-referenced-by-count":25,"title":["Improving RTS Game AI by Supervised Policy Learning, Tactical Search, and Deep Reinforcement Learning"],"prefix":"10.1109","volume":"14","author":[{"given":"Nicolas A.","family":"Barriga","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marius","family":"Stanescu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Felipe","family":"Besoain","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Buro","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Stabilising experience replay for deep multi-agent reinforcement learning","author":"foerster","year":"0","journal-title":"Proc Int Conf Machine Learning (ICML)"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-015-9292-6"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref32","article-title":"Dueling network architectures for deep reinforcement learning","volume":"abs 1511 6581","author":"wang","year":"2015","journal-title":"CoRR"},{"key":"ref31","author":"vinyals","year":"2017","journal-title":"Starcraft ii A new challenge for reinforcement learning"},{"key":"ref30","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"0","journal-title":"NIPS Deep Learning Workshop"},{"key":"ref37","first-page":"165","article-title":"Potential-based difference rewards for multiagent reinforcement learning","author":"devlin","year":"0","journal-title":"Proc 1st Int Conf Autonomous Agents and Multi-Agent Systems"},{"key":"ref36","article-title":"Multiagent bidirectionally-coordinated nets for learning to play StarCraft combat games","volume":"abs 1703 10069","author":"peng","year":"2017","journal-title":"CoRR"},{"key":"ref35","article-title":"Multi-agent reinforcement learning in sequential social dilemmas","volume":"abs 1702 3037","author":"leibo","year":"2017","journal-title":"CoRR"},{"key":"ref34","article-title":"Counterfactual multi-agent policy gradients","volume":"abs 1705 8926","author":"foerster","year":"2017","journal-title":"CoRR"},{"key":"ref10","first-page":"73","article-title":"Game-tree search over high-level game states in RTS games","author":"uriarte","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref40","article-title":"TStarBots: Defeating the cheating level builtin AI in StarCraft II in the full game","author":"sun","year":"2018"},{"key":"ref11","article-title":"Incorporating search algorithms into RTS game agents","author":"churchill","year":"0","journal-title":"Proc AIIDE Workshop on Artificial Intelligence in Adversarial Real-Time Games"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1613\/jair.5398"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/522"},{"key":"ref14","first-page":"74","article-title":"Action abstractions for combinatorial multi-armed bandit tree search","author":"moraes","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref15","first-page":"876","article-title":"Asymmetric action abstractions for multi-unit control in adversarial real-time games","author":"moraes","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/11424918_9"},{"key":"ref17","first-page":"86","article-title":"Predicting army combat outcomes in StarCraft","author":"stanescu","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref18","first-page":"86","article-title":"Using Lanchester attrition laws for combat prediction in StarCraft","author":"stanescu","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref19","first-page":"112","article-title":"Global state evaluation in StarCraft","author":"erickson","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860439"},{"key":"ref4","first-page":"112","article-title":"Fast heuristic search for RTS game combat scenarios","author":"churchill","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref27","first-page":"9","article-title":"Combining strategic learning and tactical search in real-time strategy games","author":"barriga","year":"0","journal-title":"Proc AAAI Conf Artif Intell Interactive Digit Entertain"},{"key":"ref3","first-page":"58","article-title":"The combinatorial multi-armed bandit problem and its application to RTS games","author":"onta\u00f1\u00f3n","year":"0","journal-title":"Proc Artif Intell Interactive Digit Entertainment Conf"},{"key":"ref6","article-title":"Incorporating search algorithms into RTS game agents","author":"churchill","year":"0","journal-title":"Proc AIIDE Workshop on Artificial Intelligence in Adversarial Real-Time Games"},{"key":"ref29","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"hasselt","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2017.8080449"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2017.2717902"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2013.6633643"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref9","first-page":"1652","article-title":"Adversarial hierarchical-task network planning for complex real-time games","author":"onta\u00f1\u00f3n","year":"0","journal-title":"Proc Int Joint Conf Artificial Intelligence (IJCAI)"},{"key":"ref1","author":"koster","year":"2004","journal-title":"A Theory of Fun for Game Design"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref42","article-title":"Value-decomposition networks for cooperative multi-agent learning","volume":"abs 1706 5296","author":"sunehag","year":"2017","journal-title":"CoRR"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref41","article-title":"World models","volume":"abs 1803 10122","author":"ha","year":"2018","journal-title":"CoRR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(01)00129-1"},{"key":"ref26","article-title":"Deep reinforcement learning","volume":"abs 1810 6339","author":"li","year":"2018","journal-title":"CoRR"},{"key":"ref43","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"abs 1803 11485","author":"rashid","year":"2018","journal-title":"CoRR"},{"key":"ref25","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"Proc Int Conf Machine Learning (ICML)"}],"container-title":["IEEE Computational Intelligence Magazine"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10207\/8764622\/08764630.pdf?arnumber=8764630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T20:47:33Z","timestamp":1657745253000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8764630\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8]]},"references-count":43,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/mci.2019.2919363","relation":{},"ISSN":["1556-603X","1556-6048"],"issn-type":[{"value":"1556-603X","type":"print"},{"value":"1556-6048","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,8]]}}}