{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T20:59:55Z","timestamp":1768683595878,"version":"3.49.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100009148","name":"QMUL Research-IT","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100009148","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,17]]},"DOI":"10.1109\/cog52621.2021.9619076","type":"proceedings-article","created":{"date-parts":[[2021,12,7]],"date-time":"2021-12-07T20:53:06Z","timestamp":1638910386000},"page":"1-8","source":"Crossref","is-referenced-by-count":20,"title":["Gym-\u00b5RTS: Toward Affordable Full Game Real-time Strategy Games Research with Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Shengyi","family":"Huang","sequence":"first","affiliation":[{"name":"Drexel University,Philadelphia,PA,USA"}]},{"given":"Santiago","family":"Ontanon","sequence":"additional","affiliation":[{"name":"Drexel University,Philadelphia,PA,USA"}]},{"given":"Chris","family":"Bamford","sequence":"additional","affiliation":[{"name":"Queen Mary University,London,United Kindom"}]},{"given":"Lukasz","family":"Grela","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref32","author":"huang","year":"2020","journal-title":"A closer look at invalid action masking in policy gradient algorithms"},{"key":"ref31","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref30","author":"king","year":"2017","journal-title":"Apocrita - High Performance Computing Cluster for Queen Mary University of London"},{"key":"ref10","article-title":"Case-based reasoning for build order in real-time strategy games","volume":"4","author":"weber","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-8640.2009.00344.x"},{"key":"ref12","first-page":"40","article-title":"Uct for tactical assault planning in real-time strategy games","author":"balla","year":"0","journal-title":"Proceedings of the 21 St International Jont Conference on Artifical Intelligence"},{"key":"ref13","article-title":"Fast heuristic search for rts game combat scenarios","volume":"8","author":"churchill","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2014.6932900"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1613\/jair.5398"},{"key":"ref16","article-title":"Dota 2 with large scale deep reinforcement learning","volume":"abs 1912 6680","author":"berner","year":"2019","journal-title":"ArXiv"},{"key":"ref17","article-title":"The starcraft multi-agent challenge","author":"samvelyan","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.29007\/7ksb"},{"key":"ref19","article-title":"Mod-ular architecture for starcraft ii with deep reinforcement learning","author":"lee","year":"0","journal-title":"Fourth Artificial Intelligence and Interactive Digital Entertainment Conference"},{"key":"ref28","first-page":"479","article-title":"Action space shaping in deep reinforcement learning","author":"kanervisto","year":"0","journal-title":"2020 IEEE Conference on Games (CoG) IEEE"},{"key":"ref4","article-title":"The combinatorial multi-armed bandit problem and its application to real-time strategy games","volume":"9","author":"ontan\u00f3n","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment"},{"key":"ref27","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref3","article-title":"Tstarbot-x: An open-sourced and comprehensive study for efficient league training in starcraft ii full game","author":"han","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref6","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"ref5","article-title":"Starcraft ii: A new challenge for reinforcement learning","author":"vinyals","year":"2017","journal-title":"ar Xiv preprint"},{"key":"ref8","first-page":"1534","article-title":"Real-time strategy games: A new ai research challenge","author":"buro","year":"2003","journal-title":"Proceedings of the 18th International Joint Conference on Artificial Intelligence ser IJCAI&#x2019; 03"},{"key":"ref7","author":"dhariwal","year":"2017","journal-title":"OpenAI Baselines"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2013.2286295"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in StarCraft II using multi-agent reinforcement learning","volume":"575","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref9","article-title":"Classq-l: A q-learning algorithm for adversarial real-time strategy games","volume":"8","author":"jaidee","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment"},{"key":"ref20","first-page":"2659","article-title":"Elf: An extensive, lightweight and flexible research platform for real-time strategy games","author":"tian","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref22","first-page":"329","author":"sutton","year":"2018","journal-title":"Reinforcement learning An introduction (2nd)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2018.8490409"},{"key":"ref24","first-page":"1407","article-title":"Impala: Scalable distributed deep-rl with importance weighted actor-learner architectures","author":"espeholt","year":"0","journal-title":"Int Conference on Machine Learning"},{"key":"ref23","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref26","article-title":"Implementation matters in deep rl: A case study on ppo and trpo","author":"engstrom","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref25","first-page":"2576","article-title":"Grid-wise control for multi-agent reinforcement learning in video game ai","author":"han","year":"0","journal-title":"Int Conference on Machine Learning"}],"event":{"name":"2021 IEEE Conference on Games (CoG)","location":"Copenhagen, Denmark","start":{"date-parts":[[2021,8,17]]},"end":{"date-parts":[[2021,8,20]]}},"container-title":["2021 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9618888\/9618891\/09619076.pdf?arnumber=9619076","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:14:20Z","timestamp":1659485660000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9619076\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,17]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/cog52621.2021.9619076","relation":{},"subject":[],"published":{"date-parts":[[2021,8,17]]}}}