{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:45:45Z","timestamp":1750308345287,"version":"3.41.0"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tg.2024.3520970","type":"journal-article","created":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:40:42Z","timestamp":1734982842000},"page":"522-535","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing AI-Bot Strength and Strategy Diversity in Adversarial Games: A Novel Deep Reinforcement Learning Framework"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9957-4973","authenticated-orcid":false,"given":"Chenglu","family":"Sun","sequence":"first","affiliation":[{"name":"Cooperation Product Department, Interactive Entertainment Group, Tencent, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5401-4078","authenticated-orcid":false,"given":"Shuo","family":"Shen","sequence":"additional","affiliation":[{"name":"Cooperation Product Department, Interactive Entertainment Group, Tencent, Shanghai, China"}]},{"given":"Deyi","family":"Xue","sequence":"additional","affiliation":[{"name":"Cooperation Product Department, Interactive Entertainment Group, Tencent, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2157-4721","authenticated-orcid":false,"given":"Wenzhi","family":"Tao","sequence":"additional","affiliation":[{"name":"Cooperation Product Department, Interactive Entertainment Group, Tencent, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2271-0762","authenticated-orcid":false,"given":"Zixia","family":"Zhou","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.entcom.2018.02.005"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCIAIG.2017.2738156"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8848079"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2018.2816806"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3207346"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref9","first-page":"434","article-title":"Open-ended learning in symmetric zero-sum games","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Balduzzi","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9482783"},{"key":"ref11","article-title":"Adversarial policies: Attacking deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gleave","year":"2019"},{"article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","author":"Berner","key":"ref12"},{"article-title":"Suphx: Mastering mahjong with deep reinforcement learning","year":"2020","author":"Li","key":"ref13"},{"article-title":"The minerl 2020 competition on sample efficient reinforcement learning using human priors","year":"2021","author":"Guss","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s13218-020-00647-w"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63519-4"},{"key":"ref18","first-page":"6244","article-title":"ELF opengo: An analysis and open reimplementation of alphazero","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tian","year":"2019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CoG52621.2021.9619127"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aiide.v18i1.21958"},{"key":"ref21","article-title":"Multi-critic actor learning: Teaching RL policies to act with style","volume-title":"Int. Conf. Learn. Representations","author":"Mysore","year":"2021"},{"key":"ref22","first-page":"8514","article-title":"Modelling behavioural diversity for learning in open-ended games","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Perez-Nieves","year":"2021"},{"key":"ref23","first-page":"18050","article-title":"Effective diversity in population based reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Parker-Holder","year":"2020"},{"key":"ref24","first-page":"16171","article-title":"Learning diverse policies in moba games via macro-goals","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Gao","year":"2021"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"article-title":"Tikick: Towards playing multi-agent football full games from single-agent demonstrations","year":"2021","author":"Huang","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8848006"},{"key":"ref28","article-title":"A generalized training approach for multiagent learning","volume-title":"Int. Conf. Learn. Representations","author":"Muller","year":"2020"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1006\/anbe.2000.1571"},{"key":"ref31","article-title":"Effect of scale on catastrophic forgetting in neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ramasesh","year":"2022"},{"key":"ref32","first-page":"3272","article-title":"Re-evaluating evaluation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Balduzzi","year":"2018"},{"article-title":"Evaluating agents using social choice theory","year":"2023","author":"Lanctot","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref35","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih","year":"2016"},{"article-title":"High-dimensional continuous control using generalized advantage estimation","year":"2016","author":"Schulman","key":"ref36"},{"article-title":"Exploration by random network distillation","year":"2019","author":"Burda","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.3020400"},{"key":"ref39","first-page":"4403","article-title":"LIIR: Learning individual intrinsic reward in multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Du","year":"2019"},{"key":"ref40","first-page":"11436","article-title":"What can learned intrinsic rewards capture?","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zheng","year":"2020"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"issue":"1","key":"ref42","first-page":"7234","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref43","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"article-title":"Dealing with sparse rewards in reinforcement learning","year":"2019","author":"Hare","key":"ref44"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA52953.2021.00267"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.cosrev.2021.100378"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3069908"},{"key":"ref48","first-page":"67","article-title":"Tizero:: Mastering multi-agent football with curriculum learning and self-play","volume-title":"Proc. Int. Conf. Auton. Agents Multiagent Syst.","author":"Lin","year":"2023"}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782673\/11038929\/10812583.pdf?arnumber=10812583","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:41:32Z","timestamp":1750268492000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10812583\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":48,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tg.2024.3520970","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"type":"print","value":"2475-1502"},{"type":"electronic","value":"2475-1510"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}