{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,7]],"date-time":"2025-08-07T09:18:24Z","timestamp":1754558304350,"version":"3.37.3"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022ZD0116401"],"award-info":[{"award-number":["2022ZD0116401"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076238","62222606","61902402"],"award-info":[{"award-number":["62076238","62222606","61902402"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"China Computer Federation (CCF)-Tencent Open Fund"},{"DOI":"10.13039\/501100002367","name":"Strategic Priority Research Program of the Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["XDA27000000"],"award-info":[{"award-number":["XDA27000000"]}],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/tnnls.2023.3280186","type":"journal-article","created":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T17:23:34Z","timestamp":1686763414000},"page":"14618-14632","source":"Crossref","is-referenced-by-count":2,"title":["OpenHoldem: A Benchmark for Large-Scale Imperfect-Information Game Research"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3840-3270","authenticated-orcid":false,"given":"Kai","family":"Li","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5580-0062","authenticated-orcid":false,"given":"Hang","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Enmin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5043-5004","authenticated-orcid":false,"given":"Zhe","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6801-0510","authenticated-orcid":false,"given":"Junliang","family":"Xing","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(01)00129-1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"journal-title":"arXiv:1912.06680","article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","key":"ref9"},{"key":"ref10","article-title":"Suphx: Mastering mahjong with deep reinforcement learning","author":"Li","year":"2020","journal-title":"arXiv:2003.13590"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref12","first-page":"621","article-title":"Towards playing full MOBA games with deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ye"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1126\/science.1259433"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1126\/science.aam6960"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1126\/science.aao1733"},{"key":"ref16","first-page":"12333","article-title":"Douzero: Mastering DouDizhu with self-play deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zha"},{"key":"ref17","first-page":"1","article-title":"Human-level performance in no-press diplomacy via equilibrium search","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Gray"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-04357-7"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.2307\/1969529"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2010.12.005"},{"key":"ref21","article-title":"Measuring the size of large no-limit poker games","author":"Johanson","year":"2013","journal-title":"arXiv:1302.7008"},{"key":"ref22","first-page":"35","article-title":"Slumbot NL: Solving large games with counterfactual regret minimization using sampling and distributed processing","volume-title":"Proc. AAAI Conf. Artif. Intell. Workshops","author":"Jackson"},{"key":"ref23","first-page":"7","article-title":"Hierarchical abstraction, distributed equilibrium computation, and post-processing, with application to a champion no-limit Texas hold\u2019em agent","volume-title":"Proc. Int. Conf. Auto. Agents Multiagent Syst.","author":"Brown"},{"key":"ref24","first-page":"1729","article-title":"Regret minimization in games with incomplete information","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zinkevich"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1126\/science.aay2400"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2782266"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3144171"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref30","article-title":"Achieving human parity on automatic Chinese to English news translation","author":"Hassan","year":"2018","journal-title":"arXiv:1803.05567"},{"key":"ref31","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2018.2877047"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/339"},{"key":"ref34","article-title":"OpenSpiel: A framework for reinforcement learning in games","author":"Lanctot","year":"2019","journal-title":"arXiv:1908.09453"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/764"},{"key":"ref36","first-page":"493","article-title":"Opponent modeling in poker","volume-title":"Proc. AAAI Conf. Artif. Intell.","author":"Billings"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v34i2.2474"},{"key":"ref38","first-page":"271","article-title":"Evaluating state-space abstractions in extensive-form games","volume-title":"Proc. Int. Conf. Auto. Agents Multiagent Syst.","author":"Johanson"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.8816"},{"key":"ref40","first-page":"1078","article-title":"Monte Carlo sampling for regret minimization in extensive games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lanctot"},{"key":"ref41","article-title":"Solving large imperfect information games using CFR+","author":"Tammelin","year":"2014","journal-title":"arXiv:1407.5042"},{"key":"ref42","first-page":"366","article-title":"Compact CFR","volume-title":"Proc. AAAI Conf. Artif. Intell. Workshops","author":"Jackson"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33012157"},{"volume-title":"A Course in Game Theory","year":"1994","author":"Osborne","key":"ref44"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.2140\/pjm.1956.6.1"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3041469"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2561300"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3099095"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3087579"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2806006"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2995920"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2021.3110194"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11481"},{"key":"ref54","first-page":"361","article-title":"Eqilibrium approximation quality of current no-limit poker bots","volume-title":"Proc. AAAI Conf. Artif. Intell. Workshops","author":"Lisy"},{"key":"ref55","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011829"},{"key":"ref57","article-title":"Unlocking the potential of deep counterfactual value networks","author":"Zarick","year":"2020","journal-title":"arXiv:2007.10442"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref59"},{"key":"ref60","first-page":"1","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kingma"},{"key":"ref61","first-page":"1","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Schulman"},{"volume-title":"The Rating of Chessplayers, Past and Present","year":"1978","author":"Elo","key":"ref62"},{"key":"ref63","first-page":"793","article-title":"Deep counterfactual regret minimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Brown"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10013"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10707065\/10153087.pdf?arnumber=10153087","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T17:40:28Z","timestamp":1728409228000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10153087\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":64,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3280186","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}