{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T21:39:19Z","timestamp":1769722759300,"version":"3.49.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,7,16]],"date-time":"2021-07-16T00:00:00Z","timestamp":1626393600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,16]],"date-time":"2021-07-16T00:00:00Z","timestamp":1626393600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1007\/s11704-020-9307-6","type":"journal-article","created":{"date-parts":[[2021,7,16]],"date-time":"2021-07-16T13:03:06Z","timestamp":1626440586000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["A Monte Carlo Neural Fictitious Self-Play approach to approximate Nash Equilibrium in imperfect-information dynamic games"],"prefix":"10.1007","volume":"15","author":[{"given":"Li","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yuxuan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ziliang","family":"Han","sequence":"additional","affiliation":[]},{"given":"Shijian","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zhijie","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Gang","family":"Pan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,16]]},"reference":[{"key":"9307_CR1","doi-asserted-by":"crossref","unstructured":"Arulkumaran K, Cully A, Togelius J. Alphastar: an evolutionary computation perspective. In: Proceedings of the Genetic and Evolutionary Computation Conference Companion. 2019, 314\u2013315","DOI":"10.1145\/3319619.3321894"},{"issue":"2","key":"9307_CR2","doi-asserted-by":"publisher","first-page":"286","DOI":"10.2307\/1969529","volume":"54","author":"J Nash","year":"1951","unstructured":"Nash J. Non-cooperative games. Annals of Mathematics, 1951, 54(2): 286\u2013295","journal-title":"Annals of Mathematics"},{"issue":"4","key":"9307_CR3","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1609\/aimag.v31i4.2311","volume":"31","author":"T Sanholm","year":"2010","unstructured":"Sanholm T. The state of solving large incomplete-information games, and application to poker. AI Magazine, 2010, 31(4): 13\u201332","journal-title":"AI Magazine"},{"key":"9307_CR4","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1613\/jair.4477","volume":"51","author":"B Bo\u0161ansk\u00fd","year":"2014","unstructured":"Bo\u0161ansk\u00fd B, Kiekintveld C, Lis\u00fd V, P\u0115chou\u010dek M. An exact double-oracle algorithm for zero-sum extensive-form games with imperfect information. Journal of Artificial Intelligence Research, 2014, 51: 829\u2013866","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"6218","key":"9307_CR5","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1126\/science.1259433","volume":"347","author":"M Bowling","year":"2015","unstructured":"Bowling M, Burch N, Johanson M, Tammelin O. Heads-up limit hold\u2019em poker is solved. Science, 2015, 347(6218): 145\u2013149","journal-title":"Science"},{"issue":"1","key":"9307_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"C B Browne","year":"2012","unstructured":"Browne C B, Powley E, Whitehouse D, Lucas S M, Cowling P I, Rohlfshagen P, Tavener S, Perez D, Samothrakis S, Colton S. A survey of Monte Carlo tree search methods. IEEE Transactions on Computational Intelligence and AI in Games, 2012, 4(1): 1\u201343","journal-title":"IEEE Transactions on Computational Intelligence and AI in Games"},{"issue":"1","key":"9307_CR7","first-page":"374","volume":"13","author":"G W Brown","year":"1951","unstructured":"Brown G W. Iterative solution of games by fictitious play. Activity Analysis of Production and Allocation, 1951, 13(1): 374\u2013376","journal-title":"Activity Analysis of Production and Allocation"},{"key":"9307_CR8","unstructured":"Heinrich J, Lanctot M, Silver D. Fictitious self-play in extensive-form games. In: Proceedings of the 32nd International Conference on Machine Learning. 2015, 805\u2013813"},{"key":"9307_CR9","unstructured":"Heinrich J, Silver D. Deep reinforcement learning from self-play in imperfect-information games. 2016, arXiv preprint arXiv:1603.01121"},{"key":"9307_CR10","volume-title":"Reinforcement Learning: An Introduction","author":"R S Sutton","year":"1998","unstructured":"Sutton R S, Barto A G. Reinforcement Learning: An Introduction. 2nd ed. London: MIT Press, 1998","edition":"2nd ed."},{"key":"9307_CR11","volume-title":"Game Theory: Analysis of Conflict","author":"R B Myerson","year":"1991","unstructured":"Myerson R B. Game Theory: Analysis of Conflict. 1st ed. London: Harvard University Press, 1991","edition":"1st ed."},{"key":"9307_CR12","unstructured":"Shi L, Li S, Cao L, Yang L, Pan G. TBQ (\u03c3) improving efficiency of trace utilization for off-policy reinforcement learning. In: Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems. 2019, 1025\u20131032"},{"key":"9307_CR13","doi-asserted-by":"crossref","unstructured":"Yang L, Shi M, Zheng Q, Meng W, Pan G. A unified approach for multi-step temporal-difference learning with eligibility traces in reinforcement learning. In: Proceedings of the 27th International Joint Conference on Artificial Intelligence. 2018, 2984\u20132990","DOI":"10.24963\/ijcai.2018\/414"},{"issue":"7540","key":"9307_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu A A, Veness J, Bellemare M G, Graves A, Riedmiller M, Fidjeland A K, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D. Human-level control through deep reinforcement learning. Nature, 2015, 518(7540): 529\u2013533","journal-title":"Nature"},{"issue":"10","key":"9307_CR15","doi-asserted-by":"publisher","first-page":"4374","DOI":"10.1109\/TNNLS.2019.2948892","volume":"31","author":"W Meng","year":"2019","unstructured":"Meng W, Zheng Q, Yang L, Li P, Pan G. Qualitative measurements of policy discrepancy for return-based deep q-network. IEEE Transactions on Neural Networks and Learning Systems, 2019, 31(10): 4374\u20134380","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"9307_CR16","unstructured":"Mnih V, Badia A P, Mirza M, Graves A, Lillicrap T P, Harley T, Silver D, Kavukcuoglu K. Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd International Conference on Machine Learning. 2016, 1928\u20131937"},{"issue":"7587","key":"9307_CR17","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison C J, Guez A, Sifre L, van den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, et al. Mastering the game of go with deep neural networks and tree search. Nature, 2016, 529(7587): 484","journal-title":"Nature"},{"issue":"7676","key":"9307_CR18","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A, et al. Mastering the game of go without human knowledge. Nature, 2017, 550(7676): 354","journal-title":"Nature"},{"key":"9307_CR19","unstructured":"Sukhbaatar S, Szlam A, Fergus R. Learning multiagent communication with back propagation. In: Proceedings of the 30th Annual Conference on Neural Information Processing Systems. 2016, 2244\u20132252"},{"key":"9307_CR20","unstructured":"Peng P, Wen Y, Yang Y, Yuan Q, Tang Z, Long H, Wang J. Multiagent bidirectionally-coordinated nets: emergence of human-level coordination in learning to play starcraft combat games. 2017, arXiv preprint arXiv:1703.10069"},{"key":"9307_CR21","unstructured":"Heinrich J, Silver D. Smooth uct search in computer poker. In: Proceedings of the 24th International Joint Conference on Artificial Intelligence. 2015, 554\u2013560"},{"key":"9307_CR22","unstructured":"Lisy V, Lanctot M, Bowling M. Online Monte Carlo counterfactual regret minimization for search in imperfect information games. In: Proceedings of the 14th International Conference on Autonomous Agents and Multiagent Systems. 2015, 27\u201336"},{"key":"9307_CR23","doi-asserted-by":"crossref","unstructured":"Brown N, Sandholm T. Libratus: the superhuman ai for no-limit poker. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence. 2017, 5226\u20135228","DOI":"10.24963\/ijcai.2017\/772"},{"issue":"6337","key":"9307_CR24","doi-asserted-by":"publisher","first-page":"508","DOI":"10.1126\/science.aam6960","volume":"356","author":"M Morav\u010d\u00edk","year":"2017","unstructured":"Morav\u010d\u00edk M, Schmid M, Burch N, Lisy V, Morrill D, Bard N, Davis T, Waugh K, Johanson M, Bowling M. Deepstack: expert-level artificial intelligence in heads-up no-limit poker. Science, 2017, 356(6337): 508\u2013513","journal-title":"Science"},{"issue":"2","key":"9307_CR25","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1016\/j.geb.2005.08.005","volume":"56","author":"D S Leslie","year":"2006","unstructured":"Leslie D S, Collins E J. Generalised weakened fictitious play. Games and Economic Behavior, 2006, 56(2): 285\u2013298","journal-title":"Games and Economic Behavior"},{"issue":"2","key":"9307_CR26","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1006\/game.1996.0065","volume":"15","author":"E Hendon","year":"1996","unstructured":"Hendon E, Jacobsen H J, Sloth B. Fictitious play in extensive form games. Games and Economic Behavior, 1996, 15(2): 177\u2013202","journal-title":"Games and Economic Behavior"},{"key":"9307_CR27","unstructured":"Thrun S, Schwartz A. Issues in using function approximation for reinforcement learning. In: Proceedings of the 4th Connectionist Models Summer School. 1993, 1\u20137"},{"key":"9307_CR28","unstructured":"Anschel O, Baram N, Shimkin N. Averaged-DQN: variance reduction and stabilization for deep reinforcement learning. In: Proceedings of the 34th International Conference on Machine Learning. 2017, 176\u2013185"},{"key":"9307_CR29","unstructured":"Foerster J, Nardelli N, Farquhar G, Afouras T, Torr P H, Kohli P, Whiteson S. Stabilising experience replay for deep multi-agent reinforcement learning. In: Proceedings of the 34th International Conference on Machine Learning. 2017, 1146\u20131155"},{"key":"9307_CR30","doi-asserted-by":"crossref","unstructured":"Kocsis L, Szepesv\u00e1ri C. Bandit based Monte-Carlo planning. In: Proceedings of the 17th European Conference on Machine Learning. 2006, 282\u2013293","DOI":"10.1007\/11871842_29"},{"issue":"19","key":"9307_CR31","doi-asserted-by":"publisher","first-page":"1876","DOI":"10.1016\/j.tcs.2009.01.016","volume":"410","author":"J Y Audibert","year":"2009","unstructured":"Audibert J Y, Munos R, Szepesv\u00e1ri C. Exploration-exploitation tradeoff using variance estimates in multi-armed bandits. Theoretical Computer Science, 2009, 410(19): 1876\u20131902","journal-title":"Theoretical Computer Science"},{"key":"9307_CR32","unstructured":"Shah D, Xie Q, Xu Z. On reinforcement learning using Monte Carlo tree search with supervised learning: non-asymptotic analysis. 2019, arXiv preprint arXiv:1902.05213"},{"key":"9307_CR33","unstructured":"Lisy V, Kovarik V, Lanctot M, Bosansky B. Convergence of Monte Carlo tree search in simultaneous move games. In: Proceedings of the 27th Annual Conference on Neural Information Processing Systems. 2013, 2112\u20132120"},{"key":"9307_CR34","doi-asserted-by":"crossref","unstructured":"Auger D. Multiple tree for partially observable Monte-Carlo tree search. In: Proceedings of the 14th European Conference on the Applications of Evolutionary Computation. 2011, 53\u201362","DOI":"10.1007\/978-3-642-20525-5_6"},{"key":"9307_CR35","first-page":"575","volume":"42","author":"M Ponsen","year":"2011","unstructured":"Ponsen M, De Jong S, Lanctot M. Computing approximate nash equilibria and robust best-responses using sampling. Journal of Artificial Intelligence Research, 2011, 42: 575\u2013605","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"2","key":"9307_CR36","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1109\/TCIAIG.2012.2200894","volume":"4","author":"P I Cowling","year":"2012","unstructured":"Cowling P I, Powley E J, Whitehouse D. Information set Monte Carlo tree search. IEEE Transactions on Computational Intelligence and AI in Games, 2012, 4(2): 120\u2013143","journal-title":"IEEE Transactions on Computational Intelligence and AI in Games"},{"key":"9307_CR37","unstructured":"Jin P, Keutzer K, Levine S. Regret minimization for partially observable deep reinforcement learning. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 2342\u20132351"},{"issue":"1","key":"9307_CR38","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1145\/3147.3165","volume":"11","author":"J S Vitter","year":"1985","unstructured":"Vitter J S. Random sampling with a reservoir. ACM Transactions on Mathematical Software, 1985, 11(1): 37\u201357","journal-title":"ACM Transactions on Mathematical Software"},{"key":"9307_CR39","doi-asserted-by":"crossref","unstructured":"Chaslot G M B, Winands M H, van den Herik H J. Parallel Monte-Carlo tree search. In: Proceedings of the 6th International Conference on Computers and Games. 2008, 60\u201371","DOI":"10.1007\/978-3-540-87608-3_6"},{"issue":"22","key":"9307_CR40","doi-asserted-by":"publisher","first-page":"20178","DOI":"10.1364\/OE.17.020178","volume":"17","author":"Q Fang","year":"2009","unstructured":"Fang Q, Boas D A. Monte Carlo simulation of photon migration in 3d turbid media accelerated by graphics processing units. Optics Express, 2009, 17(22): 20178\u201320190","journal-title":"Optics Express"},{"issue":"7","key":"9307_CR41","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1109\/MC.2008.209","volume":"41","author":"M D Hill","year":"2008","unstructured":"Hill M D, Marty M R. Amdahl\u2019s law in the multicore era. Computer, 2008, 41(7): 33\u201338","journal-title":"Computer"},{"key":"9307_CR42","unstructured":"Lanctot M, Waugh K, Zinkevich M, Bowling M. Monte Carlo sampling for regret minimization in extensive games. In: Proceedings of the 23rd Annual Conference on Neural Information Processing Systems. 2009, 1078\u20131086"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-020-9307-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-020-9307-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-020-9307-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,19]],"date-time":"2022-11-19T21:56:47Z","timestamp":1668895007000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-020-9307-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,16]]},"references-count":42,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,10]]}},"alternative-id":["9307"],"URL":"https:\/\/doi.org\/10.1007\/s11704-020-9307-6","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,16]]},"assertion":[{"value":"28 August 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 July 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"155334"}}