{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:18:02Z","timestamp":1763191082892,"version":"3.45.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228019","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Mixed Policy-Space Response Oracles"],"prefix":"10.1109","author":[{"given":"Junyu","family":"Zhang","sequence":"first","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering,Beijing,China"}]},{"given":"Feihong","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua University,High Technology Lab,Beijing,China"}]},{"given":"Jian","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering,Beijing,China"}]},{"given":"Chao","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University,High Technology Lab,Beijing,China"}]},{"given":"Xudong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Electronic Engineering,Beijing,China"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"374","article-title":"Iterative solution of games by fictitious play","volume":"13","author":"Brown","year":"1951","journal-title":"Act. Anal. Prod Allocation"},{"key":"ref2","first-page":"805","article-title":"Fictitious self-play in extensive-form games","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","volume":"37","author":"Heinrich"},{"key":"ref3","first-page":"1729","article-title":"Regret minimization in games with incomplete information","volume-title":"Proceedings of the 20th International Conference on Neural Information Processing Systems","author":"Zinkevich"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/science.1259433"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref6","article-title":"Deep reinforcement learning from self-play in imperfect-information games","volume":"abs\/1603.01121","author":"Heinrich","year":"2016","journal-title":"CoRR"},{"key":"ref7","first-page":"7951","article-title":"Policy space response oracles: A survey","volume-title":"Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, IJCAI-24","author":"Bighashdel"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3233\/faia251106"},{"article-title":"A-psro: A unified strategy learning method with advantage function for normal-form games","year":"2024","author":"Hu","key":"ref9"},{"key":"ref10","first-page":"536","article-title":"Planning in the presence of cost functions controlled by an adversary","volume-title":"Proceedings of the 20th International Conference on Machine Learning (ICML-03)","author":"McMahan"},{"key":"ref11","article-title":"A unified game-theoretic approach to multiagent reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Lanctot"},{"key":"ref12","first-page":"20 238","article-title":"Pipeline psro: A scalable approach for finding approximate nash equilibria in large games","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Mcaleer"},{"article-title":"NeuPL: Neural population learning","volume-title":"International Conference on Learning Representations","author":"Liu","key":"ref13"},{"key":"ref14","first-page":"434","article-title":"Open-ended learning in symmetric zero-sum games","volume-title":"Proceedings of the 36th International Conference on Machine Learning","volume":"97","author":"Balduzzi"},{"key":"ref15","first-page":"8514","article-title":"Modelling behavioural diversity for learning in open-ended games","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Perez-Nieves"},{"key":"ref16","first-page":"941","article-title":"Towards unifying behavioral and response diversity for open-ended learning in zero-sum games","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Liu"},{"key":"ref17","first-page":"10 339","article-title":"A unified diversity measure for multiagent reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Liu"},{"key":"ref18","first-page":"67 771","article-title":"Policy space diversity for non-transitive games","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Yao"},{"article-title":"Efficient policy space response oracles","year":"2022","author":"Zhou","key":"ref19"},{"key":"ref20","article-title":"Online double oracle","author":"Dinh","year":"2022","journal-title":"Transactions on Machine Learning Research"},{"key":"ref21","article-title":"Anytime PSRO for two-player zero-sum games","volume":"abs\/2201.07700","author":"McAleer","year":"2022","journal-title":"CoRR"},{"key":"ref22","first-page":"3164","article-title":"Connecting optimal ex-ante collusion in teams to extensive-form correlation: Faster algorithms and positive complexity results","volume-title":"International Conference on Machine Learning","author":"Farina"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i6.16728"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3490486.3538330"},{"article-title":"Self-play psro: Toward optimal populations in two-player zero-sum games","year":"2022","author":"McAleer","key":"ref25"},{"key":"ref26","first-page":"7480","article-title":"Multi-agent training beyond zero-sum with correlated equilibrium meta-solvers","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Marris"},{"key":"ref27","first-page":"1247","article-title":"Neural population learning beyond symmetric zero-sum games","volume-title":"Proceedings of the 23rd International Conference on Autonomous Agents and Multiagent Systems","author":"Liu"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-45619-9"},{"key":"ref29","first-page":"17 443","article-title":"Real world games look like spinning tops","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Czarnecki"},{"key":"ref30","first-page":"24 611","article-title":"The surprising effectiveness of ppo in cooperative multi-agent games","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Yu"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228019.pdf?arnumber=11228019","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:14:07Z","timestamp":1763190847000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228019\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228019","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}