{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T04:29:54Z","timestamp":1760243394836,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032080639","type":"print"},{"value":"9783032080646","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-08064-6_3","type":"book-chapter","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T21:43:54Z","timestamp":1760219034000},"page":"42-60","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Explore Reinforced: Equilibrium Approximation with\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-5197-4966","authenticated-orcid":false,"given":"Mateusz","family":"Nowak","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2287-5049","authenticated-orcid":false,"given":"Qintong","family":"Xie","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7882-0586","authenticated-orcid":false,"given":"Emma","family":"Graham","sequence":"additional","affiliation":[]},{"given":"Ryan","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7053-0924","authenticated-orcid":false,"given":"Michelle Yilin","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Roy","family":"Leibovitz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8545-0371","authenticated-orcid":false,"given":"Xavier","family":"Cadet","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1913-4223","authenticated-orcid":false,"given":"Peter","family":"Chin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"3_CR1","unstructured":"Arjona-Medina, J.A., Gillhofer, M., Widrich, M., Unterthiner, T., Brandstetter, J., Hochreiter, S.: Rudder: return decomposition for delayed rewards. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019 Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a032. Curran Associates, Inc. (2019). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/16105fb9cc614fc29e1bda00dab60d41-Paper.pdf"},{"key":"3_CR2","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. J. Mach. Learn. Res. 3, 397\u2013422 (2002)"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Auer, P., Cesa-Bianchi, N., Freund, Y., Schapire, R.E.: The nonstochastic multiarmed bandit problem. SIAM J. Comput. 32(1), 48\u201377 (2002)","DOI":"10.1137\/S0097539701398375"},{"issue":"1","key":"3_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.2307\/1911154","volume":"55","author":"RJ Aumann","year":"1987","unstructured":"Aumann, R.J.: Correlated equilibrium as an expression of bayesian rationality. Econometrica 55(1), 1\u201318 (1987)","journal-title":"Econometrica"},{"key":"3_CR5","unstructured":"Badia, A.P., et al.: Agent57: outperforming the atari human benchmark. In: International Conference on Machine Learning, pp. 507\u2013517. PMLR (2020)"},{"key":"3_CR6","unstructured":"Brown, N., Sandholm, T., Amos, B.: Depth-limited solving for imperfect-information games. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a031. Curran Associates, Inc. (2018). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2018\/file\/34306d99c63613fad5b2a140398c0420-Paper.pdf"},{"key":"3_CR7","doi-asserted-by":"publisher","unstructured":"Bubeck, S., Nicol\u00f2, C.B.: Regret Analysis of Stochastic and Nonstochastic Multi-armed Bandit Problems. now (2012). https:\/\/doi.org\/10.1561\/2200000024, https:\/\/ieeexplore.ieee.org\/document\/8187572","DOI":"10.1561\/2200000024"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Cesa-Bianchi, N., Lugosi, G.: Prediction, Learning, and Games. Cambridge University Press (2006)","DOI":"10.1017\/CBO9780511546921"},{"key":"3_CR9","unstructured":"Daskalakis, C., Papadimitriou, C.H.: Three-player games are hard. In: Electronic Colloquium on Computational Complexity, vol.\u00a0139, pp. 81\u201387. Citeseer (2005)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Farina, G., Bianchi, T., Sandholm, T.: Coarse correlation in extensive-form games. In: AAAI Conference on Artificial Intelligence (2020)","DOI":"10.1609\/aaai.v34i02.5563"},{"key":"3_CR11","unstructured":"Flaxman, A.D., Kalai, A.T., McMahan, H.B.: Online convex optimization in the bandit setting: gradient descent without a gradient. In: Proceedings of the Sixteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2005, pp. 385\u2013394. Society for Industrial and Applied Mathematics, USA (2005)"},{"key":"3_CR12","unstructured":"Fudenberg, D., Tirole, J.: Game theory. MIT press (1991)"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE international conference on robotics and automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"3_CR14","doi-asserted-by":"publisher","unstructured":"Holzleitner, M., Gruber, L., Arjona-Medina, J., Brandstetter, J., Hochreiter, S.: Convergence proof for actor-critic methods applied to PPO and RUDDER, pp. 105\u2013130. Springer, Heidelberg (2021). https:\/\/doi.org\/10.1007\/978-3-662-63519-3_5","DOI":"10.1007\/978-3-662-63519-3_5"},{"key":"3_CR15","unstructured":"Jaakkola, T., Jordan, M., Singh, S.: Convergence of stochastic iterative dynamic programming algorithms. In: Cowan, J., Tesauro, G., Alspector, J. (eds.) Advances in Neural Information Processing Systems, vol.\u00a06. Morgan-Kaufmann (1993). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1993\/file\/5807a685d1a9ab3b599035bc566ce2b9-Paper.pdf"},{"key":"3_CR16","doi-asserted-by":"publisher","unstructured":"Jain, G., Kumar, A., Bhat, S.A.: Recent developments of game theory and reinforcement learning approaches: a systematic review. IEEE Access 12, 9999\u201310011 (2024). https:\/\/doi.org\/10.1109\/ACCESS.2024.3352749","DOI":"10.1109\/ACCESS.2024.3352749"},{"key":"3_CR17","unstructured":"Kiely, M., Bowman, D., Standen, M., Moir, C.: On autonomous agents in a cyber defence environment. ArXiv abs\/2309.07388 (2023). https:\/\/api.semanticscholar.org\/CorpusID:261822629"},{"key":"3_CR18","unstructured":"Lis\u1ef3, V., Bowling, M.: Eqilibrium approximation quality of current no-limit poker bots. In: Workshops at the Thirty-First AAAI Conference on Artificial Intelligence (2017)"},{"key":"3_CR19","unstructured":"Lowe, R., Wu, Y.I., Tamar, A., Harb, J., Pieter\u00a0Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. Advances in neural information processing systems 30 (2017)"},{"key":"3_CR20","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Mnih, V., et\u00a0al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","DOI":"10.1038\/nature14236"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Morav\u010d\u00edk, M., et al.: Deepstack: expert-level artificial intelligence in heads-up no-limit poker. Science 356(6337), 508\u2013513 (2017)","DOI":"10.1126\/science.aam6960"},{"key":"3_CR23","unstructured":"Myerson, R.: Game Theory: Analysis of Conflict. Harvard University Press (1991). http:\/\/www.jstor.org\/stable\/j.ctvjsf522"},{"key":"3_CR24","unstructured":"Neu, G.: Explore no more: Improved high-probability regret bounds for non-stochastic bandits. In: Cortes, C., Lawrence, N., Lee, D., Sugiyama, M., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a028. Curran Associates, Inc. (2015). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2015\/file\/e5a4d6bf330f23a8707bb0d6001dfbe8-Paper.pdf"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Nisan, N., Tardos, E., Roughgarden, T., Vazirani, V.: Algorithmic Game Theory. Cambridge University Press (2007)","DOI":"10.1017\/CBO9780511800481"},{"key":"3_CR26","unstructured":"Osborne, M.: An Introduction to Game Theory. Oxford University Press (2009)"},{"issue":"3","key":"3_CR27","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1016\/S0022-0000(05)80063-7","volume":"48","author":"CH Papadimitriou","year":"1994","unstructured":"Papadimitriou, C.H.: On the complexity of the parity argument and other inefficient proofs of existence. J. Comput. Syst. Sci. 48(3), 498\u2013532 (1994)","journal-title":"J. Comput. Syst. Sci."},{"key":"3_CR28","unstructured":"Peng, B., Rubinstein, A.: The complexity of approximate (coarse) correlated equilibrium for incomplete information games. In: Agrawal, S., Roth, A. (eds.) Proceedings of Thirty Seventh Conference on Learning Theory. Proceedings of Machine Learning Research, vol.\u00a0247, pp. 4158\u20134184. PMLR, 30 Jun\u201303 Jul 2024. https:\/\/proceedings.mlr.press\/v247\/peng24a.html"},{"key":"3_CR29","unstructured":"Schulman, J.: Trust region policy optimization. arXiv preprint arXiv:1502.05477 (2015)"},{"key":"3_CR30","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"3_CR31","unstructured":"Shaheen, A., Badr, A., Abohendy, A., Alsaadawy, H., Alsayad, N.: Reinforcement learning in strategy-based and atari games: a review of google deepminds innovations (2025). https:\/\/arxiv.org\/abs\/2502.10303"},{"key":"3_CR32","unstructured":"Standen, M., Lucas, M., Bowman, D., Richer, T.J., Kim, J., Marriott, D.A.: Cyborg: A gym for the development of autonomous cyber agents. ArXiv abs\/2108.09118 (2021). https:\/\/api.semanticscholar.org\/CorpusID:237259783"},{"key":"3_CR33","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. A Bradford Book (2018)"},{"key":"3_CR34","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12 (1999)"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Tang, C., Abbatematteo, B., Hu, J., Chandra, R., Mart\u00edn-Mart\u00edn, R., Stone, P.: Deep reinforcement learning for robotics: A survey of real-world successes. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a039, pp. 28694\u201328698 (2025)","DOI":"10.1609\/aaai.v39i27.35095"},{"key":"3_CR36","doi-asserted-by":"publisher","unstructured":"Timbers, F., et al.: Approximate exploitability: learning a best response. In: Raedt, L.D. (ed.) Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI-22, pp. 3487\u20133493. International Joint Conferences on Artificial Intelligence Organization (7 2022). https:\/\/doi.org\/10.24963\/ijcai.2022\/484","DOI":"10.24963\/ijcai.2022\/484"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Wellman, M.P., Nguyen, T.H., Wright, M.: Empirical game-theoretic methods for adaptive cyber-defense. In: Adversarial and Uncertain Reasoning for Adaptive Cyber Defense: Control-and Game-Theoretic Approaches to Cyber Security, pp. 112\u2013128 (2019)","DOI":"10.1007\/978-3-030-30719-6_6"},{"key":"3_CR38","doi-asserted-by":"publisher","first-page":"1017","DOI":"10.1613\/jair.1.16146","volume":"82","author":"MP Wellman","year":"2025","unstructured":"Wellman, M.P., Tuyls, K., Greenwald, A.: Empirical game theoretic analysis: a survey. J. Artif. Intell. Res. 82, 1017\u20131076 (2025)","journal-title":"J. Artif. Intell. Res."}],"container-title":["Lecture Notes in Computer Science","Game Theory and AI for Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-08064-6_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T21:44:17Z","timestamp":1760219057000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-08064-6_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"ISBN":["9783032080639","9783032080646"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-08064-6_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"12 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"GameSec","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Game Theory and AI for Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"gamesec2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.gamesec-conf.org\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}