{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T15:36:31Z","timestamp":1766504191639,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032080639","type":"print"},{"value":"9783032080646","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-08064-6_9","type":"book-chapter","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T21:45:48Z","timestamp":1760219148000},"page":"172-192","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["PoolFlip: A Multi-agent Reinforcement Learning Security Environment for\u00a0Cyber Defense"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8545-0371","authenticated-orcid":false,"given":"Xavier","family":"Cadet","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3411-8912","authenticated-orcid":false,"given":"Simona","family":"Boboila","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5078-9907","authenticated-orcid":false,"given":"Sie Hendrata","family":"Dharmawan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4979-5292","authenticated-orcid":false,"given":"Alina","family":"Oprea","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1913-4223","authenticated-orcid":false,"given":"Peter","family":"Chin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"9_CR1","doi-asserted-by":"publisher","first-page":"105116","DOI":"10.1016\/j.engappai.2022.105116","volume":"114","author":"G Apruzzese","year":"2022","unstructured":"Apruzzese, G., Laskov, P., Schneider, J.: Cyber-security and reinforcement learning \u2013 a brief survey. Eng. Appl. Artif. Intell. 114, 105116 (2022)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"9_CR2","unstructured":"ATT &CK, M.: Scheduled Task\/Job: Cron. https:\/\/attack.mitre.org\/techniques\/T1053\/003\/. Accessed 4 June 2025"},{"key":"9_CR3","unstructured":"Authors, M.: Multi-agent reinforcement learning for cybersecurity: classification and survey. J. Netw. Comput. Appl. (2025)"},{"key":"9_CR4","unstructured":"Bighashdel, A., Wang, Y., McAleer, S., Savani, R., Oliehoek, F.A.: Policy space response oracles: a survey. In: IJCAI (2024)"},{"key":"9_CR5","unstructured":"Cui, J., et al.: MACTA: a multi-agent reinforcement learning approach for cache timing attacks and detection. In: ICLR (2023). http:\/\/www.cs.utexas.edu\/users\/ai-lab?ICLR23-Cui"},{"issue":"4","key":"9_CR6","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1007\/s00145-012-9134-5","volume":"26","author":"M Dijk","year":"2013","unstructured":"Dijk, M., Juels, A., Oprea, A., Rivest, R.L.: FlipIt: the game of stealthy takeover. J. Cryptol. 26(4), 655\u2013713 (2013)","journal-title":"J. Cryptol."},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Farhang, S., Grossklags, J.: Flipleakage: A game-theoretic approach to protect against stealthy attackers in the presence of information leakage. In: Proceedings of GameSec, pp. 195\u2013214 (2016)","DOI":"10.1007\/978-3-319-47413-7_12"},{"key":"9_CR8","doi-asserted-by":"publisher","first-page":"102767","DOI":"10.1016\/j.jnca.2020.102767","volume":"169","author":"S Gamage","year":"2020","unstructured":"Gamage, S., Samarabandu, J.: Deep learning methods in network intrusion detection: a survey and an objective comparison. J. Netw. Comput. Appl. 169, 102767 (2020)","journal-title":"J. Netw. Comput. Appl."},{"key":"9_CR9","series-title":"Studies in Computational Intelligence","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/978-3-030-93409-5_68","volume-title":"Complex Networks & Their Applications X","author":"L Greige","year":"2022","unstructured":"Greige, L., Chin, P.: Deep reinforcement learning for FlipIt security game. In: Benito, R.M., Cherifi, C., Cherifi, H., Moro, E., Rocha, L.M., Sales-Pardo, M. (eds.) COMPLEX NETWORKS 2021. SCI, vol. 1072, pp. 831\u2013843. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-93409-5_68"},{"key":"9_CR10","unstructured":"Guo, W., Wu, X., Wang, L., Xing, X., Song, D.: Patrol: provable defense against adversarial policy in two-player games. In: USENIX Security Symposium (2023). https:\/\/www.usenix.org\/conference\/usenixsecurity23\/presentation\/guo-wenbo"},{"key":"9_CR11","unstructured":"Hammar, K., Dhir, N., Stadler, R.: Optimal defender strategies for CAGE-2 using causal modeling and tree search. arXiv preprint arXiv:2407.11070 (2024)"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Hu, P., Li, H., Fu, H., Cansever, D., Mohapatra, P.: Dynamic defense strategy against advanced persistent threat with insiders. In: INFOCOM (2015)","DOI":"10.1109\/INFOCOM.2015.7218444"},{"key":"9_CR13","unstructured":"Kiely, M., Bowman, D., Standen, M., Moir, C.: On autonomous agents in a cyber defence environment (2023). arXiv:2309.07388"},{"key":"9_CR14","unstructured":"Lanctot, M., et al.: A unified game-theoretic approach to multiagent reinforcement learning (2017). arXiv:1711.00832"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Laszka, A., Horvath, G., Felegyhazi, M., Butty\u00e1n, L.: FlipThem: modeling targeted attacks with FlipIt for multiple resources. In: Decision and Game Theory for Security, pp. 175\u2013194 (2014)","DOI":"10.1007\/978-3-319-12601-2_10"},{"key":"9_CR16","unstructured":"Leung, K., Leckie, C.: Unsupervised anomaly detection in network intrusion detection using clusters. In: ACSC, pp. 333\u2013342 (2005)"},{"key":"9_CR17","unstructured":"Lian, J., et al.: Fusion-PSRO: nash policy fusion for policy space response oracles (2025). arXiv:2405.21027"},{"key":"9_CR18","unstructured":"Liang, Y., et al.: Game-theoretic robust reinforcement learning handles temporally-coupled perturbations (2024). arXiv:2307.12062"},{"key":"9_CR19","unstructured":"Ma, C., et al.: Evolving diverse red-team language models in multi-round multi-agent games (2024). arXiv:2310.00322"},{"key":"9_CR20","unstructured":"Mcaleer, S., Lanier, J., Fox, R., Baldi, P.: Pipeline PSRO: a scalable approach for finding approximate nash equilibria in large games. In: NeurIPS, vol.\u00a033, pp. 20238\u201320248 (2020)"},{"key":"9_CR21","doi-asserted-by":"publisher","first-page":"120292","DOI":"10.1109\/ACCESS.2024.3446310","volume":"12","author":"G Mcdonald","year":"2024","unstructured":"Mcdonald, G., Li, L., Mallah, R.A.: Finding the optimal security policies for autonomous cyber operations with competitive reinforcement learning. IEEE Access 12, 120292\u2013120305 (2024)","journal-title":"IEEE Access"},{"key":"9_CR22","unstructured":"Microsoft Research: CyberBattleSim - Microsoft Research (2020). https:\/\/www.microsoft.com\/en-us\/research\/project\/cyberbattlesim\/. An experimentation and research platform to investigate the interaction of automated agents in an abstract simulated network environments"},{"key":"9_CR23","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning (2013). arXiv:1312.5602"},{"issue":"7540","key":"9_CR24","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"9_CR25","unstructured":"Nguyen, T.T., Reddi, V.J.: Deep reinforcement learning for cyber security. IEEE Trans. Neural Netw. Learn. Syst. (2021). https:\/\/arxiv.org\/abs\/1906.05799, arXiv preprint arXiv:1906.05799"},{"key":"9_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1007\/978-3-030-32430-8_22","volume-title":"Decision and Game Theory for Security","author":"L Oakley","year":"2019","unstructured":"Oakley, L., Oprea, A.: $$\\sf QFlip$$: an adaptive reinforcement learning strategy for the $$\\sf FlipIt$$ security game. In: Alpcan, T., Vorobeychik, Y., Baras, J.S., D\u00e1n, G. (eds.) GameSec 2019. LNCS, vol. 11836, pp. 364\u2013384. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-32430-8_22"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Oliehoek, F.A., Amato, C.: A concise introduction to decentralized POMDPs. In: SpringerBriefs in Intelligent Systems (2016)","DOI":"10.1007\/978-3-319-28929-8"},{"key":"9_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-642-34266-0_14","volume-title":"Decision and Game Theory for Security","author":"V Pham","year":"2012","unstructured":"Pham, V., Cid, C.: Are we compromised? Modelling security assessment games. In: Grossklags, J., Walrand, J. (eds.) GameSec 2012. LNCS, vol. 7638, pp. 234\u2013247. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-34266-0_14"},{"issue":"7","key":"9_CR29","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1145\/1785414.1785439","volume":"53","author":"T Roughgarden","year":"2010","unstructured":"Roughgarden, T.: Algorithmic game theory. Commun. ACM 53(7), 78\u201386 (2010)","journal-title":"Commun. ACM"},{"key":"9_CR30","doi-asserted-by":"crossref","unstructured":"Saha, S., Vullikanti, A., Halappanavar, M.: FlipNet: modeling covert and persistent attacks on networked resources. In: ICDCS, pp. 2444\u20132451 (2017)","DOI":"10.1109\/ICDCS.2017.298"},{"issue":"1","key":"9_CR31","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1186\/s40537-024-00957-y","volume":"11","author":"AH Salem","year":"2024","unstructured":"Salem, A.H., Azzam, S.M., Abohany, A.A., Emam, O.E.: Advancing cybersecurity: a comprehensive review of AI-driven detection techniques. J. Big Data 11(1), 105 (2024)","journal-title":"J. Big Data"},{"key":"9_CR32","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"issue":"7587","key":"9_CR33","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"9_CR34","unstructured":"Singh, A.V., et al.: Hierarchical multi-agent reinforcement learning for cyber network defense. arXiv preprint arXiv:2410.17351 (2024)"},{"key":"9_CR35","unstructured":"Smith, M.O., Anthony, T., Wang, Y., Wellman, M.P.: Learning to play against any mixture of opponents (2021). arXiv:2009.14180"},{"key":"9_CR36","unstructured":"Standen, M., Lucas, M., David, B., Richer, T.J., Kim, J., Marriott, D.: CybORG: a gym for the development of autonomous cyber agents. In: IJCAI-21 1st International Workshop on Adaptive Cyber Defense. arXiv (2021)"},{"issue":"8","key":"9_CR37","doi-asserted-by":"publisher","first-page":"201","DOI":"10.21275\/MR24802085215","volume":"13","author":"S Subudhi","year":"2024","unstructured":"Subudhi, S.: Effectiveness of AI\/ML in SOAR (security automation and orchestration) platforms. IJSR 13(8), 201\u2013206 (2024)","journal-title":"IJSR"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Tong, L., Laszka, A., Yan, C., Zhang, N., Vorobeychik, Y.: Finding needles in a moving haystack: prioritizing alerts with adversarial reinforcement learning. AAAI, vol. 34, no. 01, pp. 946\u2013953 (2020)","DOI":"10.1609\/aaai.v34i01.5442"},{"issue":"7782","key":"9_CR39","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"key":"9_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: Deep reinforcement learning for green security games with real-time information. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33011401"},{"issue":"3","key":"9_CR41","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"9_CR42","unstructured":"Wiebe, J., Al\u00a0Mallah, R., Li, L.: Learning cyber defence tactics from scratch with multi-agent reinforcement learning (2023). arXiv:2310.05939"},{"key":"9_CR43","unstructured":"Xu, L., Perrault, A., Fang, F., Chen, H., Tambe, M.: Robust reinforcement learning under minimax regret for green security (2021). arXiv:2106.08413"},{"key":"9_CR44","unstructured":"Yao, J., et al.: Policy space diversity for non-transitive games. In: NeurIPS (2023)"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Yen, T.F., et al.: Beehive: large-scale log analysis for detecting suspicious activity in enterprise networks. In: ACSAC, pp. 199\u2013208 (2013)","DOI":"10.1145\/2523649.2523670"},{"key":"9_CR46","unstructured":"Zhang, B.H., et al.: Computing optimal equilibria and mechanisms via learning in zero-sum extensive-form games. In: NIPS (2023)"},{"key":"9_CR47","unstructured":"Zhang, R., et al.: A survey on self-play methods in reinforcement learning (2025). arXiv:2408.01072"},{"key":"9_CR48","unstructured":"Zhou, M., et al.: Efficient policy space response oracles (2022). arXiv:2202.00633"}],"container-title":["Lecture Notes in Computer Science","Game Theory and AI for Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-08064-6_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T21:46:12Z","timestamp":1760219172000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-08064-6_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"ISBN":["9783032080639","9783032080646"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-08064-6_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"12 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"GameSec","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Game Theory and AI for Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"gamesec2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.gamesec-conf.org\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}