{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:10:06Z","timestamp":1755882606958,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T00:00:00Z","timestamp":1722297600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,30]]},"DOI":"10.1145\/3664476.3664484","type":"proceedings-article","created":{"date-parts":[[2024,7,25]],"date-time":"2024-07-25T12:35:50Z","timestamp":1721910950000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SoK: A Comparison of Autonomous Penetration Testing Agents"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4157-1674","authenticated-orcid":false,"given":"Raphael","family":"Simon","sequence":"first","affiliation":[{"name":"Royal Military Academy, Belgium"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0696-8093","authenticated-orcid":false,"given":"Wim","family":"Mees","sequence":"additional","affiliation":[{"name":"Royal Military Academy, Belgium"}]}],"member":"320","published-online":{"date-parts":[[2024,7,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"32nd USENIX Security Symposium (USENIX Security 23)","author":"Al\u00a0Wahaibi Salim","year":"2023","unstructured":"Salim Al\u00a0Wahaibi, Myles Foley, and Sergio Maffeis. 2023. SQIRL: Grey-Box Detection of SQL Injection Vulnerabilities Using Reinforcement Learning. In 32nd USENIX Security Symposium (USENIX Security 23). 6097\u20136114."},{"volume-title":"Dynamical Systems, and Control (2 ed.)","author":"Brunton L.","key":"e_1_3_2_1_2_1","unstructured":"Steven\u00a0L. Brunton and J.\u00a0Nathan Kutz. 2022. Data-Driven Science and Engineering: Machine Learning, Dynamical Systems, and Control (2 ed.). Cambridge University Press."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2022.103055"},{"key":"e_1_3_2_1_4_1","unstructured":"Gelei Deng Yi Liu V\u00edctor Mayoral-Vilches Peng Liu Yuekang Li Yuan Xu Tianwei Zhang Yang Liu Martin Pinzger and Stefan Rass. 2023. PentestGPT: An LLM-empowered Automatic Penetration Testing Tool. http:\/\/arxiv.org\/abs\/2308.06782 arXiv:2308.06782 [cs]."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Gabriel Dulac-Arnold Richard Evans Hado van Hasselt Peter Sunehag Timothy Lillicrap Jonathan Hunt Timothy Mann Theophane Weber Thomas Degris and Ben Coppin. 2016. Deep Reinforcement Learning in Large Discrete Action Spaces. https:\/\/doi.org\/10.48550\/arXiv.1512.07679 arXiv:1512.07679 [cs stat].","DOI":"10.48550\/arXiv.1512.07679"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-021-05961-4"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jisa.2021.102903"},{"key":"e_1_3_2_1_8_1","unstructured":"Jonathan Esteban. 2022. Simulating Network Lateral Movements through the CyberBattleSim Web Platform. https:\/\/dspace.mit.edu\/handle\/1721.1\/143191 Accepted: 2022-06-15T13:02:28Z."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TrustCom56396.2022.00031"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_1_11_1","unstructured":"Matthew Hausknecht and Peter Stone. 2017. Deep Recurrent Q-Learning for Partially Observable MDPs. http:\/\/arxiv.org\/abs\/1507.06527 arXiv:1507.06527 [cs]."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","unstructured":"Jarom\u00edr Janisch Tom\u00e1\u0161 Pevn\u00fd and Viliam Lis\u00fd. 2023. NASimEmu: Network Attack Simulator & Emulator for Training Agents Generalizing to Novel Scenarios. https:\/\/doi.org\/10.48550\/arXiv.2305.17246 arXiv:2305.17246 [cs].","DOI":"10.48550\/arXiv.2305.17246"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474369.3486877"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512234"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.22541\/au.169406476.64066230\/v1"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2020.102108"},{"key":"e_1_3_2_1_18_1","volume-title":"Human-level control through deep reinforcement learning. nature 518, 7540","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei\u00a0A Rusu, Joel Veness, Marc\u00a0G Bellemare, Alex Graves, Martin Riedmiller, Andreas\u00a0K Fidjeland, Georg Ostrovski, 2015. Human-level control through deep reinforcement learning. nature 518, 7540 (2015), 529\u2013533."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.28.1.1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5220\/0010232504400449"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3121870"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ICML 2020","author":"Petrenko Aleksei","year":"2020","unstructured":"Aleksei Petrenko, Zhehui Huang, Tushar Kumar, Gaurav\u00a0S. Sukhatme, and Vladlen Koltun. 2020. Sample Factory: Egocentric 3D Control from Pixels at 100000 FPS with Asynchronous Reinforcement Learning. In Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event (Proceedings of Machine Learning Research, Vol.\u00a0119). PMLR, 7652\u20137662. http:\/\/proceedings.mlr.press\/v119\/petrenko20a.html"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","unstructured":"Van-Hau Pham Hien\u00a0Do Hoang Phan\u00a0Thanh Trung Van\u00a0Dinh Quoc Trong-Nghia To and Phan\u00a0The Duy. 2023. Raij\u016b: Reinforcement Learning-Guided Post-Exploitation for Automating Security Assessment of Network Systems. https:\/\/doi.org\/10.48550\/arXiv.2309.15518 arXiv:2309.15518 [cs].","DOI":"10.48550\/arXiv.2309.15518"},{"key":"e_1_3_2_1_24_1","unstructured":"Rapid7. [n. d.]. Metasploit. https:\/\/www.metasploit.com\/."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v26i1.8363"},{"key":"e_1_3_2_1_26_1","unstructured":"Carlos Sarraute Olivier Buffet and Joerg Hoffmann. 2013. Penetration Testing == POMDP Solving? http:\/\/arxiv.org\/abs\/1306.4714 arXiv:1306.4714 [cs]."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","unstructured":"Tom Schaul John Quan Ioannis Antonoglou and David Silver. 2016. Prioritized Experience Replay. https:\/\/doi.org\/10.48550\/arXiv.1511.05952 arXiv:1511.05952 [cs].","DOI":"10.48550\/arXiv.1511.05952"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. https:\/\/doi.org\/10.48550\/arXiv.1707.06347 arXiv:1707.06347 [cs].","DOI":"10.48550\/arXiv.1707.06347"},{"key":"e_1_3_2_1_29_1","volume-title":"Autonomous penetration testing using reinforcement learning. arXiv preprint arXiv:1905.05965","author":"Schwartz Jonathon","year":"2019","unstructured":"Jonathon Schwartz and Hanna Kurniawati. 2019. Autonomous penetration testing using reinforcement learning. arXiv preprint arXiv:1905.05965 (2019)."},{"key":"e_1_3_2_1_30_1","unstructured":"Jonathon Schwartz and Hanna Kurniawati. 2019. NASim: Network Attack Simulator. https:\/\/networkattacksimulator.readthedocs.io\/."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3041951"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","unstructured":"Maxwell Standen Martin Lucas David Bowman Toby\u00a0J. Richer Junae Kim and Damian Marriott. 2021. CybORG: A Gym for the Development of Autonomous Cyber Agents. https:\/\/doi.org\/10.48550\/arXiv.2108.09118 arXiv:2108.09118 [cs].","DOI":"10.48550\/arXiv.2108.09118"},{"key":"e_1_3_2_1_33_1","volume-title":"Reinforcement Learning: An Introduction. A Bradford Book","author":"Sutton S.","year":"2018","unstructured":"Richard\u00a0S. Sutton and Andrew\u00a0G. Barto. 2018. Reinforcement Learning: An Introduction. A Bradford Book, Cambridge, MA, USA."},{"key":"e_1_3_2_1_34_1","unstructured":"Isao Takaesu. [n. d.]. DeepExploit. https:\/\/github.com\/13o-bbr-bbq\/machine_learning_security\/tree\/master\/DeepExploit."},{"key":"e_1_3_2_1_35_1","unstructured":"Microsoft Defender\u00a0Research Team. 2021. CyberBattleSim. https:\/\/github.com\/microsoft\/cyberbattlesim. Created by Christian Seifert Michael Betser William Blum James Bono Kate Farris Emily Goren Justin Grana Kristian Holsheimer Brandon Marken Joshua Neil Nicole Nichols Jugal Parikh Haoran Wei."},{"key":"e_1_3_2_1_36_1","unstructured":"Khuong Tran Ashlesha Akella Maxwell Standen Junae Kim David Bowman Toby Richer and Chin-Teng Lin. 2021. Deep hierarchical reinforcement agents for automated penetration testing. http:\/\/arxiv.org\/abs\/2109.06449 arXiv:2109.06449 [cs]."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09575-5"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1155\/2023\/5834434"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"e_1_3_2_1_40_1","unstructured":"Yizhou Yang and Xin Liu. 2022. Behaviour-Diverse Automatic Penetration Testing: A Curiosity-Driven Multi-Objective Deep Reinforcement Learning Approach. http:\/\/arxiv.org\/abs\/2202.10630 arXiv:2202.10630 [cs]."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11198823"}],"event":{"name":"ARES 2024: The 19th International Conference on Availability, Reliability and Security","acronym":"ARES 2024","location":"Vienna Austria"},"container-title":["Proceedings of the 19th International Conference on Availability, Reliability and Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664476.3664484","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664476.3664484","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T16:53:20Z","timestamp":1755881600000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664476.3664484"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,30]]},"references-count":41,"alternative-id":["10.1145\/3664476.3664484","10.1145\/3664476"],"URL":"https:\/\/doi.org\/10.1145\/3664476.3664484","relation":{},"subject":[],"published":{"date-parts":[[2024,7,30]]},"assertion":[{"value":"2024-07-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}