{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T10:33:43Z","timestamp":1776681223704,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,11]]},"DOI":"10.1145\/3560830.3563732","type":"proceedings-article","created":{"date-parts":[[2022,11,2]],"date-time":"2022-11-02T22:32:41Z","timestamp":1667428361000},"page":"149-159","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["Bridging Automated to Autonomous Cyber Defense"],"prefix":"10.1145","author":[{"given":"Andy","family":"Applebaum","sequence":"first","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Camron","family":"Dennler","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Patrick","family":"Dwyer","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Marina","family":"Moskowitz","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Harold","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Nicole","family":"Nichols","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Nicole","family":"Park","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Paul","family":"Rachwalski","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Frank","family":"Rau","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Adrian","family":"Webster","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]},{"given":"Melody","family":"Wolk","sequence":"additional","affiliation":[{"name":"Apple, Cupertino, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Finite-time analysis of the multiarmed bandit problem. Machine learning","author":"Auer Peter","year":"2002","unstructured":"Peter Auer , Nicolo Cesa-Bianchi , and Paul Fischer . 2002. Finite-time analysis of the multiarmed bandit problem. Machine learning , Vol. 47 , 2 ( 2002 ), 235--256. Peter Auer, Nicolo Cesa-Bianchi, and Paul Fischer. 2002. Finite-time analysis of the multiarmed bandit problem. Machine learning, Vol. 47, 2 (2002), 235--256."},{"key":"e_1_3_2_1_2_1","volume-title":"Cyborg: An autonomous cyber operations research gym. arXiv preprint arXiv:2002.10667","author":"Baillie Callum","year":"2020","unstructured":"020)]% baillie2020cyborg, Callum Baillie , Maxwell Standen , Jonathon Schwartz , Michael Docking , David Bowman , and Junae Kim . 2020 . Cyborg: An autonomous cyber operations research gym. arXiv preprint arXiv:2002.10667 (2020). 020)]% baillie2020cyborg, Callum Baillie, Maxwell Standen, Jonathon Schwartz, Michael Docking, David Bowman, and Junae Kim. 2020. Cyborg: An autonomous cyber operations research gym. arXiv preprint arXiv:2002.10667 (2020)."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the IEEE Systems, Man, and Cybernetics Information Assurance and Security Workshop","author":"Carver Curtis","year":"2000","unstructured":"Curtis Carver , JM Hill , John R Surdu , and Udo W Pooch . 2000 . A methodology for using intelligent agents to provide automated intrusion response . In Proceedings of the IEEE Systems, Man, and Cybernetics Information Assurance and Security Workshop , West Point, NY. 110--116. Curtis Carver, JM Hill, John R Surdu, and Udo W Pooch. 2000. A methodology for using intelligent agents to provide automated intrusion response. In Proceedings of the IEEE Systems, Man, and Cybernetics Information Assurance and Security Workshop, West Point, NY. 110--116."},{"key":"e_1_3_2_1_5_1","volume-title":"Albert S Thie, Madalina M Drugan, and Marco A Wiering.","author":"Elderman Richard","year":"2017","unstructured":"Richard Elderman , Leon JJ Pater , Albert S Thie, Madalina M Drugan, and Marco A Wiering. 2017 . Adversarial Reinforcement Learning in a Cyber Security Simulation.. In ICAART ( 2). 559--566. Richard Elderman, Leon JJ Pater, Albert S Thie, Madalina M Drugan, and Marco A Wiering. 2017. Adversarial Reinforcement Learning in a Cyber Security Simulation.. In ICAART (2). 559--566."},{"key":"e_1_3_2_1_6_1","volume-title":"Competitive Markov decision processes","author":"Filar Jerzy","unstructured":"Jerzy Filar and Koos Vrieze . 1997. Competitive Markov decision processes . Springer-Verlag . Jerzy Filar and Koos Vrieze. 1997. Competitive Markov decision processes. Springer-Verlag."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488932.3527286"},{"key":"e_1_3_2_1_8_1","volume-title":"On upper-confidence bound policies for non-stationary bandit problems. arXiv preprint arXiv:0805.3415","author":"Garivier Aur\u00e9lien","year":"2008","unstructured":"Aur\u00e9lien Garivier and Eric Moulines . 2008. On upper-confidence bound policies for non-stationary bandit problems. arXiv preprint arXiv:0805.3415 ( 2008 ). Aur\u00e9lien Garivier and Eric Moulines. 2008. On upper-confidence bound policies for non-stationary bandit problems. arXiv preprint arXiv:0805.3415 (2008)."},{"key":"e_1_3_2_1_9_1","volume-title":"Deep Reinforcement Learning for FlipIt Security Game. In International Conference on Complex Networks and Their Applications. Springer, 831--843","author":"Greige Laura","year":"2021","unstructured":"Laura Greige and Peter Chin . 2021 . Deep Reinforcement Learning for FlipIt Security Game. In International Conference on Complex Networks and Their Applications. Springer, 831--843 . Laura Greige and Peter Chin. 2021. Deep Reinforcement Learning for FlipIt Security Game. In International Conference on Complex Networks and Their Applications. Springer, 831--843."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.23919\/CNSM50824.2020.9269092"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01554-1_9"},{"key":"e_1_3_2_1_12_1","volume-title":"Hado Van Hasselt, and David Silver","author":"Horgan Dan","year":"2018","unstructured":"Dan Horgan , John Quan , David Budden , Gabriel Barth-Maron , Matteo Hessel , Hado Van Hasselt, and David Silver . 2018 . Distributed prioritized experience replay. arXiv preprint arXiv:1803.00933 (2018). Dan Horgan, John Quan, David Budden, Gabriel Barth-Maron, Matteo Hessel, Hado Van Hasselt, and David Silver. 2018. Distributed prioritized experience replay. arXiv preprint arXiv:1803.00933 (2018)."},{"key":"e_1_3_2_1_13_1","unstructured":"IACD. 2020. IACD Playbooks and Workflows. https:\/\/www.iacdautomate.org\/intro-to-playbooks-and-workflows. [Accessed 2022].  IACD. 2020. IACD Playbooks and Workflows. https:\/\/www.iacdautomate.org\/intro-to-playbooks-and-workflows. [Accessed 2022]."},{"key":"e_1_3_2_1_14_1","unstructured":"Incident Response Consortium. 2022. Playbook Policy Engine. https:\/\/www.incidentresponse.org\/playbooks\/. [Accessed 2022].  Incident Response Consortium. 2022. Playbook Policy Engine. https:\/\/www.incidentresponse.org\/playbooks\/. [Accessed 2022]."},{"key":"e_1_3_2_1_15_1","volume-title":"CyGIL: A Cyber Gym for Training Autonomous Agents over Emulated Network Systems. arXiv preprint arXiv:2109.03331","author":"Li Li","year":"2021","unstructured":"Li Li , Raed Fayad , and Adrian Taylor . 2021. CyGIL: A Cyber Gym for Training Autonomous Agents over Emulated Network Systems. arXiv preprint arXiv:2109.03331 ( 2021 ). Li Li, Raed Fayad, and Adrian Taylor. 2021. CyGIL: A Cyber Gym for Training Autonomous Agents over Emulated Network Systems. arXiv preprint arXiv:2109.03331 (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning. PMLR, 3053--3062","author":"Liang Eric","year":"2018","unstructured":"Eric Liang , Richard Liaw , Robert Nishihara , Philipp Moritz , Roy Fox , Ken Goldberg , Joseph Gonzalez , Michael Jordan , and Ion Stoica . 2018 . RLlib: Abstractions for distributed reinforcement learning . In International Conference on Machine Learning. PMLR, 3053--3062 . Eric Liang, Richard Liaw, Robert Nishihara, Philipp Moritz, Roy Fox, Ken Goldberg, Joseph Gonzalez, Michael Jordan, and Ion Stoica. 2018. RLlib: Abstractions for distributed reinforcement learning. In International Conference on Machine Learning. PMLR, 3053--3062."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50052-9"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Ashley SM McAbee Murali Tummala and John Mceachen. 2021. The use of partially observable Markov decision processes to optimally implement moving target defense. HICSS.  Ashley SM McAbee Murali Tummala and John Mceachen. 2021. The use of partially observable Markov decision processes to optimally implement moving target defense. HICSS.","DOI":"10.24251\/HICSS.2021.840"},{"key":"e_1_3_2_1_19_1","volume-title":"Michael Betser, William Blum, James Bono, Kate Farris, Emily Goren, Justin Grana, Kristian Holsheimer, Brandon Marken, Joshua Neil, Nicole Nichols","author":"Research Team Microsoft Defender","year":"2021","unstructured":"Microsoft Defender Research Team . 2021. CyberBattleSim. Created by Christian Seifert , Michael Betser, William Blum, James Bono, Kate Farris, Emily Goren, Justin Grana, Kristian Holsheimer, Brandon Marken, Joshua Neil, Nicole Nichols , Jugal Parikh , Haoran Wei ( 2021 ). Microsoft Defender Research Team. 2021. CyberBattleSim. Created by Christian Seifert, Michael Betser, William Blum, James Bono, Kate Farris, Emily Goren, Justin Grana, Kristian Holsheimer, Brandon Marken, Joshua Neil, Nicole Nichols, Jugal Parikh, Haoran Wei (2021)."},{"key":"e_1_3_2_1_20_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih , Adria Puigdomenech Badia , Mehdi Mirza , Alex Graves , Timothy Lillicrap , Tim Harley , David Silver , and Koray Kavukcuoglu . 2016 . Asynchronous methods for deep reinforcement learning . In International conference on machine learning. PMLR , 1928--1937. Volodymyr Mnih, Adria Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928--1937."},{"key":"e_1_3_2_1_21_1","volume-title":"Network environment design for autonomous cyberdefense. arXiv preprint arXiv:2103.07583","author":"Molina-Markham Andres","year":"2021","unstructured":"Andres Molina-Markham , Cory Miniter , Becky Powell , and Ahmad Ridley . 2021. Network environment design for autonomous cyberdefense. arXiv preprint arXiv:2103.07583 ( 2021 ). Andres Molina-Markham, Cory Miniter, Becky Powell, and Ahmad Ridley. 2021. Network environment design for autonomous cyberdefense. arXiv preprint arXiv:2103.07583 (2021)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1117\/12.2518976"},{"key":"e_1_3_2_1_23_1","volume-title":"QFlip: An Adaptive Reinforcement Learning Strategy for the FlipIt Security Game. In International Conference on Decision and Game Theory for Security. Springer, 364--384","author":"Oakley Lisa","year":"2019","unstructured":"Lisa Oakley and Alina Oprea . 2019 . QFlip: An Adaptive Reinforcement Learning Strategy for the FlipIt Security Game. In International Conference on Decision and Game Theory for Security. Springer, 364--384 . Lisa Oakley and Alina Oprea. 2019. QFlip: An Adaptive Reinforcement Learning Strategy for the FlipIt Security Game. In International Conference on Decision and Game Theory for Security. Springer, 364--384."},{"key":"e_1_3_2_1_24_1","volume-title":"Game theory","author":"Owen Guillermo","unstructured":"Guillermo Owen . 1995. Game theory . Academic Press . Guillermo Owen. 1995. Game theory. Academic Press."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"e_1_3_2_1_26_1","unstructured":"R. J. Williams. 1987. Reinforcement-learning connectionist systems.  R. J. Williams. 1987. Reinforcement-learning connectionist systems."},{"key":"e_1_3_2_1_27_1","first-page":"7","article-title":"Machine learning for autonomous cyber defense","volume":"22","author":"Ridley Ahmad","year":"2018","unstructured":"Ahmad Ridley . 2018 . Machine learning for autonomous cyber defense . The Next Wave , Vol. 22 , 1 (2018), 7 -- 14 . Ahmad Ridley. 2018. Machine learning for autonomous cyber defense. The Next Wave, Vol. 22, 1 (2018), 7--14.","journal-title":"The Next Wave"},{"key":"e_1_3_2_1_28_1","volume-title":"Using randomization to break the curse of dimensionality. Econometrica: Journal of the Econometric Society","author":"Rust John","year":"1997","unstructured":"John Rust . 1997. Using randomization to break the curse of dimensionality. Econometrica: Journal of the Econometric Society ( 1997 ), 487--516. John Rust. 1997. Using randomization to break the curse of dimensionality. Econometrica: Journal of the Econometric Society (1997), 487--516."},{"key":"e_1_3_2_1_29_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman , Filip Wolski , Prafulla Dhariwal , Alec Radford , and Oleg Klimov . 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 ( 2017 ). John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_30_1","volume-title":"Monte-Carlo planning in large POMDPs. Advances in neural information processing systems","author":"Silver David","year":"2010","unstructured":"David Silver and Joel Veness . 2010. Monte-Carlo planning in large POMDPs. Advances in neural information processing systems , Vol. 23 ( 2010 ). David Silver and Joel Veness. 2010. Monte-Carlo planning in large POMDPs. Advances in neural information processing systems , Vol. 23 (2010)."},{"key":"e_1_3_2_1_31_1","volume-title":"Assessing the Usability of Incident Response Playbook Frameworks. In CHI Conference on Human Factors in Computing Systems. 1--18","author":"Stevens Rock","year":"2022","unstructured":"Rock Stevens , Daniel Votipka , Josiah Dykstra , Fernando Tomlinson , Erin Quartararo , Colin Ahern , and Michelle L Mazurek . 2022 . How Ready is Your Ready? Assessing the Usability of Incident Response Playbook Frameworks. In CHI Conference on Human Factors in Computing Systems. 1--18 . Rock Stevens, Daniel Votipka, Josiah Dykstra, Fernando Tomlinson, Erin Quartararo, Colin Ahern, and Michelle L Mazurek. 2022. How Ready is Your Ready? Assessing the Usability of Incident Response Playbook Frameworks. In CHI Conference on Human Factors in Computing Systems. 1--18."},{"key":"e_1_3_2_1_32_1","volume-title":"Deep hierarchical reinforcement agents for automated penetration testing. arXiv preprint arXiv:2109.06449","author":"Tran Khuong","year":"2021","unstructured":"Khuong Tran , Ashlesha Akella , Maxwell Standen , Junae Kim , David Bowman , Toby Richer , and Chin-Teng Lin . 2021. Deep hierarchical reinforcement agents for automated penetration testing. arXiv preprint arXiv:2109.06449 ( 2021 ). Khuong Tran, Ashlesha Akella, Maxwell Standen, Junae Kim, David Bowman, Toby Richer, and Chin-Teng Lin. 2021. Deep hierarchical reinforcement agents for automated penetration testing. arXiv preprint arXiv:2109.06449 (2021)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00145-012-9134-5"},{"key":"e_1_3_2_1_34_1","volume-title":"Incorporating Deception into CyberBattleSim for Autonomous Defense. arXiv preprint arXiv:2108.13980","author":"Walter Erich","year":"2021","unstructured":"Erich Walter , Kimberly Ferguson-Walter , and Ahmad Ridley . 2021. Incorporating Deception into CyberBattleSim for Autonomous Defense. arXiv preprint arXiv:2108.13980 ( 2021 ). Erich Walter, Kimberly Ferguson-Walter, and Ahmad Ridley. 2021. Incorporating Deception into CyberBattleSim for Autonomous Defense. arXiv preprint arXiv:2108.13980 (2021)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.3390\/a15040134"},{"key":"e_1_3_2_1_36_1","volume-title":"Machine learning","author":"Watkins Christopher JCH","year":"1992","unstructured":"Christopher JCH Watkins and Peter Dayan . 1992. Q-learning. Machine learning , Vol. 8 , 3 ( 1992 ), 279--292. Christopher JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning, Vol. 8, 3 (1992), 279--292."},{"key":"e_1_3_2_1_37_1","unstructured":"Christopher John Cornish Hellaby Watkins. 1989. Learning from delayed rewards. (1989).  Christopher John Cornish Hellaby Watkins. 1989. Learning from delayed rewards. (1989)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2013.211"}],"event":{"name":"CCS '22: 2022 ACM SIGSAC Conference on Computer and Communications Security","location":"Los Angeles CA USA","acronym":"CCS '22","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"]},"container-title":["Proceedings of the 15th ACM Workshop on Artificial Intelligence and Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3560830.3563732","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3560830.3563732","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:34Z","timestamp":1750186834000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3560830.3563732"}},"subtitle":["Foundational Analysis of Tabular Q-Learning"],"short-title":[],"issued":{"date-parts":[[2022,11,7]]},"references-count":37,"alternative-id":["10.1145\/3560830.3563732","10.1145\/3560830"],"URL":"https:\/\/doi.org\/10.1145\/3560830.3563732","relation":{},"subject":[],"published":{"date-parts":[[2022,11,7]]},"assertion":[{"value":"2022-11-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}