{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T12:12:06Z","timestamp":1780661526322,"version":"3.54.1"},"reference-count":153,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T00:00:00Z","timestamp":1715040000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T00:00:00Z","timestamp":1715040000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s00521-024-09668-0","type":"journal-article","created":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T05:02:31Z","timestamp":1715058151000},"page":"14341-14360","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Reinforcement learning-based autonomous attacker to uncover computer network vulnerabilities"],"prefix":"10.1007","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9166-6750","authenticated-orcid":false,"given":"Ahmed","family":"Mohamed Ahmed","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Thanh Thi","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mohamed","family":"Abdelrazek","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sunil","family":"Aryal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,5,7]]},"reference":[{"key":"9668_CR1","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1093\/mind\/lix.236.433","volume":"59","author":"AM Turing","year":"1950","unstructured":"Turing AM (1950) Computing machinery and intelligence. Mind 59:433\u2013460. https:\/\/doi.org\/10.1093\/mind\/lix.236.433","journal-title":"Mind"},{"issue":"3","key":"9668_CR2","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/203330.203343","volume":"38","author":"G Tesauro","year":"1995","unstructured":"Tesauro G (1995) Temporal difference learning and td-gammon. Commun ACM 38(3):58\u201368. https:\/\/doi.org\/10.1145\/203330.203343","journal-title":"Commun ACM"},{"key":"9668_CR3","doi-asserted-by":"publisher","unstructured":"Kohl N, Stone P (2004) Policy gradient reinforcement learning for fast quadrupedal locomotion. In: IEEE international conference on robotics and automation, 2004. Proceedings. ICRA \u201904. 2004, vol 3, pp 2619\u201326243. https:\/\/doi.org\/10.1109\/ROBOT.2004.1307456","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"9668_CR4","doi-asserted-by":"crossref","unstructured":"Ng AY, Coates A, Diel M, Ganapathi V, Schulte J, Tse B, Berger E, Liang E (2006) Autonomous inverted helicopter flight via reinforcement learning. In: Experimental Robotics IX, Springer, Berlin, Heidelberg, pp 363\u2013372","DOI":"10.1007\/11552246_35"},{"issue":"1","key":"9668_CR5","first-page":"105","volume":"16","author":"S Singh","year":"2002","unstructured":"Singh S, Litman D, Kearns M, Walker M (2002) Optimizing dialogue management with reinforcement learning: experiments with the njfun system. J Artif Int Res 16(1):105\u2013133","journal-title":"J Artif Int Res"},{"key":"9668_CR6","doi-asserted-by":"crossref","unstructured":"Arulkumaran K, Deisenroth MP, Brundage M, Bharath AA (2017) A brief survey of deep reinforcement learning. CoRR arXiv:1708.05866","DOI":"10.1109\/MSP.2017.2743240"},{"key":"9668_CR7","unstructured":"Rusu AA, Colmenarejo SG, G\u00fcl\u00e7ehre Desjardins G, Kirkpatrick J, Pascanu R, Mnih V, Kavukcuoglu K, Hadsell R (2016) Policy distillation. In: ICLR (Poster). arxiv:1511.06295"},{"key":"9668_CR8","unstructured":"Nguyen TT, Reddi VJ (2019) Deep reinforcement learning for cyber security. CoRR arXiv:1906.05799"},{"key":"9668_CR9","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller MA (2013) Playing atari with deep reinforcement learning. CoRR arxiv:1312.5602"},{"issue":"7540","key":"9668_CR10","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G, Petersen S, Beattie C, Sadik A, Antonoglou I, King H, Kumaran D, Wierstra D, Legg S, Hassabis D (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"9668_CR11","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, Dieleman S, Grewe D, Nham J, Kalchbrenner N, Sutskever I, Lillicrap TP, Leach M, Kavukcuoglu K, Graepel T, Hassabis D (2016) Mastering the game of go with deep neural networks and tree search. Nature 529:484\u2013489","journal-title":"Nature"},{"key":"9668_CR12","unstructured":"Vinyals O, Ewalds T, Bartunov S, Georgiev P, Vezhnevets AS, Yeo M, Makhzani A, K\u00fcttler H, Agapiou JP, Schrittwieser J, Quan J, Gaffney S, Petersen S, Simonyan K, Schaul T, Hasselt H, Silver D, Lillicrap TP, Calderone K, Keet P, Brunasso A, Lawrence D, Ekermo A, Repp J, Tsing R (2017) Starcraft II: a new challenge for reinforcement learning. CoRR arxiv:1708.04782"},{"key":"9668_CR13","doi-asserted-by":"crossref","unstructured":"Isele D, Cosgun A, Subramanian K, Fujimura K (2017) Navigating intersections with autonomous vehicles using deep reinforcement learning. CoRR arxiv:1705.01196","DOI":"10.1109\/ICRA.2018.8461233"},{"key":"9668_CR14","unstructured":"Gu S, Holly E, Lillicrap TP, Levine S (2016) Deep reinforcement learning for robotic manipulation. CoRR arxiv:1610.00633"},{"issue":"4","key":"9668_CR15","doi-asserted-by":"publisher","first-page":"110","DOI":"10.52711\/2321-581X.2021.00019","volume":"12","author":"VB Savant","year":"2021","unstructured":"Savant VB, Kasar RD (2021) A review on network security and cryptography. Res J Eng Technol 12(4):110\u2013114","journal-title":"Res J Eng Technol"},{"issue":"3","key":"9668_CR16","first-page":"576","volume":"9","author":"M Al-Shabi","year":"2019","unstructured":"Al-Shabi M (2019) A survey on symmetric and asymmetric cryptography algorithms in information security. Int J Sci Res Publ (IJSRP) 9(3):576\u2013589","journal-title":"Int J Sci Res Publ (IJSRP)"},{"key":"9668_CR17","unstructured":"Pachghare V (2019) Cryptography and information security. PHI Learning Pvt. Ltd"},{"key":"9668_CR18","unstructured":"Mushtaq MF, Jamel S, Disina AH, Pindar ZA, Shakir NSA, Deris MM (2017) A survey on the cryptographic encryption algorithms. Int J Adv Comput Sci Appl 8(11)"},{"key":"9668_CR19","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.matpr.2021.04.583","volume":"51","author":"DK Sharma","year":"2022","unstructured":"Sharma DK, Singh NC, Noola DA, Doss AN, Sivakumar J (2022) A review on various cryptographic techniques and algorithms. Mater Today Proc 51:104\u2013109","journal-title":"Mater Today Proc"},{"key":"9668_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/1-84628-253-5","volume-title":"Machine learning and data mining for computer security: methods and applications","author":"MA Maloof","year":"2006","unstructured":"Maloof MA et al (2006) Machine learning and data mining for computer security: methods and applications. Springer, Berlin"},{"key":"9668_CR21","doi-asserted-by":"crossref","unstructured":"Han Y, Rubinstein BIP, Abraham T, Alpcan T, Vel OY, Erfani SM, Hubczenko D, Leckie C, Montague P (2018) Reinforcement learning for autonomous defence in software-defined networking. CoRR arxiv:1808.05770","DOI":"10.1007\/978-3-030-01554-1_9"},{"key":"9668_CR22","doi-asserted-by":"publisher","unstructured":"Wan X, Sheng G, Li Y, Xiao L, Du X (2017) Reinforcement learning based mobile offloading for cloud-based malware detection. In: GLOBECOM 2017 - 2017 IEEE global communications conference, pp 1\u20136. https:\/\/doi.org\/10.1109\/GLOCOM.2017.8254503","DOI":"10.1109\/GLOCOM.2017.8254503"},{"key":"9668_CR23","doi-asserted-by":"publisher","unstructured":"Li Y, Liu J, Li Q, Xiao L (2015) Mobile cloud offloading for malware detections with learning. In: 2015 IEEE conference on computer communications workshops (INFOCOM WKSHPS), pp 197\u2013201. https:\/\/doi.org\/10.1109\/INFCOMW.2015.7179384","DOI":"10.1109\/INFCOMW.2015.7179384"},{"key":"9668_CR24","doi-asserted-by":"publisher","DOI":"10.1145\/2480741.2480742","author":"MH Manshaei","year":"2013","unstructured":"Manshaei MH, Zhu Q, Alpcan T, Bac\u015far T, Hubaux J-P (2013) Game theory meets network security and privacy. ACM Comput Surv. https:\/\/doi.org\/10.1145\/2480741.2480742","journal-title":"ACM Comput Surv"},{"key":"9668_CR25","unstructured":"Hasselt H, Guez A, Silver D (2015) Deep reinforcement learning with double q-learning. CoRR arxiv:1509.06461"},{"key":"9668_CR26","unstructured":"Wang Z, Freitas N, Lanctot M (2015) Dueling network architectures for deep reinforcement learning. CoRR arxiv:1511.06581"},{"key":"9668_CR27","unstructured":"Team MDR (2021) CyberBattleSim. GitHub. Created by Christian Seifert, Michael Betser, William Blum, James Bono, Kate Farris, Emily Goren, Justin Grana, Kristian Holsheimer, Brandon Marken, Joshua Neil, Nicole Nichols, Jugal Parikh, Haoran Wei"},{"key":"9668_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01821-3","volume-title":"A guide to convolutional neural networks for computer vision","author":"S Khan","year":"2018","unstructured":"Khan S, Rahmani H, Shah SAA, Bennamoun M, Medioni G, Dickinson S (2018) A guide to convolutional neural networks for computer vision, vol 8. Springer, Berlin"},{"key":"9668_CR29","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/j.cag.2023.10.015","volume":"117","author":"AM Ahmed","year":"2023","unstructured":"Ahmed AM, Abdelrazek M, Aryal S, Nguyen TT (2023) An overview of Eulerian video motion magnification methods. Comput Graph 117:145\u2013163. https:\/\/doi.org\/10.1016\/j.cag.2023.10.015","journal-title":"Comput Graph"},{"key":"9668_CR30","volume-title":"Neural network methods for natural language processing","author":"Y Goldberg","year":"2022","unstructured":"Goldberg Y (2022) Neural network methods for natural language processing. Springer, Berlin"},{"key":"9668_CR31","volume-title":"Neural networks in robotics","author":"GA Bekey","year":"2012","unstructured":"Bekey GA, Goldberg KY (2012) Neural networks in robotics, vol 202. Springer, Berlin"},{"key":"9668_CR32","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction, 2nd edn. MIT press, Cambridge http:\/\/incompleteideas.net\/book\/the-book-2nd.html"},{"key":"9668_CR33","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) Openai gym. CoRR arxiv:1606.01540"},{"issue":"3","key":"9668_CR34","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1109\/TG.2018.2877047","volume":"11","author":"M Wydmuch","year":"2018","unstructured":"Wydmuch M, Kempka M, Ja\u015bkowski W (2018) Vizdoom competitions: playing doom from pixels. IEEE Trans Games 11(3):248\u2013259","journal-title":"IEEE Trans Games"},{"key":"9668_CR35","unstructured":"Lanctot M, Lockhart E, Lespiau J-B, Zambaldi V, Upadhyay S, P\u00e9rolat J, Srinivasan S, Timbers F, Tuyls K, Omidshafiei S, Hennes D, Morrill D, Muller P, Ewalds T, Faulkner R, Kram\u00e1r J, Vylder BD, Saeta B, Bradbury J, Ding D, Borgeaud S, Lai M, Schrittwieser J, Anthony T, Hughes E, Danihelka I, Ryan-Davis J (2019) OpenSpiel: a framework for reinforcement learning in games. CoRR arxiv:arXiv:1908.09453 [cs.LG]"},{"key":"9668_CR36","doi-asserted-by":"crossref","unstructured":"Shah S, Dey D, Lovett C, Kapoor A (2017) Airsim: high-fidelity visual and physical simulation for autonomous vehicles. In: Field and service robotics. arxiv:1705.05065","DOI":"10.1007\/978-3-319-67361-5_40"},{"key":"9668_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2020.100022","volume":"6","author":"S Tunyasuvunakool","year":"2020","unstructured":"Tunyasuvunakool S, Muldal A, Doron Y, Liu S, Bohez S, Merel J, Erez T, Lillicrap T, Heess N, Tassa Y (2020) dm_control: software and tasks for continuous control. Softw Impacts 6:100022. https:\/\/doi.org\/10.1016\/j.simpa.2020.100022","journal-title":"Softw Impacts"},{"key":"9668_CR38","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/978-3-642-27645-3_7","volume-title":"Reinforcement learning: state-of-the-art","author":"H Hasselt","year":"2012","unstructured":"Hasselt H (2012) Reinforcement learning in continuous state and action spaces. In: Wiering M, Otterlo M (eds) Reinforcement learning: state-of-the-art. Springer, Berlin, Heidelberg, pp 207\u2013251"},{"key":"9668_CR39","unstructured":"Achiam J (2018) spinning up in deep reinforcement learning"},{"key":"9668_CR40","doi-asserted-by":"crossref","unstructured":"Littman ML (1994) Markov games as a framework for multi-agent reinforcement learning. In: Proceedings of the eleventh international conference on international conference on machine learning. ICML\u201994, pp 157\u2013163. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"9668_CR41","unstructured":"Watkins CJCH (1989) Learning from delayed rewards. PhD thesis, King\u2019s College, Cambridge, UK"},{"key":"9668_CR42","first-page":"1008","volume-title":"SIAM Journal on Control and Optimization","author":"V Konda","year":"2000","unstructured":"Konda V, Tsitsiklis J (2000) Actor-critic algorithms. SIAM Journal on Control and Optimization. MIT Press, Cambridge, pp 1008\u20131014"},{"key":"9668_CR43","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap TP, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. CoRR arxiv:1602.01783"},{"key":"9668_CR44","unstructured":"Schulman J, Levine S, Moritz P, Jordan MI, Abbeel P (2015) Trust region policy optimization. CoRR arxiv:1502.05477"},{"key":"9668_CR45","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arxiv: 1707.06347"},{"key":"9668_CR46","unstructured":"Fortunato M, Azar MG, Piot B, Menick J, Osband I, Graves A, Mnih V, Munos R, Hassabis D, Pietquin O, Blundell C, Legg S (2017) Noisy networks for exploration. CoRR arxiv: 1706.10295"},{"key":"9668_CR47","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: ICLR (Poster). arxiv:1509.02971"},{"issue":"3","key":"9668_CR48","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins CJCH, Dayan P (1992) Q-learning. Mach Learn 8(3):279\u2013292. https:\/\/doi.org\/10.1007\/BF00992698","journal-title":"Mach Learn"},{"key":"9668_CR49","unstructured":"Pollack J, Blair A (1996) Why did td-gammon work? In: Mozer MC, Jordan M, Petsche T (eds) Advances in neural information processing systems, vol 9. MIT Press, Cambridge https:\/\/proceedings.neurips.cc\/paper\/1996\/file\/459a4ddcb586f24efd9395aa7662bc7c-Paper.pdf"},{"key":"9668_CR50","doi-asserted-by":"crossref","unstructured":"Baird L (1995) Residual algorithms: reinforcement learning with function approximation. In: Proceedings of the twelfth international conference on machine learning, pp 30\u201337. Morgan Kaufmann, Burlington","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"issue":"5","key":"9668_CR51","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"JN Tsitsiklis","year":"1997","unstructured":"Tsitsiklis JN, Van Roy B (1997) An analysis of temporal-difference learning with function approximation. IEEE Trans Autom Control 42(5):674\u2013690. https:\/\/doi.org\/10.1109\/9.580874","journal-title":"IEEE Trans Autom Control"},{"key":"9668_CR52","first-page":"1063","volume":"5","author":"B Sallans","year":"2004","unstructured":"Sallans B, Hinton GE (2004) Reinforcement learning with factored states and actions. J Mach Learn Res 5:1063\u20131088","journal-title":"J Mach Learn Res"},{"key":"9668_CR53","unstructured":"Maei HR, Szepesv\u00e1ri C, Bhatnagar S, Precup D, Silver D, Sutton RS (2009) Convergent temporal-difference learning with arbitrary smooth function approximation. In: Proceedings of the 22nd international conference on neural information processing systems. NIPS\u201909, pp 1204\u20131212. Curran Associates Inc., Red Hook, NY, USA"},{"key":"9668_CR54","unstructured":"Sutton RS, Maei H, Szepesv\u00e1ri C (2008) A convergent o(n) temporal-difference algorithm for off-policy learning with linear function approximation. In: Koller D, Schuurmans D, Bengio Y, Bottou L (eds) Advances in Neural Information Processing Systems, vol 21. Curran Associates, Inc., New York https:\/\/proceedings.neurips.cc\/paper\/2008\/file\/e0c641195b27425bb056ac56f8953d24-Paper.pdf"},{"key":"9668_CR55","doi-asserted-by":"publisher","unstructured":"Sutton RS, Maei HR, Precup D, Bhatnagar S, Silver D, Szepesv\u00e1ri C, Wiewiora E (2009) Fast gradient-descent methods for temporal-difference learning with linear function approximation. ICML \u201909, pp 993\u20131000. Association for Computing Machinery, New York https:\/\/doi.org\/10.1145\/1553374.1553501","DOI":"10.1145\/1553374.1553501"},{"key":"9668_CR56","unstructured":"Lin L-J (1992) Reinforcement learning for robots using neural networks. PhD thesis, USA. UMI Order No. GAX93-22750"},{"issue":"3","key":"9668_CR57","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1037\/0033-295X.102.3.419","volume":"102","author":"JL Mcclelland","year":"1995","unstructured":"Mcclelland JL, Mcnaughton BL, O\u2019Reilly RC (1995) Why there are complementary learning systems in the hippocampus and neocortex: insights from the successes and failures of connectionist models of learning and memory. Psychol Rev 102(3):419\u2013457","journal-title":"Psychol Rev"},{"issue":"5","key":"9668_CR58","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1016\/j.tins.2010.01.006","volume":"33","author":"J O\u2019Neill","year":"2010","unstructured":"O\u2019Neill J, Pleydell-Bouverie B, Dupret D, Csicsvari J (2010) Play it again: reactivation of waking experience and memory. Trends Neurosci 33(5):220\u2013229. https:\/\/doi.org\/10.1016\/j.tins.2010.01.006","journal-title":"Trends Neurosci"},{"key":"9668_CR59","doi-asserted-by":"publisher","unstructured":"Riedmiller M (2005) Neural fitted q iteration - first experiences with a data efficient neural reinforcement learning method. In: Proceedings of the 16th European conference on machine learning. ECML\u201905, pp 317\u2013328. Springer, Berlin, Heidelberg. https:\/\/doi.org\/10.1007\/11564096_32","DOI":"10.1007\/11564096_32"},{"key":"9668_CR60","doi-asserted-by":"publisher","unstructured":"Lange S, Riedmiller M (2010) Deep auto-encoder neural networks in reinforcement learning. In: The 2010 international joint conference on neural networks (IJCNN), pp 1\u20138.https:\/\/doi.org\/10.1109\/IJCNN.2010.5596468","DOI":"10.1109\/IJCNN.2010.5596468"},{"key":"9668_CR61","doi-asserted-by":"publisher","unstructured":"Diuk C, Cohen A, Littman ML (2008) An object-oriented representation for efficient reinforcement learning. In: Proceedings of the 25th international conference on machine learning. ICML \u201908, pp 240\u2013247. Association for Computing Machinery, New York https:\/\/doi.org\/10.1145\/1390156.1390187","DOI":"10.1145\/1390156.1390187"},{"issue":"1","key":"9668_CR62","first-page":"253","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare MG, Naddaf Y, Veness J, Bowling M (2013) The arcade learning environment: an evaluation platform for general agents. J Artif Int Res 47(1):253\u2013279","journal-title":"J Artif Int Res"},{"issue":"4","key":"9668_CR63","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1109\/TCIAIG.2013.2294713","volume":"6","author":"M Hausknecht","year":"2014","unstructured":"Hausknecht M, Lehman J, Miikkulainen R, Stone P (2014) A neuroevolution approach to general Atari game playing. IEEE Trans Comput Intell AI Games 6(4):355\u2013366. https:\/\/doi.org\/10.1109\/TCIAIG.2013.2294713","journal-title":"IEEE Trans Comput Intell AI Games"},{"key":"9668_CR64","unstructured":"Hasselt H (2010) Double q-learning. In: Lafferty J, Williams C, Shawe-Taylor J, Zemel R, Culotta A (eds) Advances in neural information processing systems, vol 23. Curran Associates, Inc., New York. https:\/\/proceedings.neurips.cc\/paper\/2010\/file\/091d584fced301b442654dd8c23b3fc9-Paper.pdf"},{"issue":"1","key":"9668_CR65","first-page":"237","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML, Moore AW (1996) Reinforcement learning: a survey. J Artif Int Res 4(1):237\u2013285","journal-title":"J Artif Int Res"},{"key":"9668_CR66","doi-asserted-by":"publisher","unstructured":"Sutton RS (1990) Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Porter B, Mooney R (eds) Machine learning proceedings 1990, pp 216\u2013224. Morgan Kaufmann, San Francisco (CA). https:\/\/doi.org\/10.1016\/B978-1-55860-141-3.50030-4","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"9668_CR67","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1162\/153244303765208377","volume":"3","author":"RI Brafman","year":"2003","unstructured":"Brafman RI, Tennenholtz M (2003) R-max - a general polynomial time algorithm for near-optimal reinforcement learning. J Mach Learn Res 3:213\u2013231. https:\/\/doi.org\/10.1162\/153244303765208377","journal-title":"J Mach Learn Res"},{"key":"9668_CR68","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. CoRR arxiv: 1802.09477"},{"key":"9668_CR69","doi-asserted-by":"publisher","DOI":"10.1007\/s00145-012-9134-5","author":"M Dijk","year":"2013","unstructured":"Dijk M, Juels A, Oprea A, Rivest R (2013) Flipit: the game of stealthy takeover. J Cryptol. https:\/\/doi.org\/10.1007\/s00145-012-9134-5","journal-title":"J Cryptol"},{"key":"9668_CR70","doi-asserted-by":"publisher","unstructured":"Chung K, Kamhoua CA, Kwiat KA, Kalbarczyk ZT, Iyer RK (2016) Game theory with learning for cyber security monitoring. In: 2016 IEEE 17th international symposium on high assurance systems engineering (HASE), pp 1\u20138. https:\/\/doi.org\/10.1109\/HASE.2016.48","DOI":"10.1109\/HASE.2016.48"},{"issue":"3","key":"9668_CR71","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.stamet.2005.05.003","volume":"3","author":"AG Tartakovsky","year":"2006","unstructured":"Tartakovsky AG, Rozovskii BL, Bla\u017eek RB, Kim H (2006) Detection of intrusions in information systems by sequential change-point methods. Stat Methodol 3(3):252\u2013293. https:\/\/doi.org\/10.1016\/j.stamet.2005.05.003","journal-title":"Stat Methodol"},{"key":"9668_CR72","doi-asserted-by":"crossref","unstructured":"Rasouli M, Miehling E, Teneketzis D (2014) A supervisory control approach to dynamic cyber-security. CoRR arxiv:1409.0838","DOI":"10.1007\/978-3-319-12601-2_6"},{"issue":"1","key":"9668_CR73","doi-asserted-by":"publisher","first-page":"42308","DOI":"10.1038\/srep42308","volume":"7","author":"W Liu","year":"2017","unstructured":"Liu W, Zhong S (2017) Web malware spread modelling and optimal control strategies. Sci Rep 7(1):42308","journal-title":"Sci Rep"},{"issue":"10","key":"9668_CR74","doi-asserted-by":"publisher","first-page":"2490","DOI":"10.1109\/TIFS.2018.2819967","volume":"13","author":"E Miehling","year":"2018","unstructured":"Miehling E, Rasouli M, Teneketzis D (2018) A pomdp approach to the dynamic defense of large-scale cyber networks. IEEE Trans Inf Forensics Secur 13(10):2490\u20132505. https:\/\/doi.org\/10.1109\/TIFS.2018.2819967","journal-title":"IEEE Trans Inf Forensics Secur"},{"key":"9668_CR75","doi-asserted-by":"publisher","unstructured":"Bronfman-Nadas R, Zincir-Heywood N, Jacobs JT (2018) An artificial arms race: Could it improve mobile malware detectors?. In: 2018 network traffic measurement and analysis conference (TMA), pp 1\u20138. https:\/\/doi.org\/10.23919\/TMA.2018.8506545","DOI":"10.23919\/TMA.2018.8506545"},{"key":"9668_CR76","unstructured":"MYERSON RB (1991) Game theory: analysis of conflict. Harvard University Press, Cambridge http:\/\/www.jstor.org\/stable\/j.ctvjsf522 Accessed 04 Oct 2022"},{"key":"9668_CR77","doi-asserted-by":"publisher","unstructured":"Alpcan T, Basar T (2003) A game theoretic approach to decision and analysis in network intrusion detection. In: 42nd IEEE international conference on decision and control (IEEE Cat. No.03CH37475), vol 3, pp 2595\u201326003. https:\/\/doi.org\/10.1109\/CDC.2003.1273013","DOI":"10.1109\/CDC.2003.1273013"},{"key":"9668_CR78","doi-asserted-by":"publisher","unstructured":"Nguyen KC, Alpcan T, Basar T (2009) Security games with incomplete information. In: 2009 IEEE international conference on communications, pp 1\u20136. https:\/\/doi.org\/10.1109\/ICC.2009.5199443","DOI":"10.1109\/ICC.2009.5199443"},{"key":"9668_CR79","unstructured":"Durkota K, Lisy V, Bo\u0161ansky B, Kiekintveld C (2015) Optimal network security hardening using attack graph games. In: Proceedings of the 24th International Conference on Artificial Intelligence. IJCAI\u201915, pp 526\u2013532. AAAI Press"},{"key":"9668_CR80","doi-asserted-by":"crossref","unstructured":"Carroll TE, Grosu D (2009) A game theoretic investigation of deception in network security. In: 2009 Proceedings of 18th international conference on computer communications and networks, pp 1\u20136","DOI":"10.1109\/ICCCN.2009.5235344"},{"key":"9668_CR81","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511973031","volume-title":"Security and game theory: algorithms, deployed systems, lessons learned","author":"M Tambe","year":"2011","unstructured":"Tambe M (2011) Security and game theory: algorithms, deployed systems, lessons learned, 1st edn. Cambridge University Press, Cambridge","edition":"1"},{"key":"9668_CR82","doi-asserted-by":"publisher","DOI":"10.1093\/acprof:oso\/9780195300796.001.0001","volume-title":"Repeated games and reputations: long-run relationships","author":"GJ Mailath","year":"2006","unstructured":"Mailath GJ, Samuelson L (2006) Repeated games and reputations: long-run relationships. Oxford University Press, Oxford. https:\/\/doi.org\/10.1093\/acprof:oso\/9780195300796.001.0001"},{"issue":"4","key":"9668_CR83","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1145\/581271.581272","volume":"5","author":"F Bergadano","year":"2002","unstructured":"Bergadano F, Gunetti D, Picardi C (2002) User authentication through keystroke dynamics. ACM Trans Inf Syst Secur 5(4):367\u2013397. https:\/\/doi.org\/10.1145\/581271.581272","journal-title":"ACM Trans Inf Syst Secur"},{"issue":"1","key":"9668_CR84","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1109\/TR.2004.824828","volume":"53","author":"RA Maxion","year":"2004","unstructured":"Maxion RA, Townsend TN (2004) Masquerade detection augmented with error analysis. IEEE Trans Reliab 53(1):124\u2013147. https:\/\/doi.org\/10.1109\/TR.2004.824828","journal-title":"IEEE Trans Reliab"},{"issue":"10","key":"9668_CR85","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","volume":"39","author":"LS Shapley","year":"1953","unstructured":"Shapley LS (1953) Stochastic games*. Proc Nat Acad Sci 39(10):1095\u20131100. https:\/\/doi.org\/10.1073\/pnas.39.10.1095","journal-title":"Proc Nat Acad Sci"},{"key":"9668_CR86","unstructured":"Bethencourt J, Franklin J, Vernon M (2005) Mapping internet sensors with probe response attacks. In: 14th USENIX security symposium (USENIX Security 05). USENIX Association, Baltimore, MD. https:\/\/www.usenix.org\/conference\/14th-usenix-security-symposium\/mapping-internet-sensors-probe-response-attacks"},{"key":"9668_CR87","doi-asserted-by":"crossref","unstructured":"Elderman R, Pater L, Thie A, Drugan M, Wiering M (2017) Adversarial reinforcement learning in a cyber security simulation. In: Filipe J, van den Herik J, Rocha A, Filipe J (eds) ICAART 2017 - Proceedings of the 9th international conference on agents and artificial intelligence, pp 559\u2013566. SCITEPRESS-Science and Technology Publications, Lda., 9th International Conference on Agents and Artificial Intelligence (ICAART 2017), ICAART 2017 ; Conference date: 24-02-2017 Through 26-02-2017","DOI":"10.5220\/0006197105590566"},{"key":"9668_CR88","doi-asserted-by":"publisher","unstructured":"Alpcan T, Ba\u015far T (2010) Network security: a decision and game-theoretic approach. Cambridge University Press, Cambridge https:\/\/doi.org\/10.1017\/CBO9780511760778","DOI":"10.1017\/CBO9780511760778"},{"key":"9668_CR89","unstructured":"Li T, Peng G, Zhu Q, Basar T (2021) The confluence of networks, games and learning. CoRR arxiv:2105.08158"},{"key":"9668_CR90","doi-asserted-by":"publisher","unstructured":"Roy S, Ellis C, Shiva S, Dasgupta D, Shandilya V, Wu Q (2010) A survey of game theory as applied to network security. In: 2010 43rd Hawaii international conference on system sciences, pp 1\u201310. https:\/\/doi.org\/10.1109\/HICSS.2010.35","DOI":"10.1109\/HICSS.2010.35"},{"key":"9668_CR91","unstructured":"Uther WTB, Veloso MM (2003) Adversarial reinforcement learning"},{"key":"9668_CR92","unstructured":"Szegedy C, Zaremba W, Sutskever I, Bruna J, Erhan D, Goodfellow I, Fergus R (2014) Intriguing properties of neural networks. In: 2nd international conference on learning representations, ICLR 2014; Conference date: 14-04-2014 Through 16-04-2014"},{"key":"9668_CR93","unstructured":"Gilmer J, Metz L, Faghri F, Schoenholz SS, Raghu M, Wattenberg M, Goodfellow IJ (2018) Adversarial spheres. CoRR arxiv:1801.02774"},{"key":"9668_CR94","unstructured":"Shafahi A, Huang WR, Studer C, Feizi S, Goldstein T (2018) Are adversarial examples inevitable? CoRR arxiv:1809.02104"},{"key":"9668_CR95","doi-asserted-by":"publisher","unstructured":"Goodfellow IJ, Shlens J, Szegedy C (2014) Explaining and Harnessing Adversarial Examples. https:\/\/doi.org\/10.48550\/ARXIV.1412.6572, arXiv arxiv:1412.6572","DOI":"10.48550\/ARXIV.1412.6572"},{"key":"9668_CR96","unstructured":"Huang SH, Papernot N, Goodfellow IJ, Duan Y, Abbeel P (2017) Adversarial attacks on neural network policies. CoRR arxiv:1702.02284"},{"key":"9668_CR97","doi-asserted-by":"crossref","unstructured":"Lin Y, Hong Z, Liao Y, Shih M, Liu M, Sun M (2017) Tactics of adversarial attack on deep reinforcement learning agents. CoRR arxiv:1703.06748","DOI":"10.24963\/ijcai.2017\/525"},{"key":"9668_CR98","doi-asserted-by":"publisher","unstructured":"Kos J, Song D (2017) Delving into adversarial attacks on deep policies. https:\/\/doi.org\/10.48550\/ARXIV.1705.06452, arXiv arxiv:1705.06452","DOI":"10.48550\/ARXIV.1705.06452"},{"key":"9668_CR99","unstructured":"Pattanaik A, Tang Z, Liu S, Bommannan G, Chowdhary G (2017) Robust deep reinforcement learning with adversarial attacks. CoRR arxiv:1712.03632"},{"key":"9668_CR100","unstructured":"Gleave A, Dennis M, Kant N, Wild C, Levine S, Russell S (2019) Adversarial policies: Attacking deep reinforcement learning. CoRR arxiv:1905.10615"},{"key":"9668_CR101","unstructured":"Molina-Markham A, Miniter C, Powell B, Ridley A (2021) Network environment design for autonomous cyberdefense. CoRR arxiv:2103.07583"},{"key":"9668_CR102","doi-asserted-by":"crossref","unstructured":"Xie C, Wu Y, Maaten L, Yuille AL, He K (2018) Feature denoising for improving adversarial robustness. CoRR arxiv:1812.03411","DOI":"10.1109\/CVPR.2019.00059"},{"key":"9668_CR103","unstructured":"Pinto L, Davidson J, Sukthankar R, Gupta A (2017) Robust adversarial reinforcement learning. CoRR arxiv:1703.02702"},{"issue":"2","key":"9668_CR104","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/TAI.2021.3111139","volume":"3","author":"I Ilahi","year":"2022","unstructured":"Ilahi I, Usama M, Qadir J, Janjua MU, Al-Fuqaha A, Hoang DT, Niyato D (2022) Challenges and countermeasures for adversarial attacks on deep reinforcement learning. IEEE Trans Artif Intell 3(2):90\u2013109. https:\/\/doi.org\/10.1109\/TAI.2021.3111139","journal-title":"IEEE Trans Artif Intell"},{"key":"9668_CR105","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s42400-019-0027-x","volume":"2","author":"T Chen","year":"2019","unstructured":"Chen T, Liu J, Xiang Y, Niu W, Tong E, Han Z (2019) Adversarial attack and defense in reinforcement learning-from AI security view. Cybersecurity 2:1\u201322","journal-title":"Cybersecurity"},{"key":"9668_CR106","unstructured":"Silva SH, Najafirad P (2020) Opportunities and challenges in deep learning adversarial robustness: a survey. CoRR arxiv:2007.00753"},{"key":"9668_CR107","unstructured":"Rege M (2018) Machine learning for cyber defense and attack. https:\/\/api.semanticscholar.org\/CorpusID:232319054"},{"key":"9668_CR108","doi-asserted-by":"publisher","unstructured":"Benjamin DP, Pal P, Webber F, Rubel P, Atigetchi M (2008) Using a cognitive architecture to automate cyberdefense reasoning. In: 2008 bio-inspired, learning and intelligent systems for security, pp 58\u201363. https:\/\/doi.org\/10.1109\/BLISS.2008.17","DOI":"10.1109\/BLISS.2008.17"},{"key":"9668_CR109","unstructured":"Ko RKL (2020) Cyber autonomy: Automating the hacker- self-healing, self-adaptive, automatic cyber defense systems and their impact to the industry, society and national security. CoRR arxiv:2012.04405"},{"key":"9668_CR110","unstructured":"Baah GK, Hobson T, Okhravi H, Roberts SC, Streilein WW, Yuditskaya S (2015) A study of gaps in cyber defense automation. https:\/\/api.semanticscholar.org\/CorpusID:40128147"},{"key":"9668_CR111","doi-asserted-by":"publisher","unstructured":"Applebaum A, Dennler C, Dwyer P, Moskowitz M, Nguyen H, Nichols N, Park N, Rachwalski P, Rau F, Webster A, Wolk M (2022) Bridging automated to autonomous cyber defense: foundational analysis of tabular q-learning. In: Proceedings of the 15th ACM workshop on artificial intelligence and security. AISec\u201922, pp 149\u2013159. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/3560830.3563732","DOI":"10.1145\/3560830.3563732"},{"key":"9668_CR112","unstructured":"Standen M, Lucas M, Bowman D, Richer TJ, Kim J, Marriott D (2021) CybORG: a gym for the development of autonomous cyber agents. arXiv arXiv:2108.09118"},{"key":"9668_CR113","unstructured":"Vyas S, Hannay J, Bolton A, Burnap PP (2023) Automated cyber defence: a review"},{"key":"9668_CR114","doi-asserted-by":"crossref","unstructured":"Lohn A, Knack A, Burke A, Jackson K (2023) Autonomous cyber defence: a roadmap from lab to ops. Technical report, CETaS Research Reports (June). https:\/\/cetas.turing.ac.uk\/publications\/autonomous-cyber-defence","DOI":"10.51593\/2022CA007"},{"key":"9668_CR115","doi-asserted-by":"publisher","unstructured":"Rush G, Tauritz D, Kent A (2015) Coevolutionary agent-based network defense lightweight event system (candles), pp 859\u2013866. https:\/\/doi.org\/10.1145\/2739482.2768429","DOI":"10.1145\/2739482.2768429"},{"key":"9668_CR116","doi-asserted-by":"publisher","unstructured":"Zhu M, Hu Z, Liu P (2014) Reinforcement learning algorithms for adaptive cyber defense against heartbleed. In: Proceedings of the First ACM workshop on moving target defense. MTD \u201914, pp 51\u201358. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/2663474.2663481","DOI":"10.1145\/2663474.2663481"},{"issue":"1","key":"9668_CR117","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1109\/COMST.2016.2618874","volume":"19","author":"DB Rawat","year":"2017","unstructured":"Rawat DB, Reddy SR (2017) Software defined networking architecture, security and energy efficiency: a survey. IEEE Commun Surv Tutor 19(1):325\u2013346. https:\/\/doi.org\/10.1109\/COMST.2016.2618874","journal-title":"IEEE Commun Surv Tutor"},{"issue":"4","key":"9668_CR118","doi-asserted-by":"publisher","first-page":"2317","DOI":"10.1109\/COMST.2015.2474118","volume":"17","author":"I Ahmad","year":"2015","unstructured":"Ahmad I, Namal S, Ylianttila M, Gurtov A (2015) Security in software defined networks: a survey. IEEE Commun Surv Tutor 17(4):2317\u20132346. https:\/\/doi.org\/10.1109\/COMST.2015.2474118","journal-title":"IEEE Commun Surv Tutor"},{"key":"9668_CR119","doi-asserted-by":"publisher","first-page":"18121","DOI":"10.1109\/ACCESS.2022.3151081","volume":"10","author":"G Kim","year":"2022","unstructured":"Kim G, Kim Y, Lim H (2022) Deep reinforcement learning-based routing on software-defined networks. IEEE Access 10:18121\u201318133. https:\/\/doi.org\/10.1109\/ACCESS.2022.3151081","journal-title":"IEEE Access"},{"issue":"2","key":"9668_CR120","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1109\/JIOT.2014.2368356","volume":"2","author":"MA Salahuddin","year":"2015","unstructured":"Salahuddin MA, Al-Fuqaha A, Guizani M (2015) Software-defined networking for RSU clouds in support of the internet of vehicles. IEEE Internet Things J 2(2):133\u2013144. https:\/\/doi.org\/10.1109\/JIOT.2014.2368356","journal-title":"IEEE Internet Things J"},{"key":"9668_CR121","doi-asserted-by":"publisher","unstructured":"Mao H, Alizadeh M, Menache I, Kandula S (2016) Resource management with deep reinforcement learning. HotNets \u201916, pp 50\u201356. Association for computing machinery, New York, NY, USA . https:\/\/doi.org\/10.1145\/3005745.3005750","DOI":"10.1145\/3005745.3005750"},{"key":"9668_CR122","doi-asserted-by":"publisher","unstructured":"Lin S-C, Akyildiz IF, Wang P, Luo M (2016) Qos-aware adaptive routing in multi-layer hierarchical software defined networks: a reinforcement learning approach. In: 2016 IEEE international conference on services computing (SCC), pp 25\u201333. https:\/\/doi.org\/10.1109\/SCC.2016.12","DOI":"10.1109\/SCC.2016.12"},{"key":"9668_CR123","doi-asserted-by":"publisher","DOI":"10.1155\/2015\/360428","author":"R Huang","year":"2015","unstructured":"Huang R, Chu X, Zhang J, Hu YH (2015) Energy-efficient monitoring in software defined wireless sensor networks using reinforcement learning: A prototype. Int J Distrib Sens Netw. https:\/\/doi.org\/10.1155\/2015\/360428","journal-title":"Int J Distrib Sens Netw"},{"issue":"4","key":"9668_CR124","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1109\/MWC.2016.7553036","volume":"23","author":"MA Salahuddin","year":"2016","unstructured":"Salahuddin MA, Al-Fuqaha A, Guizani M (2016) Reinforcement learning for resource provisioning in the vehicular cloud. IEEE Wirel Commun 23(4):128\u2013135. https:\/\/doi.org\/10.1109\/MWC.2016.7553036","journal-title":"IEEE Wirel Commun"},{"issue":"3","key":"9668_CR125","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1145\/3138808.3138810","volume":"47","author":"A Mestres","year":"2017","unstructured":"Mestres A, Rodriguez-Natal A, Carner J, Barlet-Ros P, Alarc\u00f3n E, Sol\u00e9 M, Munt\u00e9s-Mulero V, Meyer D, Barkai S, Hibbett MJ, Estrada G, Ma\u2019ruf K, Coras F, Ermagan V, Latapie H, Cassar C, Evans J, Maino F, Walrand J, Cabellos A (2017) Knowledge-defined networking. SIGCOMM Comput Commun Rev 47(3):2\u201310. https:\/\/doi.org\/10.1145\/3138808.3138810","journal-title":"SIGCOMM Comput Commun Rev"},{"key":"9668_CR126","doi-asserted-by":"publisher","unstructured":"Kim S, Son J, Talukder A, Hong CS (2016) Congestion prevention mechanism based on q-leaning for efficient routing in sdn. In: 2016 international conference on information networking (ICOIN), pp 124\u2013128. https:\/\/doi.org\/10.1109\/ICOIN.2016.7427100","DOI":"10.1109\/ICOIN.2016.7427100"},{"issue":"1","key":"9668_CR127","doi-asserted-by":"publisher","first-page":"870","DOI":"10.1109\/TNSM.2020.3036911","volume":"18","author":"DM Casas-Velasco","year":"2021","unstructured":"Casas-Velasco DM, Rendon OMC, Fonseca NLS (2021) Intelligent routing based on reinforcement learning for software-defined networking. IEEE Trans Netw Serv Manage 18(1):870\u2013881. https:\/\/doi.org\/10.1109\/TNSM.2020.3036911","journal-title":"IEEE Trans Netw Serv Manage"},{"issue":"3","key":"9668_CR128","doi-asserted-by":"publisher","first-page":"2041","DOI":"10.1109\/TII.2021.3093905","volume":"18","author":"P Radoglou-Grammatikis","year":"2022","unstructured":"Radoglou-Grammatikis P, Rompolos K, Sarigiannidis P, Argyriou V, Lagkas T, Sarigiannidis A, Goudos S, Wan S (2022) Modeling, detecting, and mitigating threats against industrial healthcare systems: a combined software defined networking and reinforcement learning approach. IEEE Trans Industr Inf 18(3):2041\u20132052. https:\/\/doi.org\/10.1109\/TII.2021.3093905","journal-title":"IEEE Trans Industr Inf"},{"issue":"1","key":"9668_CR129","first-page":"7","volume":"22","author":"A Ridley","year":"2018","unstructured":"Ridley A (2018) Machine learning for autonomous cyber defense. Next Wave 22(1):7\u201314","journal-title":"Next Wave"},{"key":"9668_CR130","doi-asserted-by":"crossref","unstructured":"Hammar K, Stadler R (2020) Finding effective security strategies through reinforcement learning and self-play. CoRR arxiv:2009.08120","DOI":"10.23919\/CNSM50824.2020.9269092"},{"key":"9668_CR131","unstructured":"Berner C, Brockman G, Chan B, Cheung V, Debiak P, Dennison C, Farhi D, Fischer Q, Hashme S, Hesse C, J\u00f3zefowicz R, Gray S, Olsson C, Pachocki J, Petrov M, Oliveira\u00a0Pinto HP, Raiman J, Salimans T, Schlatter J, Schneider J, Sidor S, Sutskever I, Tang J, Wolski F, Zhang S (2019) Dota 2 with large scale deep reinforcement learning. CoRR arxiv:1912.06680"},{"key":"9668_CR132","unstructured":"Bowling M, Veloso MM (2002) Scalable learning in stochastic games"},{"key":"9668_CR133","doi-asserted-by":"publisher","unstructured":"Li S, Wu Y, Cui X, Dong H, Fang F, Russell S (2019) Robust multi-agent reinforcement learning via minimax deep deterministic policy gradient. AAAI Press https:\/\/doi.org\/10.1609\/aaai.v33i01.33014213","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"9668_CR134","doi-asserted-by":"crossref","unstructured":"Hammar K, Stadler R (2021) Learning intrusion prevention policies through optimal stopping. CoRR arxiv:2106.07160","DOI":"10.23919\/CNSM52442.2021.9615542"},{"key":"9668_CR135","unstructured":"Walter E, Ferguson-Walter K, Ridley A (2021) Incorporating deception into cyberbattlesim for autonomous defense. CoRR arxiv:2108.13980"},{"key":"9668_CR136","doi-asserted-by":"crossref","unstructured":"Mokube I, Adams M (2007) Honeypots: concepts, approaches, and challenges. In: ACM-SE 45: Proceedings of the 45th annual southeast regional conference, pp 321\u2013326","DOI":"10.1145\/1233341.1233399"},{"key":"9668_CR137","unstructured":"Andrew A, Spillard S, Collyer J, Dhir N (2022) Developing optimal causal cyber-defence agents via cyber security simulation"},{"key":"9668_CR138","unstructured":"Aglietti V, Dhir N, Gonz\u00e1lez J, Damoulas T (2021) Dynamic causal bayesian optimization"},{"key":"9668_CR139","doi-asserted-by":"publisher","unstructured":"Foley M, Hicks C, Highnam K, Mavroudis V (2022) Autonomous network defence using reinforcement learning. In: Proceedings of the 2022 ACM on Asia conference on computer and communications security. ASIA CCS \u201922, pp 1252\u20131254. Association for computing machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/3488932.3527286","DOI":"10.1145\/3488932.3527286"},{"key":"9668_CR140","doi-asserted-by":"crossref","unstructured":"Foley M, Wang M, MZ, Hicks C, Mavroudis V (2023) Inroads into autonomous network defence using explained reinforcement learning","DOI":"10.1145\/3488932.3527286"},{"key":"9668_CR141","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2023.103578","volume":"136","author":"Z Zhu","year":"2024","unstructured":"Zhu Z, Chen M, Zhu C, Zhu Y (2024) Effective defense strategies in network security using improved double dueling deep q-network. Comput Secur 136:103578. https:\/\/doi.org\/10.1016\/j.cose.2023.103578","journal-title":"Comput Secur"},{"key":"9668_CR142","unstructured":"Kiely M, Bowman D, Standen M, Moir C (2023) On autonomous agents in a cyber defence environment"},{"key":"9668_CR143","unstructured":"Cyber Autonomy Gym for Experimentation Challenge 2. GitHub. Created by Maxwell Standen, David Bowman, Son Hoang, Toby Richer, Martin Lucas, Richard Van Tassel, Phillip Vu, Mitchell Kiely (2022)"},{"key":"9668_CR144","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105116","volume":"114","author":"AMK Adawadkar","year":"2022","unstructured":"Adawadkar AMK, Kulkarni N (2022) Cyber-security and reinforcement learning - a brief survey. Eng Appl Artif Intell 114:105116. https:\/\/doi.org\/10.1016\/j.engappai.2022.105116","journal-title":"Eng Appl Artif Intell"},{"key":"9668_CR145","doi-asserted-by":"publisher","unstructured":"Sewak M, Sahay SK, Rathore H (2022) Deep Reinforcement learning for cybersecurity threat detection and protection: a review. Springer, Berlin, pp 51\u201372 https:\/\/doi.org\/10.1007\/978-3-030-97532-6_4","DOI":"10.1007\/978-3-030-97532-6_4"},{"key":"9668_CR146","doi-asserted-by":"publisher","DOI":"10.3390\/a15040134","author":"W Wang","year":"2022","unstructured":"Wang W, Sun D, Jiang F, Chen X, Zhu C (2022) Research and challenges of reinforcement learning in cyber defense decision-making for intranet security. Algorithms. https:\/\/doi.org\/10.3390\/a15040134","journal-title":"Algorithms"},{"key":"9668_CR147","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1016\/j.arcontrol.2022.01.001","volume":"53","author":"Y Huang","year":"2022","unstructured":"Huang Y, Huang L, Zhu Q (2022) Reinforcement learning for feedback-enabled cyber resilience. Annu Rev Control 53:273\u2013295. https:\/\/doi.org\/10.1016\/j.arcontrol.2022.01.001","journal-title":"Annu Rev Control"},{"issue":"3","key":"9668_CR148","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1109\/MTS.2023.3306540","volume":"42","author":"NE Fard","year":"2023","unstructured":"Fard NE, Selmic RR, Khorasani K (2023) A review of techniques and policies on cybersecurity using artificial intelligence and reinforcement learning algorithms. IEEE Technol Soc Mag 42(3):57\u201368. https:\/\/doi.org\/10.1109\/MTS.2023.3306540","journal-title":"IEEE Technol Soc Mag"},{"key":"9668_CR149","unstructured":"Baillie C, Standen M, Schwartz J, Docking M, Bowman D, Kim J (2020) Cyborg: an autonomous cyber operations research gym. CoRR arxiv:2002.10667"},{"key":"9668_CR150","unstructured":"Schwartz J (2022) Network Attack Simulator. https:\/\/github.com\/Jjschwartz\/NetworkAttackSimulator"},{"key":"9668_CR151","doi-asserted-by":"crossref","unstructured":"Tian Z, Shi W, Wang Y, Zhu C, Du X, Su S, Sun Y, Guizani N (2019) Real time lateral movement detection based on evidence reasoning network for edge computing environment. CoRR arxiv:1902.04387","DOI":"10.1109\/TII.2019.2907754"},{"key":"9668_CR152","doi-asserted-by":"publisher","unstructured":"Bohara A, Noureddine MA, Fawaz A, Sanders WH (2017) An unsupervised multi-detector approach for identifying malicious lateral movement. In: 2017 IEEE 36th symposium on reliable distributed systems (SRDS), pp 224\u2013233. https:\/\/doi.org\/10.1109\/SRDS.2017.31","DOI":"10.1109\/SRDS.2017.31"},{"key":"9668_CR153","doi-asserted-by":"publisher","unstructured":"Fawaz A, Bohara A, Cheh C, Sanders WH (2016) Lateral movement detection using distributed data fusion. In: 2016 IEEE 35th symposium on reliable distributed systems (SRDS), pp 21\u201330 . https:\/\/doi.org\/10.1109\/SRDS.2016.014","DOI":"10.1109\/SRDS.2016.014"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09668-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09668-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09668-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T10:13:54Z","timestamp":1724062434000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09668-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,7]]},"references-count":153,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["9668"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09668-0","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,7]]},"assertion":[{"value":"19 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 May 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}