{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:24:40Z","timestamp":1775067880974,"version":"3.50.1"},"reference-count":119,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"U.S. NSF","doi-asserted-by":"publisher","award":["CNS\/SaTC 2039583"],"award-info":[{"award-number":["CNS\/SaTC 2039583"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"U.S. NSF","doi-asserted-by":"publisher","award":["CNS 1650831"],"award-info":[{"award-number":["CNS 1650831"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"U.S. NSF","doi-asserted-by":"publisher","award":["1828811"],"award-info":[{"award-number":["1828811"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012365","name":"DoD Center of Excellence in AI and Machine Learning at Howard University","doi-asserted-by":"publisher","award":["W911NF-20-2-0277"],"award-info":[{"award-number":["W911NF-20-2-0277"]}],"id":[{"id":"10.13039\/100012365","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006754","name":"U.S. Army Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006168","name":"DoE\u2019s National Nuclear Security Administration","doi-asserted-by":"publisher","award":["DE-NA0003946"],"award-info":[{"award-number":["DE-NA0003946"]}],"id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000180","name":"U.S. Department of Homeland Security","doi-asserted-by":"publisher","award":["2017-ST-062-000003"],"award-info":[{"award-number":["2017-ST-062-000003"]}],"id":[{"id":"10.13039\/100000180","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2021,6,1]]},"DOI":"10.1109\/jiot.2020.3040957","type":"journal-article","created":{"date-parts":[[2020,11,26]],"date-time":"2020-11-26T21:19:22Z","timestamp":1606425562000},"page":"8693-8706","source":"Crossref","is-referenced-by-count":168,"title":["Reinforcement Learning for IoT Security: A Comprehensive Survey"],"prefix":"10.1109","volume":"8","author":[{"given":"Aashma","family":"Uprety","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3638-3464","authenticated-orcid":false,"given":"Danda B.","family":"Rawat","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","author":"nguyen","year":"2019","journal-title":"Deep reinforcement learning for cyber security"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2018.2825478"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2017.02.013"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2836950"},{"key":"ref31","first-page":"101","article-title":"Intrusion detection using machine learning: A comparison study","volume":"118","author":"biswas","year":"2018","journal-title":"Int J Pure Appl Math"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3390\/info10040122"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-017-1315-5"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.10.009"},{"key":"ref35","first-page":"83","article-title":"Machine learning for cyber defense and attack","author":"rege","year":"2018","journal-title":"Proc 7th Int Conf Data Anal"},{"key":"ref34","author":"mohammed harun babu","year":"2018","journal-title":"A short review on applications of deep learning for cyber security"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1201\/b10867"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2008.08.003"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2494502"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref22","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref24","author":"wang","year":"2015","journal-title":"Dueling network architectures for deep reinforcement learning"},{"key":"ref23","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"Proc 13th AAAI Conf Artif Intell"},{"key":"ref101","article-title":"Guidelines for smart grid cybersecurity","author":"pillitteri","year":"2014"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2015.09.017"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2018.8430922"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2017.00016"},{"key":"ref50","author":"romdhani","year":"2015","journal-title":"Architecting the Internet of Things State of the Art"},{"key":"ref51","first-page":"484","article-title":"Research on the architecture of Internet of Things","volume":"5","author":"wu","year":"2010","journal-title":"Proc 3rd Int Conf Adv Comput Theory Eng (ICACTE)"},{"key":"ref59","first-page":"1551","article-title":"Multiagent router throttling: Decentralized coordinated response against DDoS attacks","author":"malialis","year":"2013","journal-title":"Proc 25th IAAI Conf"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/1102219.1102235"},{"key":"ref57","first-page":"58","article-title":"A survey: DDoS attack on Internet of Things","volume":"10","author":"sonar","year":"2014","journal-title":"Int J Eng Research & Development"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/1278901.1278916"},{"key":"ref55","first-page":"14","article-title":"Classification of RFID attacks","volume":"15693","author":"mitrokotsa","year":"2010","journal-title":"GEN"},{"key":"ref54","first-page":"1","article-title":"Known attacks on RFID systems, possible countermeasures and upcoming standardisation activities","author":"finkenzeller","year":"2009","journal-title":"Proc 5th Eur Workshop RFID Syst Technol"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/DCABES.2015.56"},{"key":"ref52","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1108\/IntR-07-2014-0173","article-title":"The Internet of Things: A security point of view","volume":"26","author":"wang","year":"2016","journal-title":"Internet Res"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2016.2539923"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-44494-9"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2018.03.015"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2875926"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/S1353-4858(18)30016-3"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MCAS.2009.933854"},{"key":"ref49","author":"woolf","year":"2016","journal-title":"DDoS attack that disrupted internet was largest of its kind in history experts say"},{"key":"ref7","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1126\/science.275.5306.1593"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2017.04.002"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2012.12.018"},{"key":"ref48","first-page":"1093","article-title":"Understanding the mirai botnet","author":"antonakakis","year":"2017","journal-title":"Proc 26th USENIX Security Symp (USENIX Security)"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/HealthCom.2015.7454499"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2019.05.013"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-018-9639-x"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ISCC.2015.7405513"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.2011.6069711"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2013.6523802"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2016.7841922"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2011.110418"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2007.250674"},{"key":"ref76","first-page":"4177","article-title":"Cognitive radio anti-jamming scheme for security provisioning IoT communications","volume":"9","author":"kim","year":"2015","journal-title":"KSII Trans Internet Inf Syst"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2015.7127678"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2817339"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/SAHCN.2017.7964926"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1186\/s13638-016-0785-z"},{"key":"ref79","article-title":"Dynamic beamforming optimization for anti-jamming and hardware fault recovery","author":"becker","year":"2014"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2015.01.013"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2326417"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2004.842221"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TMAG.2013.2254703"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CAMAD.2018.8514971"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICIIECS.2017.8276173"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1504\/IJAHUC.2014.066419"},{"key":"ref67","first-page":"1","article-title":"Towards trustworthy identity and access management for the future Internet","author":"weber","year":"2010","journal-title":"Proc 4th Int Workshop Trustworthy Internet People Things Services"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2012.120102"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.3036778"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CISS.2014.6814091"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2010.05.010"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2011.061411.00019"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2016.2524258"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2885530"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-015-2958-x"},{"key":"ref107","first-page":"323","article-title":"Using eligibility traces to find the best memoryless policy in partially observable Markov decision processes","author":"loch","year":"1998","journal-title":"Proc Int Conf Machine Learn (ICML)"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/MILCOM.2011.6127727"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2878570"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ICED.2016.7804660"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2790704"},{"key":"ref91","author":"whalen","year":"2001","journal-title":"An Introduction to ARP Spoofing"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2016.2607701"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2007.70224"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2014.6883387"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/APPEEC.2013.6837157"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.3390\/s16060879"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2017.2785414"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/3ICT.2018.8855737"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/AHS.2017.8046382"},{"key":"ref99","first-page":"2586","article-title":"Bayesian inverse reinforcement learning","volume":"7","author":"ramachandran","year":"2007","journal-title":"Proc Int Joint Conf Artif Intell (IJCAI)"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01731-6_2"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/ICCPS.2018.00049"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1023\/A:1025696116075"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref12","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1287\/opre.50.1.48.17791"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1561\/2200000071"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ADCONIP.2017.7983780"},{"key":"ref118","first-page":"833","article-title":"Reinforcement learning in continuous action spaces through sequential Monte Carlo methods","volume":"20","author":"lazaric","year":"2007","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.2017.1600404"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MCC.2018.1081063"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952524"},{"key":"ref18","author":"vecerik","year":"2017","journal-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2018.2815018"},{"key":"ref119","first-page":"3675","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"kulkarni","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2018.2792015"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2017.8253987"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2018.2789466"},{"key":"ref116","author":"weng","year":"2018","journal-title":"The Multi-Armed Bandit Problem and Its Solutions"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/VTCSpring.2018.8417695"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761101"},{"key":"ref89","first-page":"157","article-title":"A comprehensive analysis of spoofing","volume":"1","author":"babu","year":"2010","journal-title":"Int J Adv Comput Sci Appl"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.3390\/app9071361"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICCChina.2016.7636793"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2012.051512.111504"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2017.7925694"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/6488907\/9438061\/9272624-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6488907\/9438061\/09272624.pdf?arnumber=9272624","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:55Z","timestamp":1652194435000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9272624\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,1]]},"references-count":119,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2020.3040957","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"value":"2327-4662","type":"electronic"},{"value":"2372-2541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,1]]}}}