{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T16:06:19Z","timestamp":1781366779905,"version":"3.54.1"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"24","license":[{"start":{"date-parts":[[2022,12,15]],"date-time":"2022-12-15T00:00:00Z","timestamp":1671062400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,15]],"date-time":"2022-12-15T00:00:00Z","timestamp":1671062400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,15]],"date-time":"2022-12-15T00:00:00Z","timestamp":1671062400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004837","name":"Spanish Ministry of Science and Innovation","doi-asserted-by":"publisher","award":["PID2020-112502RB-C41 (NAUTILUS)"],"award-info":[{"award-number":["PID2020-112502RB-C41 (NAUTILUS)"]}],"id":[{"id":"10.13039\/501100004837","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Internet Things J."],"published-print":{"date-parts":[[2022,12,15]]},"DOI":"10.1109\/jiot.2022.3194694","type":"journal-article","created":{"date-parts":[[2022,7,28]],"date-time":"2022-07-28T19:45:32Z","timestamp":1659037532000},"page":"24790-24799","source":"Crossref","is-referenced-by-count":10,"title":["Inverse Reinforcement Learning: A New Framework to Mitigate an Intelligent Backoff Attack"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7028-3179","authenticated-orcid":false,"given":"Juan","family":"Parras","sequence":"first","affiliation":[{"name":"Information Processing and Telecommunications Center, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alejandro","family":"Almodovar","sequence":"additional","affiliation":[{"name":"Information Processing and Telecommunications Center, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Patricia A.","family":"Apellaniz","sequence":"additional","affiliation":[{"name":"Information Processing and Telecommunications Center, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Santiago","family":"Zazo","sequence":"additional","affiliation":[{"name":"Information Processing and Telecommunications Center, Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref38","author":"thrun","year":"2005","journal-title":"Probabilistic Robotics"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/SmartGridComm47815.2020.9302997"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2019.2956161"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2878570"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2020.2994977"},{"key":"ref37","volume":"2","author":"bertsekas","year":"2007","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref36","volume":"1","author":"bertsekas","year":"2005","journal-title":"Dynamic Programming and Optimal Control"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3390\/s18020404"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2962914"},{"key":"ref60","first-page":"1413","article-title":"Inverse reinforcement learning in swarm systems","author":"\u0161o\u0161i?","year":"2017","journal-title":"Proc 16th Conf Auton Agents MultiAgent Syst"},{"key":"ref62","author":"hernandez","year":"2021","journal-title":"Standardised Metrics and Methods for Synthetic Tabular Data Evaluation"},{"key":"ref61","first-page":"1","article-title":"Deep reinforcement learning for swarm systems","volume":"20","author":"h\u00fcttenrauch","year":"2019","journal-title":"J Mach Learn Res"},{"key":"ref63","first-page":"358","article-title":"Anomaly intrusion detection using one class SVM","author":"wang","year":"2004","journal-title":"Proc 5th Annu IEEE SMC Inf Assur Workshop"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2790704"},{"key":"ref64","article-title":"A survey of learning in multiagent environments: Dealing with non-stationarity","author":"hernandez-leal","year":"2017","journal-title":"arXiv 1707 09183"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/NetSoft48620.2020.9165383"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2885530"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-013-1021-9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-5505-8"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2015.2422735"},{"key":"ref22","article-title":"Machine learning based detection and a novel EC-BRTT algorithm based prevention of DoS attacks in wireless sensor networks","author":"narayanan","year":"0","journal-title":"Wireless Pers Commun"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/JSYST.2019.2952395"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2010.5684069"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2009.090404"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3390\/s21124060"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.01.023"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.106.620"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177704477"},{"key":"ref58","year":"2016"},{"key":"ref57","first-page":"1","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc 3rd Int Conf Learn Represent (ICLR)"},{"key":"ref56","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref55","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref54","article-title":"A connection between generative adversarial networks, inverse reinforcement learning, and energy-based models","author":"finn","year":"2016","journal-title":"arXiv 1611 03852"},{"key":"ref53","first-page":"1255","article-title":"Modeling interaction via the principle of maximum causal entropy","author":"ziebart","year":"2010","journal-title":"Proc 27th Int Conf Mach Learn"},{"key":"ref52","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"8","author":"ziebart","year":"2008","journal-title":"Proc AAAI"},{"key":"ref10","first-page":"1","article-title":"Next generation intrusion detection: Autonomous reinforcement learning of network attacks","author":"cannady","year":"2000","journal-title":"Proc 23rd Nat Inf Syst Security Conf"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CNS.2013.6682689"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(89)90020-8"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/GLOCOM.2015.7417078"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2016.2549640"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2016.2565198"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2017.2687918"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/WCNC.2017.7925694"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952524"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/SURV.2011.122211.00162"},{"key":"ref19","first-page":"78","article-title":"A survey on security and various attacks in wireless sensor network","volume":"5","author":"sengar","year":"2017","journal-title":"Int J Comput Sci Eng"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2017.2749883"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/s17051031"},{"key":"ref6","volume":"1","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/WoWMoM49955.2020.00029"},{"key":"ref8","article-title":"IoT security techniques based on machine learning","author":"xiao","year":"2018","journal-title":"arXiv 1801 06275"},{"key":"ref49","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"ng","year":"2000","journal-title":"Proc ICML"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCW.2018.8403655"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2014.2320099"},{"key":"ref46","first-page":"1555","article-title":"Predictive representations of state","author":"littman","year":"2002","journal-title":"Advances in neural information processing systems"},{"key":"ref45","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref48","first-page":"29","article-title":"Deep recurrent Q-learning for partially observable MDPs","author":"hausknecht","year":"2015","journal-title":"Proc Conf Artif Intell (AAAI)"},{"key":"ref47","first-page":"712","article-title":"Learning predictive state representations","author":"singh","year":"2003","journal-title":"Proc 20th Int Conf Mach Learn (ICML)"},{"key":"ref42","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv 1312 5602"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/BF02551274"},{"key":"ref44","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume":"32","author":"silver","year":"0","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref43","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"}],"container-title":["IEEE Internet of Things Journal"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6488907\/9973413\/09844128.pdf?arnumber=9844128","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:33:47Z","timestamp":1672083227000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9844128\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,15]]},"references-count":64,"journal-issue":{"issue":"24"},"URL":"https:\/\/doi.org\/10.1109\/jiot.2022.3194694","relation":{},"ISSN":["2327-4662","2372-2541"],"issn-type":[{"value":"2327-4662","type":"electronic"},{"value":"2372-2541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,15]]}}}