{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T05:11:20Z","timestamp":1760677880222,"version":"build-2065373602"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Control Syst. Lett."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/lcsys.2025.3594257","type":"journal-article","created":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T18:52:58Z","timestamp":1753901578000},"page":"2303-2308","source":"Crossref","is-referenced-by-count":0,"title":["Perturbation-Controlled Deep Q-Learning With Human-Teaming for Enhancing Adversarial Robustness"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7246-1056","authenticated-orcid":false,"given":"Sadredin","family":"Hokmi","sequence":"first","affiliation":[{"name":"Electrical Engineering Department, Sharif University of Technology, Tehran, Iran"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1831-3809","authenticated-orcid":false,"given":"Pegah","family":"Moushaee","sequence":"additional","affiliation":[{"name":"Electrical Engineering Department, Sharif University of Technology, Tehran, Iran"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8509-4525","authenticated-orcid":false,"given":"Mohammad","family":"Haeri","sequence":"additional","affiliation":[{"name":"Electrical Engineering Department, Sharif University of Technology, Tehran, Iran"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1","article-title":"Intriguing properties of neural networks","volume-title":"Proc. ICLR","author":"Szegedy"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3128572.3140444"},{"key":"ref3","article-title":"Trojan attacks on deep reinforcement learning agents","author":"Kiourti","year":"2019","journal-title":"arXiv:1903.06638"},{"key":"ref4","article-title":"A distance-based anomaly detection framework for deep reinforcement learning","author":"Zhang","year":"2021","journal-title":"arXiv: 2109.09889"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.61841\/turcomat.v15i2.14793"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.2.294.324"},{"key":"ref7","first-page":"1651","article-title":"Policy gradients with variance related risk criteria","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tamar"},{"key":"ref8","first-page":"1","article-title":"Algorithms for CVaR optimization in MDPs","volume-title":"Proc. NeurIPS","volume":"27","author":"Chow"},{"key":"ref9","first-page":"2411","article-title":"Speedy Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"24","author":"Azar"},{"key":"ref10","first-page":"1","article-title":"Finite-time analysis for double Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Xiong"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2020.2970555"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIA65044.2024.10768158"},{"key":"ref13","article-title":"Momentum-based Accelerated Q-learning","author":"Weng","year":"2019","journal-title":"arXiv:1910.11673"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"ref15","first-page":"1104","article-title":"Implicit quantile networks for distributional reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Dabney"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793611"},{"issue":"1","key":"ref17","first-page":"6070","article-title":"Risk constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref18","first-page":"30977","article-title":"Distributional reinforcement learning for risk-sensitive policies","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Lim"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2020.101994"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISMSIT.2019.8932809"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2024.103825"},{"key":"ref22","first-page":"1","article-title":"Deep reinforcement learning from human preferences","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Christiano"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11485"},{"key":"ref25","first-page":"2067","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","volume-title":"Proc. 17th Int. Conf. Auto. Agents MultiAgent Syst.","author":"Saunders"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3420959"},{"key":"ref27","first-page":"47","article-title":"Entropic risk optimization in discounted MDPs","volume-title":"Proc. Int. Conf. Artif. Intell. Stat.","author":"Hau"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i7.26009"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/SPIES55999.2022.10082714"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.11948\/20240242"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1063\/1.1748067"},{"key":"ref32","first-page":"1","article-title":"Finite-time performance bounds and adaptive learning rate selection for two time-scale reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Gupta"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4684-9352-8"},{"key":"ref34","first-page":"1799","article-title":"Towards anomaly detection in reinforcement learning","volume-title":"Proc. 21st Int. Conf. Auto. Agents Multiagent Syst.","author":"M\u00fcller"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CCECE49351.2022.9918216"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9922370"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2021.3129074"},{"key":"ref38","first-page":"1","article-title":"Online robust reinforcement learning with model uncertainty","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Wang"}],"container-title":["IEEE Control Systems Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782633\/10939047\/11104842.pdf?arnumber=11104842","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T04:36:54Z","timestamp":1760675814000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11104842\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/lcsys.2025.3594257","relation":{},"ISSN":["2475-1456"],"issn-type":[{"type":"electronic","value":"2475-1456"}],"subject":[],"published":{"date-parts":[[2025]]}}}