{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T02:24:36Z","timestamp":1730341476191,"version":"3.28.0"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,8]]},"DOI":"10.23919\/acc53348.2022.9867848","type":"proceedings-article","created":{"date-parts":[[2022,9,5]],"date-time":"2022-09-05T20:24:10Z","timestamp":1662409450000},"page":"2843-2850","source":"Crossref","is-referenced-by-count":1,"title":["Balancing detectability and performance of attacks on the control channel of Markov Decision Processes"],"prefix":"10.23919","author":[{"given":"Alessio","family":"Russo","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Decision and Control Systems of the EECS School,Stockholm,Sweden"}]},{"given":"Alexandre","family":"Proutiere","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Decision and Control Systems of the EECS School,Stockholm,Sweden"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01551-9"},{"key":"ref38","volume":"48","author":"borkar","year":"2009","journal-title":"Stochastic Approximation A Dynamical Systems Viewpoint"},{"key":"ref33","volume":"7","author":"altman","year":"1999","journal-title":"Constrained Markov Decision Processes"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1017\/S0021900200005520"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177697700"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176346587"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref37"},{"key":"ref36","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref35","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"chow","year":"2017","journal-title":"The Journal of Machine Learning Research"},{"key":"ref34","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2522401"},{"article-title":"Gartner top 10 strategic technology trends for 2020-smarter with gartner","year":"2019","author":"burke","key":"ref11"},{"article-title":"Adversarial machine learning\u2013 industry perspectives","year":"2020","author":"kumar","key":"ref12"},{"article-title":"Explaining and harnessing adversarial examples","year":"2014","author":"goodfellow","key":"ref13"},{"article-title":"Adversarial attacks on neural network policies","year":"2017","author":"huang","key":"ref14"},{"key":"ref15","first-page":"2040","article-title":"Robust deep reinforcement learning with adversarial attacks","author":"pattanaik","year":"2018","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9483025"},{"article-title":"Robust deep reinforcement learning against adversarial perturbations on observations","year":"2020","author":"zhang","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147846"},{"key":"ref19","first-page":"6215","article-title":"Action robust reinforcement learning and applications in continuous control","author":"tessler","year":"2019","journal-title":"International Conference on Machine Learning"},{"article-title":"Balancing detectability and performance of attacks on the control channel of markov decision processes","year":"2021","author":"russo","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-411597-2.00006-0"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"article-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177693055"},{"article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","author":"berner","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.enbuild.2018.03.051"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref2"},{"article-title":"Reinforcement learning in healthcare: A survey","year":"2019","author":"yu","key":"ref9"},{"journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming","year":"2014","author":"puterman","key":"ref1"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"2917","DOI":"10.1109\/18.737522","article-title":"Information bounds and quick detection of parameter changes in stochastic systems","volume":"44","author":"lai","year":"1998","journal-title":"IEEE Transactions on Information Theory"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1201\/b17279"},{"key":"ref21","volume":"104","author":"basseville","year":"1993","journal-title":"Detection of Abrupt Changes Theory and Application"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/525"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-62416-7_19"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5887"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9682884"}],"event":{"name":"2022 American Control Conference (ACC)","start":{"date-parts":[[2022,6,8]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2022,6,10]]}},"container-title":["2022 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9866948\/9867142\/09867848.pdf?arnumber=9867848","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T20:39:20Z","timestamp":1664829560000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9867848\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,8]]},"references-count":39,"URL":"https:\/\/doi.org\/10.23919\/acc53348.2022.9867848","relation":{},"subject":[],"published":{"date-parts":[[2022,6,8]]}}}