{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:25:22Z","timestamp":1771064722602,"version":"3.50.1"},"reference-count":39,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.23919\/acc45564.2020.9147846","type":"proceedings-article","created":{"date-parts":[[2020,7,27]],"date-time":"2020-07-27T22:02:33Z","timestamp":1595887353000},"page":"3959-3964","source":"Crossref","is-referenced-by-count":29,"title":["Robustifying Reinforcement Learning Agents via Action Space Adversarial Training"],"prefix":"10.23919","author":[{"given":"Kai Liang","family":"Tan","sequence":"first","affiliation":[]},{"given":"Yasaman","family":"Esfandiari","sequence":"additional","affiliation":[]},{"given":"Xian Yeow","family":"Lee","sequence":"additional","affiliation":[]},{"family":"Aakanksha","sequence":"additional","affiliation":[]},{"given":"Soumik","family":"Sarkar","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref38","article-title":"Openai gym","author":"brockman","year":"2016","journal-title":"arXiv preprint arXiv 1606 01540"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref32","first-page":"2817","article-title":"Robust adversarial reinforcement learning","author":"pinto","year":"0"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206245"},{"key":"ref30","first-page":"2040","article-title":"Robust deep reinforcement learning with adversarial attacks","author":"pattanaik","year":"2018","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref37","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref36","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.2307\/3212655"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v30i1.10295","article-title":"Deep reinforcement learning with double q-learning","author":"van hasselt","year":"2016","journal-title":"THIRTIETH AAAI Conference on Artificial Intelligence"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3052973.3053009"},{"key":"ref11","article-title":"Towards deep learning models resistant to adversarial attacks","author":"madry","year":"2017","journal-title":"arXiv preprint arXiv 1706 06083"},{"key":"ref12","article-title":"A saddle-point dynamical system approach for robust deep learning","author":"esfandiari","year":"2019","journal-title":"arXiv preprint arXiv 1910 08623"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1137\/0114053"},{"key":"ref14","article-title":"Theoretically principled trade-off between robustness and accuracy","volume":"abs 1901 8573","author":"zhang","year":"2019","journal-title":"CoRR"},{"key":"ref15","article-title":"Attack analysis and resilient control design for discrete-time distributed multi-agent systems","volume":"abs 1801 870","author":"mustafa","year":"2018","journal-title":"CoRR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2303233"},{"key":"ref17","volume":"40","author":"zhou","year":"0","journal-title":"Robust and Optimal Control"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISRCS.2010.5603480"},{"key":"ref19","article-title":"Action robust reinforcement learning and applications in continuous control","author":"tessler","year":"2019","journal-title":"arXiv preprint arXiv 1901 01023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2016.41"},{"key":"ref4","article-title":"Darts: Deceiving autonomous cars with toxic signs","author":"sitawarin","year":"2018","journal-title":"arXiv preprint arXiv 1802 06360"},{"key":"ref27","article-title":"Towards deep neural network architectures robust to adversarial examples","author":"gu","year":"2014","journal-title":"arXiv preprint arXiv 1412 5068"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2019-9076"},{"key":"ref6","article-title":"Explaining and harnessing adversarial examples","author":"goodfellow","year":"2014","journal-title":"arXiv preprint arXiv 1412 6572"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-62416-7_19"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.49"},{"key":"ref8","article-title":"Intriguing properties of neural networks","author":"szegedy","year":"2013","journal-title":"arXiv preprint arXiv 1312 6199"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40994-3_25"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2016.2542134"},{"key":"ref9","article-title":"Adversarial machine learning at scale","author":"kurakin","year":"2016","journal-title":"arXiv preprint arXiv 1611 01236"},{"key":"ref1","first-page":"3814","article-title":"Data center cooling using model-predictive control","author":"lazic","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref20","first-page":"9916","article-title":"Online robust policy learning in the presence of unknown adversaries","author":"havens","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2815758"},{"key":"ref21","article-title":"Spatiotemporally constrained action space attacks on deep reinforcement learning agents","author":"lee","year":"2019","journal-title":"arXiv preprint arXiv 1909 01771"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619570"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799183"},{"key":"ref26","first-page":"2613","article-title":"Measuring neural net robustness with constraints","author":"bastani","year":"2016","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.04.027"}],"event":{"name":"2020 American Control Conference (ACC)","location":"Denver, CO, USA","start":{"date-parts":[[2020,7,1]]},"end":{"date-parts":[[2020,7,3]]}},"container-title":["2020 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9140048\/9147203\/09147846.pdf?arnumber=9147846","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T19:15:13Z","timestamp":1667589313000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9147846\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":39,"URL":"https:\/\/doi.org\/10.23919\/acc45564.2020.9147846","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}