{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T03:41:56Z","timestamp":1773805316159,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T00:00:00Z","timestamp":1714089600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,26]],"date-time":"2024-04-26T00:00:00Z","timestamp":1714089600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s11432-021-3688-y","type":"journal-article","created":{"date-parts":[[2024,4,29]],"date-time":"2024-04-29T15:01:57Z","timestamp":1714402917000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Understanding adversarial attacks on observations in deep reinforcement learning"],"prefix":"10.1007","volume":"67","author":[{"given":"You","family":"Qiaoben","sequence":"first","affiliation":[]},{"given":"Chengyang","family":"Ying","sequence":"additional","affiliation":[]},{"given":"Xinning","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Hang","family":"Su","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,26]]},"reference":[{"key":"3688_CR1","unstructured":"Mnih V, Kavukcuoglu K, Silver D, et al. Playing Atari with deep reinforcement learning. 2013. ArXiv:1312.5602"},{"key":"3688_CR2","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, et al. Human-level control through deep reinforcement learning. Nature, 2015, 518: 529\u2013533","journal-title":"Nature"},{"key":"3688_CR3","unstructured":"Mnih V, Badia A P, Mirza M, et al. Asynchronous methods for deep reinforcement learning. In: Proceedings of International Conference on Machine Learning (ICML), 2016. 1928\u20131937"},{"key":"3688_CR4","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison C J, et al. Mastering the game of Go with deep neural networks and tree search. Nature, 2016, 529: 484\u2013489","journal-title":"Nature"},{"key":"3688_CR5","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser J, Antonoglou I, Hubert T, et al. Mastering Atari, Go, chess and shogi by planning with a learned model. Nature, 2020, 588: 604\u2013609","journal-title":"Nature"},{"key":"3688_CR6","unstructured":"Huang S, Papernot N, Goodfellow I, et al. Adversarial attacks on neural network policies. 2017. ArXiv:1702.02284"},{"key":"3688_CR7","unstructured":"Kos J, Song D. Delving into adversarial attacks on deep policies. 2017. ArXiv:1705.06452"},{"key":"3688_CR8","unstructured":"Zhang H, Chen H G, Xiao C W, et al. Robust deep reinforcement learning against adversarial perturbations on state observations. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS), 2020. 21024\u201321037"},{"key":"3688_CR9","unstructured":"Zhang H, Chen H G, Boning D S, et al. Robust reinforcement learning on state observations with learned optimal adversary. In: Proceedings of International Conference on Learning Representations (ICLR), 2021"},{"key":"3688_CR10","doi-asserted-by":"crossref","unstructured":"Biggio B, Corona I, Maiorca D, et al. Evasion attacks against machine learning at test time. In: Proceedings of Joint European Conference on Machine Learning and Knowledge Discovery in Databases (ECML PKDD), 2013. 387\u2013402","DOI":"10.1007\/978-3-642-40994-3_25"},{"key":"3688_CR11","unstructured":"Szegedy C, Zaremba W, Sutskever I, et al. Intriguing properties of neural networks. In: Proceedings of International Conference on Learning Representations (ICLR), 2014"},{"key":"3688_CR12","unstructured":"Goodfellow I J, Shlens J, Szegedy C. Explaining and harnessing adversarial examples. 2014. ArXiv:1412.6572"},{"key":"3688_CR13","unstructured":"Madry A, Makelov A, Schmidt L, et al. Towards deep learning models resistant to adversarial attacks. In: Proceedings of International Conference on Learning Representations (ICLR), 2018"},{"key":"3688_CR14","doi-asserted-by":"crossref","unstructured":"Dong Y P, Liao F Z, Pang T Y, et al. Boosting adversarial attacks with momentum. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2018. 9185\u20139193","DOI":"10.1109\/CVPR.2018.00957"},{"key":"3688_CR15","unstructured":"Xiao C W, Pan X L, He W, et al. Characterizing attacks on deep reinforcement learning. 2019. ArXiv:1907.09470"},{"key":"3688_CR16","doi-asserted-by":"crossref","unstructured":"Mandlekar A, Zhu Y K, Garg A, et al. Adversarially robust policy learning: active construction of physically-plausible perturbations. In: Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), 2017. 3932\u20133939","DOI":"10.1109\/IROS.2017.8206245"},{"key":"3688_CR17","unstructured":"Russo A, Proutiere A. Optimal attacks on reinforcement learning policies. 2019. ArXiv:1907.13548"},{"key":"3688_CR18","doi-asserted-by":"crossref","unstructured":"Lin Y C, Hong Z W, Liao Y H, et al. Tactics of adversarial attack on deep reinforcement learning agents. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence (IJCAI), 2017. 3756\u20133762","DOI":"10.24963\/ijcai.2017\/525"},{"key":"3688_CR19","unstructured":"Pattanaik A, Tang Z, Liu S, et al. Robust deep reinforcement learning with adversarial attacks. In: Proceedings of the 17th International Conference on Autonomous Agents and Multi Agent Systems (AAMAS), 2018. 2040\u20132042"},{"key":"3688_CR20","doi-asserted-by":"crossref","unstructured":"Lin J Y, Dzeparoska K, Zhang S Q, et al. On the robustness of cooperative multi agent reinforcement learning. In: Proceedings of IEEE Security and Privacy Workshops (SPW), 2020. 62\u201368","DOI":"10.1109\/SPW50608.2020.00027"},{"key":"3688_CR21","doi-asserted-by":"crossref","unstructured":"Ying C, Zhou X, Su H, et al. Towards safe reinforcement learning via constraining conditional value-at-risk. 2022. ArXiv:2206.04436","DOI":"10.24963\/ijcai.2022\/510"},{"key":"3688_CR22","doi-asserted-by":"crossref","unstructured":"Sun J W, Zhang T W, Xie X, et al. Stealthy and efficient adversarial attacks against deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), 2020","DOI":"10.1609\/aaai.v34i04.6047"},{"key":"3688_CR23","doi-asserted-by":"crossref","unstructured":"Yang C H H, Qi J, Chen P Y, et al. Enhanced adversarial strategically-timed attacks against deep reinforcement learning. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2020. 3407\u20133411","DOI":"10.1109\/ICASSP40776.2020.9053342"},{"key":"3688_CR24","unstructured":"Inkawhich M, Chen Y, Li H. Snooping attacks on deep reinforcement learning. In: Proceedings of the 19th International Conference on Autonomous Agents and Multi Agent Systems (AAMAS), 2020. 557\u2013565"},{"key":"3688_CR25","unstructured":"Zhang H, Weng T W, Chen P Y, et al. Efficient neural network robustness certification with general activation functions. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS), 2018. 4939\u20134948"},{"key":"3688_CR26","doi-asserted-by":"crossref","unstructured":"Nielsen F, Sun K. Guaranteed deterministic bounds on the total variation distance between univariate mixtures. In: Proceedings of the 28th International Workshop on Machine Learning for Signal Processing (MLSP), 2018. 1\u20136","DOI":"10.1109\/MLSP.2018.8517093"},{"key":"3688_CR27","unstructured":"Achiam J, Held D, Tamar A, et al. Constrained policy optimization. In: Proceedings of International Conference on Machine Learning (ICML), 2017. 22\u201331"},{"key":"3688_CR28","unstructured":"Schulman J, Wolski F, Dhariwal P, et al. Proximal policy optimization. 2017. ArXiv:1707.06347"},{"key":"3688_CR29","unstructured":"Horgan D, Quan J, Budden D, et al. Distributed prioritized experience replay. In: Proceedings of International Conference on Learning Representations (ICLR), 2018"},{"key":"3688_CR30","unstructured":"Konda V R, Tsitsiklis J N. Actor-critic algorithms. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS), 2000. 1008\u20131014"},{"key":"3688_CR31","unstructured":"Schulman J, Levine S, Abbeel P, et al. Trust region policy optimization. In: Proceedings of International Conference on Machine Learning (ICML), 2015. 1889\u20131897"},{"key":"3688_CR32","unstructured":"Oikarinen T, Weng T W, Daniel L. Robust deep reinforcement learning through adversarial loss. 2020. ArXiv:2008.01976"},{"key":"3688_CR33","unstructured":"Kostrikov I. PyTorch implementations of reinforcement learning algorithms. https:\/\/github.com\/ikostrikov\/pytorch-a2c-ppo-acktr-gail"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-021-3688-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-021-3688-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-021-3688-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T19:52:45Z","timestamp":1750362765000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-021-3688-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,26]]},"references-count":33,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["3688"],"URL":"https:\/\/doi.org\/10.1007\/s11432-021-3688-y","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,26]]},"assertion":[{"value":"13 December 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 September 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 April 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"152104"}}