{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T14:33:19Z","timestamp":1756996399363,"version":"3.37.3"},"reference-count":36,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001863","name":"New Energy and Industrial Technology Development Organization (NEDO), Japan","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001863","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science (JSPS), Japan, through KAKENHI","doi-asserted-by":"publisher","award":["19H04180","22H04998","23H04676"],"award-info":[{"award-number":["19H04180","22H04998","23H04676"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/access.2023.3314750","type":"journal-article","created":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T17:41:11Z","timestamp":1694626871000},"page":"100798-100809","source":"Crossref","is-referenced-by-count":5,"title":["Deep Adversarial Reinforcement Learning Method to Generate Control Policies Robust Against Worst-Case Value Predictions"],"prefix":"10.1109","volume":"11","author":[{"given":"Kohei","family":"Ohashi","sequence":"first","affiliation":[{"name":"Department of Systems Science, Graduate School of Informatics, Kyoto University, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0078-6942","authenticated-orcid":false,"given":"Kosuke","family":"Nakanishi","sequence":"additional","affiliation":[{"name":"Department of Systems Science, Graduate School of Informatics, Kyoto University, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuji","family":"Yasui","sequence":"additional","affiliation":[{"name":"Honda Research and Development Company Ltd., Saitama, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shin","family":"Ishii","sequence":"additional","affiliation":[{"name":"Department of Systems Science, Graduate School of Informatics, Kyoto University, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","volume":"812","author":"l\u00f6fberg","year":"2003","journal-title":"Minimax approaches to robust model predictive control"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1103\/PhysRev.36.823"},{"key":"ref12","first-page":"2040","article-title":"Robust deep reinforcement learning with adversarial attacks","volume":"3","author":"pattanaik","year":"2018","journal-title":"Proc 3rd Int Joint Conf Auton Agents and MultiAgent Syst"},{"key":"ref34","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref15","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv 1509 02971"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ACCESS.2021.3121751"},{"key":"ref36","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref31","first-page":"26","article-title":"Lecture 6.5-rmsprop, COURSERA: Neural networks for machine learning","volume":"4","author":"tieleman","year":"2012","journal-title":"COURSERA Neural Netw Mach Learn"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1613\/jair.3912"},{"key":"ref11","first-page":"21024","article-title":"Robust deep reinforcement learning against adversarial perturbations on state observations","volume":"33","author":"zhang","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"year":"2016","author":"coumans","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning","key":"ref33"},{"key":"ref10","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref32","first-page":"1","article-title":"Chainer: A next-generation open source framework for deep learning","volume":"5","author":"tokui","year":"2015","journal-title":"Proc 29th Workshop Mach Learn Syst Neural Inf Process Syst (NIPS)"},{"key":"ref2","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv 1409 1556"},{"key":"ref1","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst (NIPS)"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1145\/3128572.3140444"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/EuroSP.2016.36"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/CVPR.2016.282"},{"key":"ref18","article-title":"Adversarial examples in the physical world","author":"kurakin","year":"2016","journal-title":"arXiv 1607 02533"},{"key":"ref24","first-page":"1328","article-title":"Certified adversarial robustness for deep reinforcement learning","author":"l\u00fctjens","year":"2020","journal-title":"Proc Conf Robot Learn"},{"key":"ref23","article-title":"Characterizing attacks on deep reinforcement learning","author":"pan","year":"2019","journal-title":"arXiv 1907 09470"},{"key":"ref26","article-title":"Robust reinforcement learning on state observations with learned optimal adversary","author":"zhang","year":"2021","journal-title":"arXiv 2101 08452"},{"key":"ref25","article-title":"Adversarial policies: Attacking deep reinforcement learning","author":"gleave","year":"2019","journal-title":"arXiv 1905 10615"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1162\/0899766053011528"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/IROS.2017.8206245"},{"key":"ref21","first-page":"2817","article-title":"Robust adversarial reinforcement learning","volume":"70","author":"pinto","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn (ICML)"},{"key":"ref28","article-title":"Efficient adversarial training without attacking: Worst-case-aware robust reinforcement learning","author":"liang","year":"2022","journal-title":"arXiv 2210 05927"},{"key":"ref27","article-title":"Who is the strongest enemy? Towards optimal and efficient evasion attacks in deep RL","author":"sun","year":"2021","journal-title":"arXiv 2106 05087"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/TNNLS.2021.3056046"},{"key":"ref8","article-title":"Adversarial attacks on neural network policies","author":"huang","year":"2017","journal-title":"arXiv 1702 02284"},{"key":"ref7","first-page":"468","article-title":"Explaining and harnessing adversarial examples","volume":"33","author":"goodfellow","year":"2014","journal-title":"Int J Ser Solid Mech Strength Mater"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1177\/0278364917710318"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Playing Atari with deep reinforcement learning","volume":"518","author":"mnih","year":"2013","journal-title":"Nature"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref6","article-title":"Intriguing properties of neural networks","author":"szegedy","year":"2013","journal-title":"arXiv 1312 6199"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1038\/nature24270"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/10005208\/10250423.pdf?arnumber=10250423","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,13]],"date-time":"2023-11-13T19:35:09Z","timestamp":1699904109000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10250423\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/access.2023.3314750","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2023]]}}}