{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T02:31:40Z","timestamp":1780367500593,"version":"3.54.1"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T00:00:00Z","timestamp":1702425600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T00:00:00Z","timestamp":1702425600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,13]]},"DOI":"10.1109\/cdc49753.2023.10383375","type":"proceedings-article","created":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T18:38:36Z","timestamp":1705689516000},"page":"334-341","source":"Crossref","is-referenced-by-count":5,"title":["Robust Safe Reinforcement Learning under Adversarial Disturbances"],"prefix":"10.1109","author":[{"given":"Zeyang","family":"Li","sequence":"first","affiliation":[{"name":"Tsinghua University,Department of Mechanical Engineering,Beijing,China,100084"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chuxiong","family":"Hu","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Mechanical Engineering,Beijing,China,100084"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University,Beijing,China,100084"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jia","family":"Cheng","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Mechanical Engineering,Beijing,China,100084"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yunan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Department of Mechanical Engineering,Beijing,China,100084"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-7784-8"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"issue":"1","key":"ref3","first-page":"1395","article-title":"A review of robot learning for manipulation: Challenges, representations, and algorithms","volume":"22","author":"Kroemer","year":"2021","journal-title":"The Journal of Machine Learning Research"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2638961"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796030"},{"issue":"1","key":"ref7","first-page":"1437","article-title":"A comprehensive survey on safe rein-forcement learning","volume":"16","author":"Garcia","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"issue":"1","key":"ref10","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"The Journal of Machine Learning Research"},{"key":"ref11","first-page":"9133","article-title":"Responsive safety in reinforce-ment learning by pid lagrangian methods","volume-title":"International Conference on Machine Learning","author":"Stooke"},{"key":"ref12","article-title":"Reward constrained policy optimization","volume-title":"International Conference on Learning Representations","author":"Tessler"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3163816"},{"key":"ref14","article-title":"Projection-based constrained policy optimization","volume-title":"International Conference on Learning Representations","author":"Yang"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/763"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/s0005-1098(99)00113-2"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-8176-4606-6"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3238656"},{"key":"ref19","first-page":"V003T42A003","article-title":"Control in a safe set: Addressing safety in human-robot interactions","volume-title":"Dynamic Systems and Control Conference","volume":"46209","author":"Liu","year":"2014"},{"key":"ref20","first-page":"97","article-title":"Joint synthesis of safety certificate and safe control policy using constrained reinforce-ment learning","volume-title":"Learning for Dynamics and Control Conference","author":"Ma"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2011.2105730"},{"key":"ref22","first-page":"25 636","article-title":"Reachability constrained reinforce-ment learning","volume-title":"International Conference on Machine Learning","author":"Yu"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-8176-4757-5","volume-title":"H-infinity optimal control and related minimax design problems: a dynamic game approach","author":"Ba\u015far","year":"2008"},{"key":"ref25","volume-title":"Robust constraint satisfaction: Invariant sets and predictive control","author":"Kerrigan","year":"2001"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2017.8263977"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794107"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3063315"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012996299557"},{"key":"ref30","first-page":"1321","article-title":"Approximate dynamic programming for two-player zero-sum markov games","volume-title":"International Conference on Machine Learning","author":"Perolat"},{"key":"ref31","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International Conference on Machine Learning","author":"Haarnoja"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3196132"},{"key":"ref34","first-page":"1110","article-title":"Learning to walk in the real world with minimal human effort","volume-title":"Proceedings of the 2020 Conference on Robot Learning","volume":"155","author":"Ha"}],"event":{"name":"2023 62nd IEEE Conference on Decision and Control (CDC)","location":"Singapore, Singapore","start":{"date-parts":[[2023,12,13]]},"end":{"date-parts":[[2023,12,15]]}},"container-title":["2023 62nd IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10383192\/10383193\/10383375.pdf?arnumber=10383375","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,21]],"date-time":"2024-08-21T22:36:35Z","timestamp":1724279795000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10383375\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,13]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/cdc49753.2023.10383375","relation":{},"subject":[],"published":{"date-parts":[[2023,12,13]]}}}