{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T06:48:13Z","timestamp":1730270893090,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9981831","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:38:15Z","timestamp":1672083495000},"page":"9027-9033","source":"Crossref","is-referenced-by-count":2,"title":["Safety Correction from Baseline: Towards the Risk-aware Policy in Robotics via Dual-agent Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Linrui","family":"Zhang","sequence":"first","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School,Center for Intelligent Control and Telescience,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zichen","family":"Yan","sequence":"additional","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School,Center for Intelligent Control and Telescience,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Shen","sequence":"additional","affiliation":[{"name":"JD Explore Academy,Beijing,China,100176"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shoujie","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School,Center for Intelligent Control and Telescience,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xueqian","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School,Center for Intelligent Control and Telescience,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dacheng","family":"Tao","sequence":"additional","affiliation":[{"name":"JD Explore Academy,Beijing,China,100176"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2975428"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.049"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/app9245571"},{"key":"ref4","article-title":"Concrete problems in ai safety","author":"Amodei","year":"2016","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-090419-075625"},{"key":"ref6","first-page":"997","article-title":"Safe exploration for optimization with gaussian processes","volume-title":"International conference on machine learning","author":"Sui"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"ref8","article-title":"Benchmarking safe exploration in deep reinforcement learning","volume":"7","author":"Ray","year":"2019","journal-title":"arXiv preprint"},{"key":"ref9","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proceedings of the 34th International Conference on Machine Learning, ICML 2017","volume":"70","author":"Achiam","year":"2017"},{"key":"ref10","article-title":"Hindsight experience replay","volume":"30","author":"Andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/614"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029423"},{"key":"ref13","article-title":"Safe exploration in continuous action spaces","volume":"abs\/1801.08757","author":"Dalal","year":"2018","journal-title":"CoRR"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341315"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref16","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","volume":"abs\/1707.08817","author":"Vecer\u00edk","year":"2017","journal-title":"CoRR"},{"key":"ref17","first-page":"2474","article-title":"Policy optimization with demonstrations","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018","volume":"80","author":"Kang","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5953"},{"key":"ref19","article-title":"Reinforcement learning from imperfect demonstrations","volume-title":"6th International Conference on Learning Representations, ICLR 2018","author":"Gao","year":"2018"},{"key":"ref20","first-page":"465","article-title":"Integrating behavior cloning and reinforcement learning for improved performance in dense and sparse reward environments","volume-title":"Proceedings of the 19th International Conference on Autonomous Agents and Multiagent Systems, AAMAS 20","author":"Goecks","year":"2020"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/436"},{"key":"ref22","article-title":"Decoupling exploration and exploitation in reinforcement learning","author":"Sch\u00e4fer","year":"2021","journal-title":"arXiv preprint"},{"key":"ref23","article-title":"Decoupled exploration and exploitation policies for sample-efficient reinforcement learning","author":"Whitney","year":"2021","journal-title":"arXiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"key":"ref25","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"International conference on machine learning","author":"Silver"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-18842-3"},{"key":"ref27","article-title":"Reward constrained policy optimization","volume-title":"International Conference on Learning Representations","author":"Tessler","year":"2018"},{"key":"ref28","article-title":"Exploration in deep reinforcement learning: a comprehensive survey","author":"Yang","year":"2021","journal-title":"arXiv preprint"},{"key":"ref29","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref30","article-title":"Lyapunov-based safe policy optimization for continuous control","author":"Chow","year":"2019","journal-title":"arXiv preprint"},{"key":"ref31","article-title":"Multi-goal reinforcement learning environments for simulated franka emika panda robot","volume":"abs\/2106.13687","author":"Gallou\u00e9dec","year":"2021","journal-title":"CoRR"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2022,10,23]]},"location":"Kyoto, Japan","end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09981831.pdf?arnumber=9981831","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T08:38:22Z","timestamp":1709368702000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9981831\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9981831","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}