{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,27]],"date-time":"2024-12-27T05:16:50Z","timestamp":1735276610459,"version":"3.32.0"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,14]],"date-time":"2024-10-14T00:00:00Z","timestamp":1728864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,14]]},"DOI":"10.1109\/iros58592.2024.10801338","type":"proceedings-article","created":{"date-parts":[[2024,12,25]],"date-time":"2024-12-25T19:17:39Z","timestamp":1735154259000},"page":"12378-12385","source":"Crossref","is-referenced-by-count":0,"title":["Safe Reinforcement Learning via Hierarchical Adaptive Chance-Constraint Safeguards"],"prefix":"10.1109","author":[{"given":"Zhaorun","family":"Chen","sequence":"first","affiliation":[{"name":"University of Chicago,Department of Computer Science,USA"}]},{"given":"Zhuokai","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Chicago,Department of Computer Science,USA"}]},{"given":"Tairan","family":"He","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,Robotics Institute,USA"}]},{"given":"Binhao","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Mechanical Engineering"}]},{"given":"Xuhao","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Mechanical Engineering"}]},{"given":"Liang","family":"Gong","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Mechanical Engineering"}]},{"given":"Chengliang","family":"Liu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Mechanical Engineering"}]}],"member":"263","reference":[{"key":"ref1","first-page":"22","article-title":"Constrained policy optimization","volume-title":"International conference on machine learning","author":"Achiam"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1609\/aaai.v32i1.11797"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1201\/9781315140223"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1007\/978-3-031-63735-3_2"},{"volume-title":"Reinforcement learning and optimal control","year":"2019","author":"Bertsekas","key":"ref5"},{"year":"2020","author":"Bharadhwaj","article-title":"Conservative safety critics for exploration","key":"ref6"},{"year":"2019","author":"Bohez","article-title":"Value constrained model-free continuous control","key":"ref7"},{"year":"2021","author":"Chen","article-title":"A primal-dual approach to constrained markov decision processes","key":"ref8"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref10","article-title":"A lyapunov-based approach to safe reinforcement learning","volume":"31","author":"Chow","year":"2018","journal-title":"Advances in neural information processing systems"},{"year":"2018","author":"Dalal","article-title":"Safe exploration in continuous action spaces","key":"ref11"},{"year":"2021","author":"Donti","article-title":"Dc3: A learning method for optimization with hard constraints","key":"ref12"},{"volume-title":"Markov processes: characterization and convergence","year":"2009","author":"Ethier","key":"ref13"},{"year":"2017","author":"Eysenbach","article-title":"Leave no trace: Learning to reset for safe and autonomous reinforcement learning","key":"ref14"},{"year":"2020","author":"Ferlez","article-title":"Shieldnn: A provably safe nn filter for unsafe nn controllers","key":"ref15"},{"key":"ref16","article-title":"Iterative reachability estimation for safe reinforcement learning","volume":"36","author":"Ganai","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/IV51971.2022.9827264"},{"issue":"1","key":"ref18","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u0131a","year":"2015","journal-title":"Journal of Machine Learning Research"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1609\/aaai.v37i12.26734"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/CDC51059.2022.9992692"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/ICRA.2019.8794127"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/CDC.2018.8619572"},{"key":"ref23","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"year":"2018","author":"Liang","article-title":"Accelerated primal-dual policy optimization for safe reinforcement learning","key":"ref24"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1007\/BF01589116"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.24963\/ijcai.2021\/614"},{"year":"2021","author":"Ma","article-title":"Learn zero-constraint-violation policy in model-free constrained reinforcement learning","key":"ref27"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1016\/j.ifacol.2020.12.361"},{"issue":"268","key":"ref29","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"ref30","first-page":"2","article-title":"Benchmarking safe exploration in deep reinforcement learning","volume":"7","author":"Ray","year":"2019"},{"key":"ref31","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"International conference on machine learning","author":"Schulman"},{"year":"2017","author":"Schulman","article-title":"Proximal policy optimization algorithms","key":"ref32"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1109\/LRA.2022.3192205"},{"year":"2020","author":"Srinivasan","article-title":"Learning to be safe: Deep rl with a safety critic","key":"ref34"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref35"},{"year":"2018","author":"Tessler","article-title":"Reward constrained policy optimization","key":"ref36"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1109\/LRA.2021.3070252"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1109\/LRA.2020.2976272"},{"key":"ref39","first-page":"10630","article-title":"Safe reinforcement learning using advantage-based intervention","volume-title":"International Conference on Machine Learning","author":"Wagener"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.23919\/ACC53348.2022.9867675"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.1109\/CDC40024.2019.9029720"},{"key":"ref42","article-title":"Constrained cross-entropy method for safe reinforcement learning","volume":"31","author":"Wen","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1609\/aaai.v37i12.26786"},{"key":"ref44","first-page":"15338","article-title":"First order constrained optimization in policy space","volume":"33","author":"Zhang","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.1109\/CYBER46603.2019.9066510"},{"doi-asserted-by":"publisher","key":"ref46","DOI":"10.24963\/ijcai.2023\/763"},{"volume-title":"5th Annual Conference on Robot Learning","author":"Zhao","article-title":"Model-free safe control for zero-violation reinforcement learning","key":"ref47"},{"key":"ref48","first-page":"783","article-title":"Probabilistic safeguard for reinforcement learning using safety index guided gaussian process models","volume-title":"Learning for Dynamics and Control Conference","author":"Zhao"}],"event":{"name":"2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2024,10,14]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,10,18]]}},"container-title":["2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10801246\/10801290\/10801338.pdf?arnumber=10801338","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,26]],"date-time":"2024-12-26T07:27:55Z","timestamp":1735198075000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10801338\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,14]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/iros58592.2024.10801338","relation":{},"subject":[],"published":{"date-parts":[[2024,10,14]]}}}