{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,20]],"date-time":"2026-07-20T22:32:18Z","timestamp":1784586738669,"version":"3.55.0"},"reference-count":41,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"NSF China","award":["52221005"],"award-info":[{"award-number":["52221005"]}]},{"name":"Tsinghua University Initiative Scientific Research Program"},{"name":"Tsinghua University-Toyota Joint Research Center for AI Technology of Automated Vehicle"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1109\/lra.2023.3238656","type":"journal-article","created":{"date-parts":[[2023,1,20]],"date-time":"2023-01-20T18:47:50Z","timestamp":1674240470000},"page":"1295-1302","source":"Crossref","is-referenced-by-count":57,"title":["Model-Free Safe Reinforcement Learning Through Neural Barrier Certificate"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7222-0019","authenticated-orcid":false,"given":"Yujie","family":"Yang","sequence":"first","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4285-0495","authenticated-orcid":false,"given":"Yuxuan","family":"Jiang","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yichen","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Interdisciplinary Information Sciences, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0282-8621","authenticated-orcid":false,"given":"Jianyu","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Interdisciplinary Information Sciences, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Model-based reinforcement learning for atari","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kaiser","year":"2020"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"issue":"1","key":"ref5","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garca","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref6","first-page":"8103","article-title":"A Lyapunov-based approach to safe reinforcement learning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Chow","year":"2018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"issue":"1","key":"ref10","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref11","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Achiam","year":"2017"},{"key":"ref12","article-title":"Reward constrained policy optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tessler","year":"2019"},{"key":"ref13","article-title":"Projection-based constrained policy optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yang","year":"2020"},{"key":"ref14","first-page":"15338","article-title":"First order constrained optimization in policy space","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Zhang","year":"2020"},{"key":"ref15","first-page":"9133","article-title":"Responsive safety in reinforcement learning by PID lagrangian methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Stooke","year":"2020"},{"key":"ref16","first-page":"97","article-title":"Joint synthesis of safety certificate and safe control policy using constrained reinforcement learning","volume-title":"Proc. 4th Annu. Learn. Dyn. Control Conf.","volume":"168","author":"Ma","year":"2021"},{"key":"ref17","first-page":"3245","article-title":"Neural Lyapunov control","volume-title":"Proc. Adv. neural Inf. Process. Syst.","volume":"32","author":"Chang","year":"2019"},{"key":"ref18","first-page":"708","article-title":"Learning for safety-critical control with control barrier functions","volume-title":"Proc. Learn. Dyn. Control.","author":"Taylor","year":"2020"},{"key":"ref19","first-page":"784","article-title":"Model-free safe control for zero-violation reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Zhao","year":"2021"},{"key":"ref20","article-title":"Enforcing robust control guarantees within neural network policies","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Donti","year":"2021"},{"key":"ref21","article-title":"Learning safe multi-agent control with decentralized neural barrier certificates","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Qin","year":"2021"},{"key":"ref22","first-page":"25621","article-title":"Learning barrier certificates: Towards safe reinforcement learning with zero training-time violations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Luo","year":"2021"},{"key":"ref23","first-page":"1724","article-title":"Safe nonlinear control using robust neural Lyapunov-barrier functions","volume-title":"Proc. Conf. Robot Learn.","author":"Dawson","year":"2021"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560886"},{"key":"ref25","first-page":"1522","article-title":"Risk-sensitive and robust decision-making: A cvar optimization approach","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chow","year":"2015"},{"key":"ref26","first-page":"8378","article-title":"Natural policy gradient primal-dual method for constrained markov decision processes","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Ding","year":"2020"},{"key":"ref27","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman","year":"2015"},{"key":"ref28","first-page":"466","article-title":"The Lyapunov neural network: Adaptive stability certification for safe learning of dynamical systems","volume-title":"Proc. 2nd Conf. Robot Learn.","volume":"87","author":"Richards","year":"2018"},{"key":"ref29","first-page":"1351","article-title":"Learning hybrid control barrier functions from data","volume-title":"Proc. Conf. Robot Learn.","author":"Lindemann","year":"2021"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9303785"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981177"},{"key":"ref32","first-page":"881","article-title":"Barrier bayesian linear regression: Online learning of control barrier conditions for safety-critical control of uncertain systems","volume-title":"Proc. Learn. Dyn. Control Conf.","author":"Brunke"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460471"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3142743"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-7784-8"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7040372"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796030"},{"key":"ref38","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref39","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Schulman","year":"2016"},{"key":"ref40","article-title":"Benchmarking safe exploration in deep reinforcement learning","author":"Ray","year":"2019"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3190471"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10024862\/10023989.pdf?arnumber=10023989","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T08:14:07Z","timestamp":1707812047000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10023989\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3]]},"references-count":41,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/lra.2023.3238656","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3]]}}}