{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T03:09:29Z","timestamp":1781752169396,"version":"3.54.5"},"reference-count":67,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFB1600202"],"award-info":[{"award-number":["2020YFB1600202"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52221005"],"award-info":[{"award-number":["52221005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1109\/tnnls.2023.3348422","type":"journal-article","created":{"date-parts":[[2024,1,17]],"date-time":"2024-01-17T13:28:12Z","timestamp":1705498092000},"page":"2327-2341","source":"Crossref","is-referenced-by-count":17,"title":["Learn Zero-Constraint-Violation Safe Policy in Model-Free Constrained Reinforcement Learning"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9943-0638","authenticated-orcid":false,"given":"Haitong","family":"Ma","sequence":"first","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3767-5517","authenticated-orcid":false,"given":"Changliu","family":"Liu","sequence":"additional","affiliation":[{"name":"Robotics Institute, Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5160-1365","authenticated-orcid":false,"given":"Sifa","family":"Zheng","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2969-4096","authenticated-orcid":false,"given":"Wenchao","family":"Sun","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0282-8621","authenticated-orcid":false,"given":"Jianyu","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Interdisciplinary Information Science, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv:1312.5602"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3084685"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3046646"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3042981"},{"key":"ref8","article-title":"Open-sourced reinforcement learning environments for surgical robotics","author":"Richter","year":"2019","journal-title":"arXiv:1903.02090"},{"key":"ref9","article-title":"Safe model-based reinforcement learning with stability guarantees","author":"Berkenkamp","year":"2017","journal-title":"arXiv:1705.08551"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3029573"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2022.3145809"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3155755"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"ref15","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Achiam"},{"key":"ref16","article-title":"Benchmarking batch deep reinforcement learning algorithms","author":"Fujimoto","year":"2019","journal-title":"arXiv:1910.01708"},{"key":"ref17","article-title":"Projection-based constrained policy optimization","author":"Yang","year":"2020","journal-title":"arXiv:2010.03152"},{"key":"ref18","article-title":"Reward constrained policy optimization","author":"Tessler","year":"2018","journal-title":"arXiv:1805.11074"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17272"},{"issue":"1","key":"ref20","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref21","first-page":"34464","article-title":"Enhancing safe exploration using safety state augmentation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Sootla"},{"key":"ref22","first-page":"15338","article-title":"First order constrained optimization in policy space","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Zhang"},{"key":"ref23","article-title":"Adaptive dynamic programming for nonaffine nonlinear optimal control problem with state constraints","author":"Duan","year":"2019","journal-title":"arXiv:1911.11397"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2017.8317745"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796030"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636468"},{"key":"ref28","article-title":"Learning safe multi-agent control with decentralized neural barrier certificates","author":"Qin","year":"2021","journal-title":"arXiv:2101.05436"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2014-6048"},{"key":"ref30","first-page":"784","article-title":"Model-free safe control for zero-violation reinforcement learning","volume-title":"Proc. 5th Annu. Conf. Robot Learn.","author":"Zhao"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029720"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2876389"},{"key":"ref33","article-title":"Safe exploration in continuous action spaces","author":"Dalal","year":"2018","journal-title":"arXiv:1801.08757"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46024-6"},{"key":"ref35","article-title":"Feasible actor\u2013critic: Constrained reinforcement learning for ensuring statewise safety","author":"Ma","year":"2021","journal-title":"arXiv:2105.10682"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2007.4354030"},{"key":"ref37","first-page":"9133","article-title":"Responsive safety in reinforcement learning by PID Lagrangian methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Stooke"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IV48863.2021.9575205"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"},{"key":"ref40","first-page":"8378","article-title":"Natural policy gradient primal-dual method for constrained Markov decision processes","volume-title":"Proc. NeurIPS","author":"Ding"},{"key":"ref41","first-page":"3304","article-title":"Provably efficient safe exploration via primal-dual policy optimization","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Ding"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3098985"},{"key":"ref43","article-title":"Saute RL: Almost surely safe reinforcement learning using state augmentation","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","author":"Sootla"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.04.134"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.jfranklin.2019.12.017"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2967871"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3108034"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3213566"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460547"},{"key":"ref50","article-title":"ShieldNN: A provably safe NN filter for unsafe NN controllers","author":"Ferlez","year":"2020","journal-title":"arXiv:2006.09564"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2017.xiii.073"},{"key":"ref52","article-title":"Barrier Lyapunov function-based safe reinforcement learning for autonomous vehicles with optimized backstepping","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2654539"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3106818"},{"key":"ref55","first-page":"1861","article-title":"Soft actor\u2013critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref56","first-page":"1587","article-title":"Addressing function approximation error in actor\u2013critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref57","volume-title":"Stochastic Approximation: A Dynamical Systems Viewpoint","volume":"48","author":"Borkar","year":"2009"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/s10957-012-9989-5"},{"key":"ref59","first-page":"1","article-title":"Actor\u2013critic algorithms for risk-sensitive MDPs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"26","author":"Prashanth"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1038\/sj.jors.2600425"},{"key":"ref61","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/520"},{"key":"ref64","article-title":"Integrated decision and control: Towards interpretable and computationally efficient driving intelligence","author":"Guan","year":"2021","journal-title":"arXiv:2103.10290"},{"key":"ref65","article-title":"Numerically stable dynamic bicycle model for discrete-time control","author":"Ge","year":"2020","journal-title":"arXiv:2011.09612"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1177\/02783640122067453"},{"key":"ref67","article-title":"Mixed policy gradient","author":"Guan","year":"2021","journal-title":"arXiv:2102.11513"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10877690\/10402567.pdf?arnumber=10402567","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:09Z","timestamp":1764959949000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10402567\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":67,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3348422","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2]]}}}