{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T16:28:20Z","timestamp":1779294500199,"version":"3.51.4"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52221005"],"award-info":[{"award-number":["52221005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFB1600202"],"award-info":[{"award-number":["2020YFB1600202"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Alibaba Group through Alibaba Innovative Research Program"},{"name":"Alibaba Research Intern Program and Tsinghua\u2013Toyota Joint Research Fund","award":["20213930023"],"award-info":[{"award-number":["20213930023"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Intell. Transport. Syst."],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1109\/tits.2023.3271642","type":"journal-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T17:40:15Z","timestamp":1683308415000},"page":"9966-9983","source":"Crossref","is-referenced-by-count":46,"title":["Safe-State Enhancement Method for Autonomous Driving via Direct Hierarchical Reinforcement Learning"],"prefix":"10.1109","volume":"24","author":[{"given":"Ziqing","family":"Gu","sequence":"first","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lingping","family":"Gao","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9943-0638","authenticated-orcid":false,"given":"Haitong","family":"Ma","sequence":"additional","affiliation":[{"name":"Harvard John A. Paulson School of Engineering and Applied Sciences, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5160-1365","authenticated-orcid":false,"given":"Sifa","family":"Zheng","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Jing","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junbo","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-17462-0_28"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-017-0999-8"},{"key":"ref15","first-page":"784","article-title":"Model-free safe control for zero-violation reinforcement learning","author":"zhao","year":"2021","journal-title":"Proc Conf Robot Learn"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2638961"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01662"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796030"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2014-6048"},{"key":"ref17","article-title":"Integrated decision and control: Towards interpretable and computationally efficient driving intelligence","author":"guan","year":"2021","journal-title":"arXiv 2103 10290"},{"key":"ref16","article-title":"End to end learning for self-driving cars","author":"bojarski","year":"2016","journal-title":"arXiv 1604 07316 [cs]"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2019.0317"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917306"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2010.2047860"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2012.2183401"},{"key":"ref46","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509799"},{"key":"ref48","article-title":"OpenAI gym","author":"brockman","year":"2016","journal-title":"arXiv 1606 01540 [cs]"},{"key":"ref47","first-page":"63","article-title":"SUMO&#x2013;simulation of urban mobility: An overview","author":"behrisch","year":"2011","journal-title":"Proc 3rd Int Conf Adv Syst Simul"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref41","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garcia","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref44","first-page":"9133","article-title":"Responsive safety in reinforcement learning by PID Lagrangian methods","author":"stooke","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00113-2"},{"key":"ref49","year":"2022","journal-title":"Tianchichallenge"},{"key":"ref8","author":"altman","year":"1999","journal-title":"Constrained Markov Decision Processes Stochastic Modeling"},{"key":"ref7","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1177\/0278364910371999"},{"key":"ref4","article-title":"Scaling up robust MDPs by reinforcement learning","author":"tamar","year":"2013","journal-title":"arXiv 1306 6189"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref6","article-title":"Safe exploration in Markov decision processes","author":"moldovan","year":"2012","journal-title":"arXiv 1205 4810"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"ref40","article-title":"First order constrained optimization in policy space","author":"zhang","year":"2020","journal-title":"arXiv 2002 06506"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2010.VI.034"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.measurement.2014.10.014"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2017.7995789"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2016.7535558"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341647"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2018.8500368"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.3182\/20140824-6-ZA-1003.00619"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2012.6315303"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1109\/TNN.2004.842673","article-title":"Reinforcement learning: An introduction","volume":"16","author":"sutton","year":"2005","journal-title":"IEEE Trans Neural Netw"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scitotenv.2019.135237"},{"key":"ref39","article-title":"Projection-based constrained policy optimization","author":"yang","year":"2020","journal-title":"arXiv 2010 03152"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50021-0"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2019.2955905"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561095"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564634"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8916928"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341496"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.conb.2012.05.008"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561195"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968225"},{"key":"ref29","first-page":"1326","article-title":"Attention-based hierarchical deep reinforcement learning for lane change behaviors in autonomous driving","author":"chen","year":"2019","journal-title":"Proc IEEE\/CVF Conf Comput Vis Pattern Recognit Workshops"}],"container-title":["IEEE Transactions on Intelligent Transportation Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6979\/10235283\/10120651.pdf?arnumber=10120651","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,18]],"date-time":"2023-09-18T18:09:59Z","timestamp":1695060599000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10120651\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9]]},"references-count":52,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tits.2023.3271642","relation":{},"ISSN":["1524-9050","1558-0016"],"issn-type":[{"value":"1524-9050","type":"print"},{"value":"1558-0016","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9]]}}}