{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,30]],"date-time":"2026-06-30T22:44:13Z","timestamp":1782859453279,"version":"3.54.5"},"reference-count":29,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20334"],"award-info":[{"award-number":["U20A20334"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Tsinghua University Initiative Scientific Research Program"},{"name":"Tsinghua University-Toyota Joint Research Center for AI Technology of Automated Vehicle"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Intell. Veh."],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1109\/tiv.2026.3667572","type":"journal-article","created":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T20:59:27Z","timestamp":1771966767000},"page":"545-553","source":"Crossref","is-referenced-by-count":0,"title":["Learn a Robust Policy for Real-World Driving With Adversarial Reinforcement Learning"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1173-7230","authenticated-orcid":false,"given":"Yangang","family":"Ren","sequence":"first","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1246-4860","authenticated-orcid":false,"given":"Guojian","family":"Zhan","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4923-3633","authenticated-orcid":false,"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3697-1576","authenticated-orcid":false,"given":"Jingliang","family":"Duan","sequence":"additional","affiliation":[{"name":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1539-472X","authenticated-orcid":false,"given":"Yao","family":"Lyu","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yang","family":"Guan","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9333-7416","authenticated-orcid":false,"given":"Keqiang","family":"Li","sequence":"additional","affiliation":[{"name":"School of Vehicle and Mobility, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-7784-8"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2022.103662"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3142822"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1137\/21M1406477"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2023.10.1846"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207497"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196730"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2966414"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IV47402.2020.9304542"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IV51971.2022.9827302"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCR54399.2022.9790288"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2022.3163816"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636468"},{"key":"ref17","article-title":"Integrated decision and control for high-level automated vehicles by mixed policy gradient and its experiment verification","author":"Guan","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.14174"},{"key":"ref19","first-page":"7968","article-title":"Improving generalization in reinforcement learning with mixture regularization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Wang","year":"2020"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794293"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2023.104161"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2022.3165178"},{"key":"ref23","first-page":"2817","article-title":"Robust adversarial reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pinto","year":"2017"},{"key":"ref24","article-title":"Equivalence between policy gradients and soft q-learning","author":"Schulman","year":"2017"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564880"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.129666"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569938"},{"key":"ref28","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2017"},{"key":"ref29","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"}],"container-title":["IEEE Transactions on Intelligent Vehicles"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274857\/11457231\/11408883.pdf?arnumber=11408883","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,30]],"date-time":"2026-06-30T21:46:39Z","timestamp":1782855999000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11408883\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":29,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tiv.2026.3667572","relation":{},"ISSN":["2379-8904","2379-8858"],"issn-type":[{"value":"2379-8904","type":"electronic"},{"value":"2379-8858","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]}}}