{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:16:12Z","timestamp":1778256972619,"version":"3.51.4"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,20]],"date-time":"2020-09-20T00:00:00Z","timestamp":1600560000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,20]],"date-time":"2020-09-20T00:00:00Z","timestamp":1600560000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,20]],"date-time":"2020-09-20T00:00:00Z","timestamp":1600560000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,20]]},"DOI":"10.1109\/itsc45102.2020.9294262","type":"proceedings-article","created":{"date-parts":[[2020,12,24]],"date-time":"2020-12-24T23:14:55Z","timestamp":1608851695000},"page":"1-7","source":"Crossref","is-referenced-by-count":55,"title":["Safe Reinforcement Learning for Autonomous Vehicles through Parallel Constrained Policy Optimization"],"prefix":"10.1109","author":[{"given":"Lu","family":"Wen","sequence":"first","affiliation":[]},{"given":"Jingliang","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[]},{"given":"Shaobing","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Huei","family":"Peng","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Distributional soft actor-critic: Off-policy reinforcement learning for addressing value estimation errors","author":"duan","year":"2020","journal-title":"arXiv preprint arXiv 2001 04786"},{"key":"ref11","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garcia","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1050.0216"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"ref14","first-page":"387","article-title":"Policy gradients with variance related risk criteria","author":"tamar","year":"2012","journal-title":"Proceedings of the 29-th ICML"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11797","article-title":"Safe reinforcement learning via shielding","author":"alshiekh","year":"2018","journal-title":"The 32nd AAAI Conference on Artificial Intelligence"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3233\/HIS-2007-4304"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3761"},{"key":"ref18","first-page":"1037","article-title":"Smart exploration in reinforcement learning using absolute temporal difference errors","author":"gehring","year":"2013","journal-title":"Proceedings of AAMAS 2013"},{"key":"ref19","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref4","author":"silver","year":"2014","journal-title":"Deterministic policy gradient algorithms"},{"key":"ref3","author":"karavolos","year":"2013","journal-title":"Q-learning with heuristic exploration in simulated car racing"},{"key":"ref6","article-title":"Deep reinforcement learning for autonomous driving","author":"wang","year":"2018","journal-title":"arXiv preprint arXiv 1811 11329"},{"key":"ref5","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2019.00147"},{"key":"ref7","article-title":"Virtual to real reinforcement learning for autonomous driving","author":"pan","year":"2017","journal-title":"arXiv preprint arXiv 1704 03952"},{"key":"ref2","author":"li","year":"2019","journal-title":"Lecture notes on reinforcement learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2017.08.022"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2019.0317"},{"key":"ref20","first-page":"1008","author":"konda","year":"2000","journal-title":"Actor-critic algorithms"},{"key":"ref22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"arXiv preprint arXiv 1705 10528"},{"key":"ref21","first-page":"1889","author":"schulman","year":"2015","journal-title":"Trust region policy optimization"},{"key":"ref24","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2018.2873101"}],"event":{"name":"2020 IEEE 23rd International Conference on Intelligent Transportation Systems (ITSC)","location":"Rhodes, Greece","start":{"date-parts":[[2020,9,20]]},"end":{"date-parts":[[2020,9,23]]}},"container-title":["2020 IEEE 23rd International Conference on Intelligent Transportation Systems (ITSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9294153\/9294168\/09294262.pdf?arnumber=9294262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,9]],"date-time":"2022-12-09T05:32:36Z","timestamp":1670563956000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9294262\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,20]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/itsc45102.2020.9294262","relation":{},"subject":[],"published":{"date-parts":[[2020,9,20]]}}}