{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:24:58Z","timestamp":1740101098392,"version":"3.37.3"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:00:00Z","timestamp":1665187200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:00:00Z","timestamp":1665187200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001659","name":"German Research Foundation (DFG)","doi-asserted-by":"publisher","award":["AL 1185\/7-1"],"award-info":[{"award-number":["AL 1185\/7-1"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,8]]},"DOI":"10.1109\/itsc55140.2022.9921880","type":"proceedings-article","created":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T01:40:13Z","timestamp":1667526013000},"page":"2415-2421","source":"Crossref","is-referenced-by-count":3,"title":["Learning to Obey Traffic Rules using Constrained Policy Optimization"],"prefix":"10.1109","author":[{"given":"Xiao","family":"Wang","sequence":"first","affiliation":[{"name":"Technical University of Munich,Department of Informatics,Garching,Germany,85748"}]},{"given":"Christoph","family":"Pillmayer","sequence":"additional","affiliation":[{"name":"Technical University of Munich,Department of Informatics,Garching,Germany,85748"}]},{"given":"Matthias","family":"Althoff","sequence":"additional","affiliation":[{"name":"Technical University of Munich,Department of Informatics,Garching,Germany,85748"}]}],"member":"263","reference":[{"key":"ref1","first-page":"752","article-title":"For-malization of interstate traffic rules in temporal logic","volume-title":"Proc. of the IEEE Intelligent Vehicles Symposium","author":"Maierhofer"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/CAVS51000.2020.9334599"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/IV51971.2022.9827153"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ITSC.2015.269"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1007\/978-3-319-40648-0_14"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1007\/978-3-319-66845-1_4"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/IVS.2019.8813875"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1145\/3359986.3361203"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/CDC.2009.5399536"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.15607\/RSS.2013.IX.013"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/cdc.2013.6760374"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1016\/j.ifacol.2020.12.2397"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/ICRA.2014.6907641"},{"key":"ref14","article-title":"Playing Atari with deep reinforcement learning","volume-title":"Proc. of the Twenty-seventh Conf. on Neural Information Processing Systems - Workshop on Deep Learning","author":"Mnih","year":"2013"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1038\/nature24270"},{"key":"ref16","first-page":"11480","article-title":"CRPO: A new approach for safe reinforcement learning with convergence guarantee","volume-title":"Int. Conf. on Machine Learning","author":"Xu","year":"2021"},{"key":"ref17","article-title":"Lyapunov-based safe policy optimization for con-tinuous control","volume-title":"Int. Conf. on Machine Learning - Workshop on Reinforcement Learning for Real Life","author":"Chow","year":"2019"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1609\/aaai.v34i04.5932"},{"key":"ref19","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Int. Conf. on Machine Learning","author":"Achiam","year":"2017"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/ITSC45102.2020.9294262"},{"issue":"1","key":"ref21","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"The Journal of Machine Learning Research"},{"key":"ref22","first-page":"9133","article-title":"Responsive safety in reinforce-ment learning by PID Lagrangian methods","volume-title":"Proc. of the Int. Conf. on Machine Learning","author":"Stooke","year":"2020"},{"key":"ref23","article-title":"WC-SAC: Worst-case soft actor critic for safety-constrained reinforcement learning","volume-title":"Proc. of the Thirty-Fifth AAAI Conference on Artificial Intelligence.","author":"Yang","year":"2021"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/ITSC.2018.8569552"},{"volume-title":"Constrained Markov decision processes: stochastic mod-eling. Routledge","year":"1999","author":"Altman","key":"ref25"},{"key":"ref26","first-page":"449","article-title":"A distributional perspective on reinforcement learning","volume-title":"Int. Conf. on Machine Learning","author":"Bellemare","year":"2017"},{"issue":"2019","key":"ref27","first-page":"1078","article-title":"Worst cases policy gradients","volume-title":"Proc. of the 3rd Annual Conference on Robot Learning","volume":"100","author":"Tang"},{"volume-title":"Probability: an introduction","year":"2014","author":"Grimmett","key":"ref28"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1287\/educ.1080.0052"},{"issue":"6","key":"ref30","first-page":"70","article-title":"Conditional value-at-risk for elliptical distributions","volume":"2","author":"Khokhlov","year":"2016","journal-title":"Evropsky casopis ekonomiky a managementu"},{"key":"ref31","first-page":"390","article-title":"Real-time logics: complexity and ex-pressiveness","volume-title":"Proc. of the IEEE Symposium on Logic in Computer Science","author":"Alur"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1016\/j.entcs.2004.01.029"},{"key":"ref33","article-title":"High- dimensional continuous control using generalized advantage estimation","volume-title":"Int. Conf. on Learning Representations","author":"Schulman","year":"2016"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref34"},{"issue":"1","key":"ref35","first-page":"2603","article-title":"An emphatic approach to the problem of off-policy temporal-difference learning","volume":"17","author":"Sutton","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref36","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"ar Xiv preprint"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1109\/ITSC48978.2021.9564898"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1109\/ITSC48978.2021.9564432"},{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1007\/978-3-319-75632-5_5"},{"key":"ref40","article-title":"Benchmarking safe exploration in deep reinforcement learning","volume-title":"Proc. NeurIPS - Workshop on Deep Reinforcement Learning","author":"Ray","year":"2019"},{"key":"ref41","first-page":"719","article-title":"CommonRoad: Compos-able benchmarks for motion planning on roads","volume-title":"IEEE Intelligent Vehicles Symposium","author":"Althoff"}],"event":{"name":"2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)","start":{"date-parts":[[2022,10,8]]},"location":"Macau, China","end":{"date-parts":[[2022,10,12]]}},"container-title":["2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9921415\/9921739\/09921880.pdf?arnumber=9921880","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T06:22:28Z","timestamp":1706077348000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9921880\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,8]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/itsc55140.2022.9921880","relation":{},"subject":[],"published":{"date-parts":[[2022,10,8]]}}}