{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T21:48:19Z","timestamp":1768340899654,"version":"3.49.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,11]],"date-time":"2021-07-11T00:00:00Z","timestamp":1625961600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,11]],"date-time":"2021-07-11T00:00:00Z","timestamp":1625961600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,11]],"date-time":"2021-07-11T00:00:00Z","timestamp":1625961600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012326","name":"International Science & Technology Cooperation Program of China","doi-asserted-by":"publisher","award":["2019YFE0100200"],"award-info":[{"award-number":["2019YFE0100200"]}],"id":[{"id":"10.13039\/501100012326","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,11]]},"DOI":"10.1109\/iv48863.2021.9575205","type":"proceedings-article","created":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T20:59:16Z","timestamp":1635800356000},"page":"193-199","source":"Crossref","is-referenced-by-count":12,"title":["Separated Proportional-Integral Lagrangian for Chance Constrained Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Baiyu","family":"Peng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yao","family":"Mu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingliang","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Guan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"chow","year":"2017","journal-title":"The Journal of Machine Learning Research"},{"key":"ref11","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref12","article-title":"Projection-based constrained policy optimization","author":"yang","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref13","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garcia","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref14","article-title":"Model-based actor-critic with chance constraint for stochastic system","volume":"abs 2012 10716","author":"peng","year":"2020","journal-title":"ArXiv"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029423"},{"key":"ref16","article-title":"Reward constrained policy optimization","author":"tessler","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/S0020-0255(99)00081-X"},{"key":"ref18","first-page":"9133","article-title":"Responsive safety in reinforcement learning by pid lagrangian methods","author":"stooke","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00889"},{"key":"ref4","article-title":"Model-ensemble trust-region policy optimization","author":"kurutach","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11796","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2018","journal-title":"AAAI"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8916781"},{"key":"ref5","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref8","first-page":"1212","article-title":"Mixed reinforcement learning for efficient policy optimization in stochastic environments","author":"mu","year":"0","journal-title":"2020 20th International Conference on Control Automation and Systems (ICCAS)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2019.0317"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.3026111"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/MCS.2016.2602087","article-title":"Stochastic model predictive control: An overview and perspectives for future research","volume":"36","author":"mesbah","year":"2016","journal-title":"IEEE Control Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2019.2912849"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"ref24","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1080\/02331934.2019.1643344"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2015.0205"},{"key":"ref25","article-title":"Reinforcement Learning and Control","author":"li","year":"2020","journal-title":"Lecture Note Tsinghua University"}],"event":{"name":"2021 IEEE Intelligent Vehicles Symposium (IV)","location":"Nagoya, Japan","start":{"date-parts":[[2021,7,11]]},"end":{"date-parts":[[2021,7,17]]}},"container-title":["2021 IEEE Intelligent Vehicles Symposium (IV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9575127\/9575130\/09575205.pdf?arnumber=9575205","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T02:50:02Z","timestamp":1699757402000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9575205\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,11]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/iv48863.2021.9575205","relation":{},"subject":[],"published":{"date-parts":[[2021,7,11]]}}}