{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T02:40:47Z","timestamp":1730256047380,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10611554","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"7426-7432","source":"Crossref","is-referenced-by-count":0,"title":["Projection-Based Fast and Safe Policy Optimization for Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Shijun","family":"Lin","sequence":"first","affiliation":[{"name":"University of Science and Technology of China,Department of Automation,Hefei,China,230026"}]},{"given":"Hao","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Department of Automation,Hefei,China,230026"}]},{"given":"Ziyang","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Department of Automation,Hefei,China,230026"}]},{"given":"Zhen","family":"Kan","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,Department of Automation,Hefei,China,230026"}]}],"member":"263","reference":[{"key":"ref1","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Int. Conf. Machin. Learn","author":"Achiam"},{"article-title":"Projection-based constrained policy optimization","volume-title":"Int. Conf. Learn. Represent","author":"Yang","key":"ref2"},{"key":"ref3","first-page":"25636","article-title":"Reachability constrained reinforcement learning","volume-title":"Int. Conf. Machin. Learn","author":"Yu"},{"article-title":"Reward constrained policy optimization","year":"2018","author":"Tessler","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7040372"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8796030"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2002.1184594"},{"issue":"12","key":"ref8","first-page":"462","article-title":"Constructive safety using control barrier functions","volume":"40","author":"Wieland","year":"2007","journal-title":"IFAC-PapersOnLine"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967981"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9303785"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CAC57257.2022.10055848"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"key":"ref13","first-page":"9797","article-title":"Safe reinforcement learning in constrained markov decision processes","volume-title":"Int. Conf. Machin. Learn","author":"Wachi"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2004.08.007"},{"key":"ref15","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Int. Conf. Machin. Learn","author":"Schulman"},{"key":"ref16","first-page":"11795","article-title":"Accelerating safe reinforcement learning with constraint-mismatched baseline policies","volume-title":"Int. Conf. Machin. Learn","author":"Yang"},{"key":"ref17","first-page":"15338","article-title":"First order constrained optimization in policy space","volume":"33","author":"Zhang","year":"2020","journal-title":"Adv. neural inf. proces. syst."},{"key":"ref18","first-page":"8378","article-title":"Natural policy gradient primal-dual method for constrained markov decision processes","volume":"33","author":"Ding","year":"2020","journal-title":"Adv. neural inf. proces. syst."},{"key":"ref19","first-page":"8502","article-title":"Constrained markov decision processes via backward value functions","volume-title":"Int. Conf. Machin. Learn","author":"Satija"},{"key":"ref20","first-page":"10521","article-title":"A near-optimal primal-dual method for off-policy learning in cmdp","volume":"35","author":"Chen","year":"2022","journal-title":"Adv. neural inf. proces. syst."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3138704"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3006967"},{"key":"ref23","first-page":"10497","article-title":"Ltl2action: Generalizing ltl instructions for multi-task rl","volume-title":"Int. Conf. Machin. Learn","author":"Vaezipoor"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TIE.2023.3299048"},{"key":"ref25","first-page":"2107","article-title":"Using reward machines for high-level task specification and decomposition in reinforcement learning","volume-title":"Int. Conf. Machin. Learn","author":"Icarte"},{"article-title":"Safe-critical modular deep reinforcement learning with temporal logic through gaussian processes and control barrier functions","year":"2021","author":"Cai","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.nahs.2022.101295"},{"key":"ref28","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","volume-title":"Int. Conf. Machin. Learn","author":"Kakade"},{"volume-title":"Principles of model checking","year":"2008","author":"Baier","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.1985.12"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref31"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10611554.pdf?arnumber=10611554","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:08:11Z","timestamp":1723349291000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10611554\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10611554","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}