{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T05:56:31Z","timestamp":1775109391564,"version":"3.50.1"},"reference-count":55,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004359","name":"Vetenskapsr\u00e5det","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004359","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004063","name":"Knut och Alice Wallenbergs Stiftelse","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004063","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100015599","name":"Toyota Research Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015599","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NASA University Leadership Initiative","award":["80NSSC20M0163"],"award-info":[{"award-number":["80NSSC20M0163"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. 
Lett."],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1109\/lra.2022.3192205","type":"journal-article","created":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T19:26:24Z","timestamp":1658258784000},"page":"10665-10672","source":"Crossref","is-referenced-by-count":28,"title":["Safe Reinforcement Learning Using Black-Box Reachability Analysis"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9499-6480","authenticated-orcid":false,"given":"Mahmoud","family":"Selim","sequence":"first","affiliation":[{"name":"Ain Shams University, Cairo, Egypt"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2941-519X","authenticated-orcid":false,"given":"Amr","family":"Alanwar","sequence":"additional","affiliation":[{"name":"Jacobs University, Bremen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1348-7463","authenticated-orcid":false,"given":"Shreyas","family":"Kousik","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1807-8637","authenticated-orcid":false,"given":"Grace","family":"Gao","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0206-4337","authenticated-orcid":false,"given":"Marco","family":"Pavone","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9940-5929","authenticated-orcid":false,"given":"Karl H.","family":"Johansson","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Introduction to Reinforcement Learning","volume":"135","author":"Sutton","year":"1998"},{"issue":"2","key":"ref2","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1023\/A:1017940631555","article-title":"Risk-sensitive reinforcement 
learning","volume":"49","author":"Mihatsch","year":"2002","journal-title":"Mach. Learn."},{"issue":"1","key":"ref3","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u00eda","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"ref5","article-title":"Safe exploration in Markov decision processes","author":"Moldovan","year":"2012"},{"key":"ref6","first-page":"997","article-title":"Safe exploration for optimization with Gaussian processes","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Sui","year":"2015"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1201\/9781315140223"},{"issue":"3","key":"ref8","first-page":"207","article-title":"An actor-critic algorithm for constrained Markov decision processes","volume-title":"Syst. Control Lett.","volume":"54","author":"Borkar","year":"2005"},{"issue":"1","key":"ref9","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref10","article-title":"Reward constrained policy optimization","author":"Tessler","year":"2018"},{"key":"ref11","article-title":"Value constrained model-free continuous control","author":"Bohez","year":"2019"},{"key":"ref12","first-page":"3304","article-title":"Provably efficient safe exploration via primal-dual policy optimization","author":"Ding","year":"2020"},{"key":"ref13","article-title":"Exploration-exploitation in constrained MDPs","author":"Efroni","year":"2020"},{"key":"ref14","first-page":"22","article-title":"Constrained policy optimization","author":"Achiam","year":"2017"},{"key":"ref15","article-title":"Conservative safety critics for exploration","author":"Bharadhwaj","year":"2020"},{"key":"ref16","first-page":"1037","article-title":"Smart exploration in reinforcement learning using absolute temporal difference errors","volume-title":"Proc. Int. Conf. Auton. Agents Multi-Agent Syst.","author":"Gehring","year":"2013"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1569901.1569922"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102352"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3135569"},{"key":"ref20","first-page":"1357","article-title":"Robot reinforcement learning on the constraint manifold","volume-title":"Proc. Conf. Robot Learn.","author":"Liu","year":"2022"},{"key":"ref21","first-page":"8103","article-title":"A Lyapunov-based approach to safe reinforcement learning","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Chow","year":"2018"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref23","article-title":"Model free barrier functions via implicit evading maneuvers","author":"Squires","year":"2021"},{"key":"ref24","article-title":"Safe reinforcement learning using advantage-based intervention","volume-title":"Proc. Int. Conf. Mach. 
Learn.","first-page":"10630","author":"Wagener","year":"2021"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.07.004"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2021.3130782"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC45102.2020.9294259"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3063989"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/tro.2022.3154715"},{"key":"ref30","article-title":"Safe exploration in continuous action spaces","author":"Dalal","year":"2018"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511546877"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1177\/0278364920943266"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1177\/0278364920950795"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989037"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202312"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461113"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202134"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460968"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1137\/0322013"},{"key":"ref40","article-title":"Reachability analysis and its application to the safety assessment of autonomous cars","author":"Althoff","year":"2010"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619572"},{"key":"ref42","first-page":"163","article-title":"Data-driven reachability analysis using matrix zonotopes","volume-title":"Proc. Learn. Dyn. 
Control","author":"Alanwar","year":"2021"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2016.02.036"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110204"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2016.7795594"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793905"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353861"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3040517"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917052"},{"key":"ref50","first-page":"136","article-title":"OptNet: Differentiable optimization as a layer in neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amos","year":"2017"},{"key":"ref51","article-title":"Constrained feedforward neural network training via reachability analysis","author":"Chung","year":"2021"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3387168.3387199"},{"key":"ref53","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref55","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. 
Syst.","author":"Chua","year":"2018"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/9831196\/09833266.pdf?arnumber=9833266","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T06:36:16Z","timestamp":1706769376000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9833266\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10]]},"references-count":55,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/lra.2022.3192205","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10]]}}}