{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T00:19:41Z","timestamp":1760573981582,"version":"build-2065373602"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T00:00:00Z","timestamp":1750723200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T00:00:00Z","timestamp":1750723200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,24]]},"DOI":"10.23919\/ecc65951.2025.11186958","type":"proceedings-article","created":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T17:38:09Z","timestamp":1760463489000},"page":"2173-2179","source":"Crossref","is-referenced-by-count":0,"title":["Predictive Safety Shield for Dyna-Q Reinforcement Learning"],"prefix":"10.23919","author":[{"given":"Pin","family":"Jin","sequence":"first","affiliation":[{"name":"Institut Polytechnique de Paris,ENSTA Paris,Departement U2IS"}]},{"given":"Hanna","family":"Krasowski","sequence":"additional","affiliation":[{"name":"University of California,Department of Electrical Engineering and Computer Science,Berkeley,USA"}]},{"given":"Elena","family":"Vanneaux","sequence":"additional","affiliation":[{"name":"Institut Polytechnique de Paris,ENSTA Paris,Departement U2IS"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Provably safe reinforcement learning: Conceptual analysis, survey, and benchmarking","author":"Krasowski","year":"2023","journal-title":"Transactions on Machine Learning Research"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"ref3","article-title":"Do androids dream of electric fences? safety-aware reinforcement learning with latent shielding","volume-title":"CEUR Workshop Proceedings","volume":"3087","author":"He"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3233\/faia230357"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3596444"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2808446"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10383601"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61362-4_16"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3447928.3456653"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3192205"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3063989"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2876389"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9028919"},{"key":"ref16","first-page":"483","article-title":"Cautious reinforcement learning with logical constraints","volume-title":"Proc. of the Int. Conf. on Autonomous Agents and Multi Agent Systems (AAMAS)","author":"Hasanbeig"},{"key":"ref17","first-page":"1","article-title":"Safe reinforcement learning using probabilistic shields","volume-title":"Proc. of the Int. Conf. on Concurrency Theory (CONCUR)","volume":"171","author":"Jansen"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26723"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2021.08.483"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.3036624"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-071723-102940"},{"article-title":"AI safety gridworlds","year":"2017","author":"Leike","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9780262170055.001.0001"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19849-6_20"},{"article-title":"Dynamic model predictive shielding for provably safe reinforcement learning","volume-title":"Proc. of the Conference on Neural Information Processing Systems (NeurIPS)","author":"Banerjee","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-071420-081941"}],"event":{"name":"2025 European Control Conference (ECC)","start":{"date-parts":[[2025,6,24]]},"location":"Thessaloniki, Greece","end":{"date-parts":[[2025,6,27]]}},"container-title":["2025 European Control Conference (ECC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11186289\/11186290\/11186958.pdf?arnumber=11186958","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T05:44:27Z","timestamp":1760507067000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11186958\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,24]]},"references-count":26,"URL":"https:\/\/doi.org\/10.23919\/ecc65951.2025.11186958","relation":{},"subject":[],"published":{"date-parts":[[2025,6,24]]}}}