{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T13:03:12Z","timestamp":1760101392410,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031199912"},{"type":"electronic","value":"9783031199929"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19992-9_2","type":"book-chapter","created":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T09:12:06Z","timestamp":1666429926000},"page":"25-41","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Dynamic Shielding for\u00a0Reinforcement Learning in\u00a0Black-Box Environments"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9360-7490","authenticated-orcid":false,"given":"Masaki","family":"Waga","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9604-9997","authenticated-orcid":false,"given":"Ezequiel","family":"Castellano","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5929-9014","authenticated-orcid":false,"given":"Sasinee","family":"Pruekprasert","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4212-7029","authenticated-orcid":false,"given":"Stefan","family":"Klikovits","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5046-7480","authenticated-orcid":false,"given":"Toru","family":"Takisaka","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8300-4650","authenticated-orcid":false,"given":"Ichiro","family":"Hasuo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,21]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: McIlraith, S.A., Weinberger, K.Q. (eds.) Proceedings of the AAAI 2018, pp. 2669\u20132678. AAAI Press (2018)","key":"2_CR1","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"2_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-030-25540-4_36","volume-title":"Computer Aided Verification","author":"G Avni","year":"2019","unstructured":"Avni, G., Bloem, R., Chatterjee, K., Henzinger, T.A., K\u00f6nighofer, B., Pranger, S.: Run-time optimization for learned controllers through quantitative games. In: Dillig, I., Tasiran, S. (eds.) CAV 2019. LNCS, vol. 11561, pp. 630\u2013649. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-25540-4_36"},{"doi-asserted-by":"crossref","unstructured":"Bharadwaj, S., Bloem, R., Dimitrova, R., K\u00f6nighofer, B., Topcu, U.: Synthesis of minimum-cost shields for multi-agent systems. In: Proceedings of the ACC 2019, pp. 1048\u20131055. IEEE (2019)","key":"2_CR3","DOI":"10.23919\/ACC.2019.8815233"},{"unstructured":"Bloem, R., Jensen, P.G., K\u00f6nighofer, B., Larsen, K.G., Lorber, F., Palmisano, A.: It\u2019s time to play safe: shield synthesis for timed systems. CoRR abs\/2006.16688 (2020)","key":"2_CR4"},{"key":"2_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1007\/978-3-662-46681-0_51","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"R Bloem","year":"2015","unstructured":"Bloem, R., K\u00f6nighofer, B., K\u00f6nighofer, R., Wang, C.: Shield synthesis: runtime enforcement for reactive systems. In: Baier, C., Tinelli, C. (eds.) TACAS 2015. LNCS, vol. 9035, pp. 533\u2013548. Springer, Heidelberg (2015). https:\/\/doi.org\/10.1007\/978-3-662-46681-0_51"},{"unstructured":"Bouton, M., Karlsson, J., Nakhaei, A., Fujimura, K., Kochenderfer, M.J., Tumova, J.: Reinforcement learning with probabilistic guarantees for autonomous driving. CoRR abs\/1904.07189 (2019)","key":"2_CR6"},{"unstructured":"Brockman, G., et al.: OpenAI Gym. CoRR abs\/1606.01540 (2016)","key":"2_CR7"},{"doi-asserted-by":"crossref","unstructured":"Cheng, R., Orosz, G., Murray, R.M., Burdick, J.W.: End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks. In: Proceedings of the AAAI 2019, pp. 3387\u20133395. AAAI Press (2019)","key":"2_CR8","DOI":"10.1609\/aaai.v33i01.33013387"},{"unstructured":"Chevalier-Boisvert, M.: Gym-MiniWorld Environment for OpenAI Gym (2018). https:\/\/github.com\/maximecb\/gym-miniworld","key":"2_CR9"},{"key":"2_CR10","first-page":"1437","volume":"16","author":"J Garc\u00eda","year":"2015","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16, 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Cautious reinforcement learning with logical constraints. In: Seghrouchni, A.E.F., Sukthankar, G., An, B., Yorke-Smith, N. (eds.) Proceedings of the AAMAS 2020, pp. 483\u2013491. IFAAMS (2020)","key":"2_CR11"},{"doi-asserted-by":"crossref","unstructured":"Hunt, N., Fulton, N., Magliacane, S., Hoang, T.N., Das, S., Solar-Lezama, A.: Verifiably safe exploration for end-to-end reinforcement learning. In: Bogomolov, S., Jungers, R.M. (eds.) Proceedings of the HSCC 2021, pp. 14:1\u201314:11. ACM (2021)","key":"2_CR12","DOI":"10.1145\/3447928.3456653"},{"key":"2_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"487","DOI":"10.1007\/978-3-319-21690-4_32","volume-title":"Computer Aided Verification","author":"M Isberner","year":"2015","unstructured":"Isberner, M., Howar, F., Steffen, B.: The open-source LearnLib - a framework for active automata learning. In: Kroening, D., P\u0103s\u0103reanu, C.S. (eds.) CAV 2015. LNCS, vol. 9206, pp. 487\u2013495. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-21690-4_32"},{"unstructured":"Jansen, N., K\u00f6nighofer, B., Junges, S., Serban, A., Bloem, R.: Safe reinforcement learning using probabilistic shields (invited paper). In: Konnov, I., Kov\u00e1cs, L. (eds.) Proceedings of the CONCUR 2020. LIPIcs, vol. 171, pp. 3:1\u20133:16. Schloss Dagstuhl - Leibniz-Zentrum f\u00fcr Informatik (2020)","key":"2_CR14"},{"key":"2_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1007\/11901914_11","volume-title":"Automated Technology for Verification and Analysis","author":"O Kupferman","year":"2006","unstructured":"Kupferman, O., Lampert, R.: On the construction of fine automata for safety properties. In: Graf, S., Zhang, W. (eds.) ATVA 2006. LNCS, vol. 4218, pp. 110\u2013124. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11901914_11"},{"key":"2_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/BFb0054059","volume-title":"Grammatical Inference","author":"KJ Lang","year":"1998","unstructured":"Lang, K.J., Pearlmutter, B.A., Price, R.A.: Results of the Abbadingo one DFA learning competition and a new evidence-driven state merging algorithm. In: Honavar, V., Slutzki, G. (eds.) ICGI 1998. LNCS, vol. 1433, pp. 1\u201312. Springer, Heidelberg (1998). https:\/\/doi.org\/10.1007\/BFb0054059"},{"key":"2_CR17","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/978-3-662-48395-4_4","volume-title":"Topics in Grammatical Inference","author":"D L\u00f3pez","year":"2016","unstructured":"L\u00f3pez, D., Garc\u00eda, P.: On the inference of finite state automata from positive and negative data. In: Heinz, J., Sempere, J.M. (eds.) Topics in Grammatical Inference, pp. 73\u2013112. Springer, Heidelberg (2016). https:\/\/doi.org\/10.1007\/978-3-662-48395-4_4"},{"doi-asserted-by":"crossref","unstructured":"Mao, H., Chen, Y., Jaeger, M., Nielsen, T.D., Larsen, K.G., Nielsen, B.: Learning Markov decision processes for model checking. In: Fahrenberg, U., Legay, A., Thrane, C.R. (eds.) Proceedings of the QFM 2012. EPTCS, vol. 103, pp. 49\u201363 (2012)","key":"2_CR18","DOI":"10.4204\/EPTCS.103.6"},{"unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. CoRR abs\/1312.5602 (2013)","key":"2_CR19"},{"issue":"7540","key":"2_CR20","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"doi-asserted-by":"crossref","unstructured":"Oncina, J., Garc\u00eda, P.: Identifying regular languages in polynomial time. Series in Machine Perception and Artificial Intelligence, pp. 99\u2013108 (1993)","key":"2_CR21","DOI":"10.1142\/9789812797919_0007"},{"unstructured":"Plappert, M.: Keras-RL (2016). https:\/\/github.com\/keras-rl\/keras-rl","key":"2_CR22"},{"doi-asserted-by":"crossref","unstructured":"Pranger, S., K\u00f6nighofer, B., Tappler, M., Deixelberger, M., Jansen, N., Bloem, R.: Adaptive shielding under uncertainty. In: Proceedings of the ACC 2021, pp. 3467\u20133474. IEEE (2021)","key":"2_CR23","DOI":"10.23919\/ACC50511.2021.9482889"},{"unstructured":"Raffin, A., Hill, A., Ernestus, M., Gleave, A., Kanervisto, A., Dormann, N.: Stable baselines3 (2019). https:\/\/github.com\/DLR-RM\/stable-baselines3","key":"2_CR24"},{"unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. CoRR abs\/1707.06347 (2017)","key":"2_CR25"},{"issue":"7676","key":"2_CR26","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","journal-title":"Nature"},{"doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning - An Introduction. Adaptive Computation and Machine Learning. MIT Press (1998)","key":"2_CR27","DOI":"10.1109\/TNN.1998.712192"},{"doi-asserted-by":"crossref","unstructured":"Wu, M., Wang, J., Deshmukh, J., Wang, C.: Shield synthesis for real: Enforcing safety in cyber-physical systems. In: Barrett, C.W., Yang, J. (eds.) Proceedings of the FMCAD 2019, pp. 129\u2013137. IEEE (2019)","key":"2_CR28","DOI":"10.23919\/FMCAD.2019.8894264"}],"container-title":["Lecture Notes in Computer Science","Automated Technology for Verification and Analysis"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19992-9_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T09:12:39Z","timestamp":1666429959000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19992-9_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031199912","9783031199929"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19992-9_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"21 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ATVA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Automated Technology for Verification and Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"atva2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/atva-conference.org\/2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"81","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic, the conference was held virtually. Additional to the 26 papers, 1 invited talk is included.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}