{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:39:37Z","timestamp":1742913577963,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030763831"},{"type":"electronic","value":"9783030763848"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-76384-8_15","type":"book-chapter","created":{"date-parts":[[2021,5,18]],"date-time":"2021-05-18T23:50:53Z","timestamp":1621381853000},"page":"231-248","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Online Shielding for Stochastic Systems"],"prefix":"10.1007","author":[{"given":"Bettina","family":"K\u00f6nighofer","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Julian","family":"Rudolf","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Palmisano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Tappler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roderick","family":"Bloem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,5,19]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Exploration and apprenticeship learning in reinforcement learning. In: ICML. ACM International Conference Proceeding Series, vol. 119, pp. 1\u20138. ACM (2005)","DOI":"10.1145\/1102351.1102352"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: AAAI. AAAI Press (2018)","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"15_CR3","unstructured":"Amodei, D., Olah, C., Steinhardt, J., Christiano, P., Schulman, J., Man\u00e9, D.: Concrete problems in AI safety. CoRR, abs\/1606.06565 (2016)"},{"key":"15_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-030-25540-4_36","volume-title":"Computer Aided Verification","author":"G Avni","year":"2019","unstructured":"Avni, G., Bloem, R., Chatterjee, K., Henzinger, T.A., K\u00f6nighofer, B., Pranger, S.: Run-time optimization for learned controllers through quantitative games. In: Dillig, I., Tasiran, S. (eds.) CAV 2019. LNCS, vol. 11561, pp. 630\u2013649. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-25540-4_36"},{"key":"15_CR5","volume-title":"Principles of Model Checking","author":"C Baier","year":"2008","unstructured":"Baier, C., Katoen, J.: Principles of Model Checking. MIT Press, Cambridge (2008)"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Bharadwaj, S., Bloem, R., Dimitrova, R., K\u00f6nighofer, B., Topcu, U.: Synthesis of minimum-cost shields for multi-agent systems. In: ACC, pp. 1048\u20131055. IEEE (2019)","DOI":"10.23919\/ACC.2019.8815233"},{"key":"15_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1007\/978-3-662-46681-0_51","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"R Bloem","year":"2015","unstructured":"Bloem, R., K\u00f6nighofer, B., K\u00f6nighofer, R., Wang, C.: Shield synthesis: runtime enforcement for reactive systems. In: Baier, C., Tinelli, C. (eds.) TACAS 2015. LNCS, vol. 9035, pp. 533\u2013548. Springer, Heidelberg (2015). https:\/\/doi.org\/10.1007\/978-3-662-46681-0_51"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, R., Orosz, G., Murray, R.M., Burdick, J.W.: End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Clouse, J.A., Utgoff, P.E.: A teaching method for reinforcement learning. In: ML, pp. 92\u2013110. Morgan Kaufmann (1992)","DOI":"10.1016\/B978-1-55860-247-2.50017-6"},{"key":"15_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1007\/978-3-662-46681-0_16","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"A David","year":"2015","unstructured":"David, A., Jensen, P.G., Larsen, K.G., Miku\u010dionis, M., Taankvist, J.H.: Uppaal Stratego. In: Baier, C., Tinelli, C. (eds.) TACAS 2015. LNCS, vol. 9035, pp. 206\u2013211. Springer, Heidelberg (2015). https:\/\/doi.org\/10.1007\/978-3-662-46681-0_16"},{"key":"15_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"592","DOI":"10.1007\/978-3-319-63390-9_31","volume-title":"Computer Aided Verification","author":"C Dehnert","year":"2017","unstructured":"Dehnert, C., Junges, S., Katoen, J.-P., Volk, M.: A STORM is coming: a modern probabilistic model checker. In: Majumdar, R., Kun\u010dak, V. (eds.) CAV 2017. LNCS, vol. 10427, pp. 592\u2013600. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-63390-9_31"},{"key":"15_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1007\/978-3-030-32079-9_4","volume-title":"Runtime Verification","author":"Y Falcone","year":"2019","unstructured":"Falcone, Y., Pinisetty, S.: On the runtime enforcement of timed properties. In: Finkbeiner, B., Mariani, L. (eds.) RV 2019. LNCS, vol. 11757, pp. 48\u201369. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-32079-9_4"},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Fulton, N., Platzer, A.: Verifiably safe off-model reinforcement learning. In: TACAS, pp. 413\u2013430 (2019)","DOI":"10.1007\/978-3-030-17462-0_28"},{"key":"15_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1007\/978-3-030-17462-0_28","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"N Fulton","year":"2019","unstructured":"Fulton, N., Platzer, A.: Verifiably safe off-model reinforcement learning. In: Vojnar, T., Zhang, L. (eds.) TACAS 2019. LNCS, vol. 11427, pp. 413\u2013430. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-17462-0_28"},{"issue":"1","key":"15_CR15","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(1), 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/978-3-030-17462-0_27","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"EM Hahn","year":"2019","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Omega-regular objectives in model-free reinforcement learning. In: Vojnar, T., Zhang, L. (eds.) TACAS 2019. LNCS, vol. 11427, pp. 395\u2013412. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-17462-0_27"},{"key":"15_CR17","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Logically-correct reinforcement learning. CoRR, abs\/1801.08099 (2018)"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Kantaros, Y., Abate, A., Kroening, D., Pappas, G.J., Lee, I.: Reinforcement learning for temporal logic control synthesis with probabilistic satisfaction guarantees. In: CDC, pp. 5338\u20135343. IEEE (2019)","DOI":"10.1109\/CDC40024.2019.9028919"},{"key":"15_CR19","unstructured":"Jansen, N., K\u00f6nighofer, B., Junges, S., Serban, A., Bloem, R.: Safe reinforcement learning using probabilistic shields (invited paper). In: Konnov, I., Kov\u00e1cs, L. (eds.) CONCUR, volume 171 of LIPIcs, pp. 3:1\u20133:16. Schloss Dagstuhl - Leibniz-Zentrum f\u00fcr Informatik (2020)"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Katoen, J.-P.: The probabilistic model checking landscape. In: LICS, pp. 31\u201345. ACM (2016)","DOI":"10.1145\/2933575.2934574"},{"key":"15_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/978-3-030-61362-4_16","volume-title":"Leveraging Applications of Formal Methods, Verification and Validation: Verification Principles","author":"B K\u00f6nighofer","year":"2020","unstructured":"K\u00f6nighofer, B., Lorber, F., Jansen, N., Bloem, R.: Shield synthesis for reinforcement learning. In: Margaria, T., Steffen, B. (eds.) ISoLA 2020. LNCS, vol. 12476, pp. 290\u2013306. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61362-4_16"},{"key":"15_CR22","unstructured":"Kwiatkowska, M.Z.: Model checking for probability and time: from theory to practice. In: LICS, pp. 351. IEEE CS (2003)"},{"key":"15_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1007\/978-3-642-22110-1_47","volume-title":"Computer Aided Verification","author":"M Kwiatkowska","year":"2011","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: verification of probabilistic real-time systems. In: Gopalakrishnan, G., Qadeer, S. (eds.) CAV 2011. LNCS, vol. 6806, pp. 585\u2013591. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-22110-1_47"},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Li, S., Bastani, O.: Robust model predictive shielding for safe reinforcement learning with stochastic dynamics. In: ICRA, pp. 7166\u20137172. IEEE (2020)","DOI":"10.1109\/ICRA40945.2020.9196867"},{"issue":"2","key":"15_CR25","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/s10994-016-5565-9","volume":"105","author":"H Mao","year":"2016","unstructured":"Mao, H., Chen, Y., Jaeger, M., Nielsen, T.D., Larsen, K.G., Nielsen, B.: Learning deterministic probabilistic automata from a model checking perspective. Mach. Learn. 105(2), 255\u2013299 (2016). https:\/\/doi.org\/10.1007\/s10994-016-5565-9","journal-title":"Mach. Learn."},{"key":"15_CR26","unstructured":"Moldovan, T.M., Abbeel, P.: Safe exploration in Markov decision processes. In: ICML. icml.cc\/Omnipress (2012)"},{"key":"15_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-13823-7_31","volume-title":"Modelling and Simulation for Autonomous Systems","author":"M Pecka","year":"2014","unstructured":"Pecka, M., Svoboda, T.: Safe exploration techniques for reinforcement learning - an overview. In: Hodicky, J. (ed.) MESAS 2014. LNCS, vol. 8906. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-13823-7_31"},{"key":"15_CR28","doi-asserted-by":"crossref","unstructured":"Pnueli, A.: The temporal logic of programs. In: Foundations of Computer Science, pp. 46\u201357. IEEE (1977)","DOI":"10.1109\/SFCS.1977.32"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Pranger, S., K\u00f6nighofer, B., Tappler, M., Deixelberger, M., Jansen, N., Bloem, R.: Adaptive shielding under uncertainty. CoRR, abs\/2010.03842 (2020)","DOI":"10.23919\/ACC50511.2021.9482889"},{"issue":"1","key":"15_CR30","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1017\/S0960129517000123","volume":"29","author":"M Renard","year":"2019","unstructured":"Renard, M., Falcone, Y., Rollet, A., J\u00e9ron, T., Marchand, H.: Optimal enforcement of (timed) properties with uncontrollable events. Math. Struct. Comput. Sci. 29(1), 169\u2013214 (2019)","journal-title":"Math. Struct. Comput. Sci."},{"issue":"7","key":"15_CR31","doi-asserted-by":"publisher","first-page":"1405","DOI":"10.1007\/s10514-018-9746-1","volume":"42","author":"D Sadigh","year":"2018","unstructured":"Sadigh, D., Landolfi, N., Sastry, S.S., Seshia, S.A., Dragan, A.D.: Planning for cars that coordinate with people: leveraging effects on human actions for planning and active information gathering over human internal state. Auton. Robot. 42(7), 1405\u20131426 (2018). https:\/\/doi.org\/10.1007\/s10514-018-9746-1","journal-title":"Auton. Robot."},{"key":"15_CR32","unstructured":"Sadigh, D., Sastry, S., Seshia, S.A., Dragan, A.D.: Planning for autonomous cars that leverage effects on human actions. Science and Systems. In: Robotics (2016)"},{"issue":"7587","key":"15_CR33","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","journal-title":"Nature"},{"key":"15_CR34","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)"},{"key":"15_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1007\/978-3-030-30942-8_38","volume-title":"Formal Methods \u2013 The Next 30 Years","author":"M Tappler","year":"2019","unstructured":"Tappler, M., Aichernig, B.K., Bacci, G., Eichlseder, M., Larsen, K.G.: $$L^*$$-based learning of Markov decision processes. In: ter Beek, M.H., McIver, A., Oliveira, J.N. (eds.) FM 2019. LNCS, vol. 11800, pp. 651\u2013669. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-30942-8_38"},{"key":"15_CR36","volume-title":"Probabilistic Robotics","author":"S Thrun","year":"2005","unstructured":"Thrun, S., Burgard, W., Fox, D.: Probabilistic Robotics. The MIT Press, Cambridge (2005)"},{"key":"15_CR37","doi-asserted-by":"crossref","unstructured":"Wang, A., Kurutach, T., Liu, K., Abbeel, P., Tamar, A.: Learning robotic manipulation through visual planning and acting. arXiv preprint arXiv:1905.04411 (2019)","DOI":"10.15607\/RSS.2019.XV.074"},{"issue":"6","key":"15_CR38","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1287\/inte.15.6.73","volume":"15","author":"DJ White","year":"1985","unstructured":"White, D.J.: Real applications of Markov decision processes. Interfaces 15(6), 73\u201383 (1985)","journal-title":"Interfaces"},{"key":"15_CR39","doi-asserted-by":"crossref","unstructured":"Wu, M., Wang, J., Deshmukh, J., Wang, C.: Shield synthesis for real: enforcing safety in cyber-physical systems. In: FMCAD, pp. 129\u2013137. IEEE (2019)","DOI":"10.23919\/FMCAD.2019.8894264"},{"key":"15_CR40","unstructured":"Zhang, W., Bastani, O.: MAMPS: safe multi-agent reinforcement learning via model predictive shielding. CoRR, abs\/1910.12639 (2019)"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, W., Gao, R., Kim, B., Kang, E., Li, W.: Runtime-safety-guided policy repair. In: RV, pp. 131\u2013150 (2020)","DOI":"10.1007\/978-3-030-60508-7_7"},{"key":"15_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"662","DOI":"10.1007\/978-3-319-96145-3_38","volume-title":"Computer Aided Verification","author":"W Zhou","year":"2018","unstructured":"Zhou, W., Li, W.: Safety-aware apprenticeship learning. In: Chockler, H., Weissenbacher, G. (eds.) CAV 2018. LNCS, vol. 10981, pp. 662\u2013680. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-96145-3_38"}],"container-title":["Lecture Notes in Computer Science","NASA Formal Methods"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-76384-8_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,27]],"date-time":"2022-12-27T21:12:46Z","timestamp":1672175566000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-76384-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030763831","9783030763848"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-76384-8_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"19 May 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NFM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"NASA Formal Methods Symposium","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 May 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 May 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nfm2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/shemesh.larc.nasa.gov\/nfm2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}