{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:59:26Z","timestamp":1776887966258,"version":"3.51.2"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031331695","type":"print"},{"value":"9783031331701","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-33170-1_8","type":"book-chapter","created":{"date-parts":[[2023,6,2]],"date-time":"2023-06-02T12:55:27Z","timestamp":1685710527000},"page":"122-139","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Reward Shaping from\u00a0Hybrid Systems Models in\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4002-5208","authenticated-orcid":false,"given":"Marian","family":"Qian","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3194-9759","authenticated-orcid":false,"given":"Stefan","family":"Mitsch","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,3]]},"reference":[{"key":"8_CR1","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI-18), the 30th innovative Applications of Artificial Intelligence (IAAI 2018), and the 8th AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI 2018), New Orleans, Louisiana, USA, 2\u20137 February 2018, pp. 2669\u20132678 (2018)"},{"key":"8_CR2","doi-asserted-by":"publisher","unstructured":"Balakrishnan, A., Deshmukh, J.V.: Structured reward shaping using signal temporal logic specifications. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2019), Macau, China, 4\u20138 November 2019, pp. 3481\u20133486 (2019). https:\/\/doi.org\/10.1109\/IROS40897.2019.8968254","DOI":"10.1109\/IROS40897.2019.8968254"},{"key":"8_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1007\/978-3-030-94583-1_3","volume-title":"Verification, Model Checking, and Abstract Interpretation","author":"D Bayani","year":"2022","unstructured":"Bayani, D., Mitsch, S.: Fanoos: multi-resolution, multi-strength, interactive explanations for learned systems. In: Finkbeiner, B., Wies, T. (eds.) VMCAI 2022. LNCS, vol. 13182, pp. 43\u201368. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-94583-1_3"},{"key":"8_CR4","doi-asserted-by":"publisher","unstructured":"Dohmen, J., Liessner, R., Friebel, C., B\u00e4ker, B.: LongiControl: a reinforcement learning environment for longitudinal vehicle control. In: Proceedings of the 13th International Conference on Agents and Artificial Intelligence - Volume 2: ICAART, pp. 1030\u20131037. INSTICC, SciTePress (2021). https:\/\/doi.org\/10.5220\/0010305210301037","DOI":"10.5220\/0010305210301037"},{"key":"8_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1007\/978-3-642-39799-8_19","volume-title":"Computer Aided Verification","author":"A Donz\u00e9","year":"2013","unstructured":"Donz\u00e9, A., Ferr\u00e8re, T., Maler, O.: Efficient robust monitoring for STL. In: Sharygina, N., Veith, H. (eds.) CAV 2013. LNCS, vol. 8044, pp. 264\u2013279. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-39799-8_19"},{"issue":"4","key":"8_CR6","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1007\/s10817-018-09509-5","volume":"63","author":"T Dreossi","year":"2019","unstructured":"Dreossi, T., Donz\u00e9, A., Seshia, S.A.: Compositional falsification of cyber-physical systems with machine learning components. J. Autom. Reason. 63(4), 1031\u20131053 (2019). https:\/\/doi.org\/10.1007\/s10817-018-09509-5","journal-title":"J. Autom. Reason."},{"key":"8_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1007\/11940197_12","volume-title":"Formal Approaches to Software Testing and Runtime Verification","author":"GE Fainekos","year":"2006","unstructured":"Fainekos, G.E., Pappas, G.J.: Robustness of temporal logic specifications. In: Havelund, K., N\u00fa\u00f1ez, M., Ro\u015fu, G., Wolff, B. (eds.) FATES\/RV -2006. LNCS, vol. 4262, pp. 178\u2013192. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11940197_12"},{"issue":"42","key":"8_CR8","doi-asserted-by":"publisher","first-page":"4262","DOI":"10.1016\/j.tcs.2009.06.021","volume":"410","author":"GE Fainekos","year":"2009","unstructured":"Fainekos, G.E., Pappas, G.J.: Robustness of temporal logic specifications for continuous-time signals. Theor. Comput. Sci. 410(42), 4262\u20134291 (2009). https:\/\/doi.org\/10.1016\/j.tcs.2009.06.021","journal-title":"Theor. Comput. Sci."},{"key":"8_CR9","unstructured":"Fulton, N., Platzer, A.: Safe reinforcement learning via formal methods: toward safe control through proof and learning. In: Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI 2018), the 30th Innovative Applications of Artificial Intelligence (IAAI 2018), and the 8th AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI 2018), New Orleans, Louisiana, USA, 2\u20137 February 2018, pp. 6485\u20136492 (2018)"},{"key":"8_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1007\/978-3-030-17462-0_28","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"N Fulton","year":"2019","unstructured":"Fulton, N., Platzer, A.: Verifiably safe off-model reinforcement learning. In: Vojnar, T., Zhang, L. (eds.) TACAS 2019. LNCS, vol. 11427, pp. 413\u2013430. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-17462-0_28"},{"key":"8_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1007\/978-3-030-59152-6_6","volume-title":"Automated Technology for Verification and Analysis","author":"EM Hahn","year":"2020","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Faithful and effective reward schemes for model-free reinforcement learning of omega-regular objectives. In: Hung, D.V., Sokolsky, O. (eds.) ATVA 2020. LNCS, vol. 12302, pp. 108\u2013124. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59152-6_6"},{"key":"8_CR12","doi-asserted-by":"publisher","unstructured":"Hammond, L., Abate, A., Gutierrez, J., Wooldridge, M.J.: Multi-agent reinforcement learning with temporal logic specifications. In: AAMAS 2021: 20th International Conference on Autonomous Agents and Multiagent Systems, Virtual Event, United Kingdom, 3\u20137 May 2021, pp. 583\u2013592 (2021). https:\/\/doi.org\/10.5555\/3463952.3464024","DOI":"10.5555\/3463952.3464024"},{"key":"8_CR13","doi-asserted-by":"publisher","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Cautious reinforcement learning with logical constraints. In: Proceedings of the 19th International Conference on Autonomous Agents and Multiagent Systems, AAMAS 2020, Auckland, New Zealand, 9\u201313 May 2020, pp. 483\u2013491 (2020). https:\/\/doi.org\/10.5555\/3398761.3398821","DOI":"10.5555\/3398761.3398821"},{"key":"8_CR14","doi-asserted-by":"publisher","unstructured":"Hunt, N., Fulton, N., Magliacane, S., Hoang, T.N., Das, S., Solar-Lezama, A.: Verifiably safe exploration for end-to-end reinforcement learning. In: HSCC 2021: 24th ACM International Conference on Hybrid Systems: Computation and Control, Nashville, Tennessee, 19\u201321 May 2021, pp. 14:1\u201314:11 (2021). https:\/\/doi.org\/10.1145\/3447928.3456653","DOI":"10.1145\/3447928.3456653"},{"key":"8_CR15","doi-asserted-by":"publisher","unstructured":"Ivanov, R., Carpenter, T.J., Weimer, J., Alur, R., Pappas, G.J., Lee, I.: Verifying the safety of autonomous systems with neural network controllers. ACM Trans. Embed. Comput. Syst. 20(1), 7:1\u20137:26 (2021). https:\/\/doi.org\/10.1145\/3419742","DOI":"10.1145\/3419742"},{"key":"8_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-3-030-81685-8_11","volume-title":"Computer Aided Verification","author":"R Ivanov","year":"2021","unstructured":"Ivanov, R., Carpenter, T., Weimer, J., Alur, R., Pappas, G., Lee, I.: Verisig 2.0: verification of neural network controllers using Taylor model preconditioning. In: Silva, A., Leino, K.R.M. (eds.) CAV 2021. LNCS, vol. 12759, pp. 249\u2013262. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-81685-8_11"},{"key":"8_CR17","doi-asserted-by":"publisher","unstructured":"Jansen, N., K\u00f6nighofer, B., Junges, S., Serban, A., Bloem, R.: Safe reinforcement learning using probabilistic shields (invited paper). In: 31st International Conference on Concurrency Theory, CONCUR 2020, 1\u20134 September 2020, Vienna, Austria (Virtual Conference), pp. 3:1\u20133:16 (2020). https:\/\/doi.org\/10.4230\/LIPIcs.CONCUR.2020.3","DOI":"10.4230\/LIPIcs.CONCUR.2020.3"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Bharadwaj, S., Wu, B., Shah, R., Topcu, U., Stone, P.: Temporal-logic-based reward shaping for continuing reinforcement learning tasks. In: Association for the Advancement of Artificial Intelligence (2021)","DOI":"10.1609\/aaai.v35i9.16975"},{"key":"8_CR19","doi-asserted-by":"publisher","unstructured":"K\u00f6nighofer, B., Bloem, R., Ehlers, R., Pek, C.: Correct-by-construction runtime enforcement in AI - a survey. CoRR abs\/2208.14426 (2022). https:\/\/doi.org\/10.48550\/arXiv.2208.14426","DOI":"10.48550\/arXiv.2208.14426"},{"key":"8_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/978-3-030-61362-4_16","volume-title":"Leveraging Applications of Formal Methods, Verification and Validation: Verification Principles","author":"B K\u00f6nighofer","year":"2020","unstructured":"K\u00f6nighofer, B., Lorber, F., Jansen, N., Bloem, R.: Shield synthesis for reinforcement learning. In: Margaria, T., Steffen, B. (eds.) ISoLA 2020. LNCS, vol. 12476, pp. 290\u2013306. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61362-4_16"},{"key":"8_CR21","doi-asserted-by":"publisher","first-page":"1574","DOI":"10.1109\/LCSYS.2021.3125717","volume":"6","author":"Q Lin","year":"2022","unstructured":"Lin, Q., Mitsch, S., Platzer, A., Dolan, J.M.: Safe and resilient practical waypoint-following for autonomous vehicles. IEEE Control. Syst. Lett. 6, 1574\u20131579 (2022). https:\/\/doi.org\/10.1109\/LCSYS.2021.3125717","journal-title":"IEEE Control. Syst. Lett."},{"issue":"1\u20132","key":"8_CR22","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/s10703-016-0241-z","volume":"49","author":"S Mitsch","year":"2016","unstructured":"Mitsch, S., Platzer, A.: ModelPlex: verified runtime validation of verified cyber-physical system models. Formal Methods Syst. Des. 49(1\u20132), 33\u201374 (2016). https:\/\/doi.org\/10.1007\/s10703-016-0241-z","journal-title":"Formal Methods Syst. Des."},{"key":"8_CR23","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning (ICML 1999), Bled, Slovenia, 27\u201330 June 1999, pp. 278\u2013287 (1999)"},{"key":"8_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/978-3-030-55754-6_6","volume-title":"NASA Formal Methods","author":"DT Phan","year":"2020","unstructured":"Phan, D.T., Grosu, R., Jansen, N., Paoletti, N., Smolka, S.A., Stoller, S.D.: Neural simplex architecture. In: Lee, R., Jha, S., Mavridou, A., Giannakopoulou, D. (eds.) NFM 2020. LNCS, vol. 12229, pp. 97\u2013114. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-55754-6_6"},{"issue":"2","key":"8_CR25","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/s10817-016-9385-1","volume":"59","author":"A Platzer","year":"2017","unstructured":"Platzer, A.: A complete uniform substitution calculus for differential dynamic logic. J. Autom. Reason. 59(2), 219\u2013265 (2017). https:\/\/doi.org\/10.1007\/s10817-016-9385-1","journal-title":"J. Autom. Reason."},{"key":"8_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1007\/978-3-642-10373-5_13","volume-title":"Formal Methods and Software Engineering","author":"A Platzer","year":"2009","unstructured":"Platzer, A., Quesel, J.-D.: European train control system: a case study in formal verification. In: Breitman, K., Cavalcanti, A. (eds.) ICFEM 2009. LNCS, vol. 5885, pp. 246\u2013265. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-10373-5_13"},{"key":"8_CR27","doi-asserted-by":"publisher","unstructured":"Tran, H., Cai, F., Lopez, D.M., Musau, P., Johnson, T.T., Koutsoukos, X.D.: Safety verification of cyber-physical systems with reinforcement learning control. ACM Trans. Embed. Comput. Syst. 18(5s), 105:1\u2013105:22 (2019). https:\/\/doi.org\/10.1145\/3358230","DOI":"10.1145\/3358230"},{"key":"8_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1007\/978-3-030-76384-8_24","volume-title":"NASA Formal Methods","author":"Z Zhang","year":"2021","unstructured":"Zhang, Z., Lyu, D., Arcaini, P., Ma, L., Hasuo, I., Zhao, J.: On the effectiveness of signal rescaling in hybrid system falsification. In: Dutle, A., Moscato, M.M., Titolo, L., Mu\u00f1oz, C.A., Perez, I. (eds.) NFM 2021. LNCS, vol. 12673, pp. 392\u2013399. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-76384-8_24"}],"container-title":["Lecture Notes in Computer Science","NASA Formal Methods"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-33170-1_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,2]],"date-time":"2023-06-02T12:56:54Z","timestamp":1685710614000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-33170-1_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031331695","9783031331701"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-33170-1_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"3 June 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NFM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"NASA Formal Methods Symposium","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Houston, TX","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nfm2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conf.researchr.org\/home\/nfm-2023","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"75","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}