{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,7]],"date-time":"2025-05-07T06:53:25Z","timestamp":1746600805161,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031212024"},{"type":"electronic","value":"9783031212031"}],"license":[{"start":{"date-parts":[[2022,11,12]],"date-time":"2022-11-12T00:00:00Z","timestamp":1668211200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,11,12]],"date-time":"2022-11-12T00:00:00Z","timestamp":1668211200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-21203-1_5","type":"book-chapter","created":{"date-parts":[[2022,11,11]],"date-time":"2022-11-11T07:35:25Z","timestamp":1668152125000},"page":"72-89","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["On Normative Reinforcement Learning via\u00a0Safe Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5998-3273","authenticated-orcid":false,"given":"Emery A.","family":"Neufeld","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8004-6601","authenticated-orcid":false,"given":"Ezio","family":"Bartocci","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6947-8772","authenticated-orcid":false,"given":"Agata","family":"Ciabattoni","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,12]]},"reference":[{"issue":"2","key":"5_CR1","first-page":"457","volume":"5","author":"N Alechina","year":"2018","unstructured":"Alechina, N., Dastani, M., Logan, B.: Norm specification and verification in multi-agent systems. J. Appl. Logics 5(2), 457 (2018)","journal-title":"J. Appl. Logics"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: Proceedigs of AAAI, pp. 2669\u20132678 (2018)","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Boella, G., van der Torre, L.: Permissions and obligations in hierarchical normative systems. In: Proceedings of ICAIL, pp. 81\u201382 (2003)","DOI":"10.1145\/1047788.1047818"},{"key":"5_CR4","unstructured":"Boella, G., van der Torre, L.: Regulative and constitutive norms in normative multiagent systems. In: Proceedings of KR 2004, pp. 255\u2013266. AAAI Press (2004)"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"De Giacomo, G., De Masellis, R., Grasso, M., Maggi, F.M., Montali, M.: Monitoring business metaconstraints based on LTL and LDL for finite traces. In: Sadiq, S., Soffer, P., V\u00f6lzer, H. (eds.) Business Process Management, pp. 1\u201317 (2014)","DOI":"10.1007\/978-3-319-10172-9_1"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"De Giacomo, G., Iocchi, L., Favorito, M., Patrizi, F.: Foundations for restraining bolts: reinforcement learning with LTLf\/LDLf restraining specifications. In: Proceedings of ICAPS, vol. 29, pp. 128\u2013136 (2019)","DOI":"10.1609\/icaps.v29i1.3549"},{"key":"5_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1007\/978-3-319-08867-9_13","volume-title":"Computer Aided Verification","author":"J Esparza","year":"2014","unstructured":"Esparza, J., K\u0159et\u00ednsk\u00fd, J.: From LTL to deterministic automata: a safraless compositional approach. In: Biere, A., Bloem, R. (eds.) CAV 2014. LNCS, vol. 8559, pp. 192\u2013208. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-08867-9_13"},{"issue":"4","key":"5_CR8","doi-asserted-by":"publisher","first-page":"193","DOI":"10.2307\/2026120","volume":"81","author":"JW Forrester","year":"1984","unstructured":"Forrester, J.W.: Gentle murder, or the adverbial samaritan. J. Philos. 81(4), 193\u2013197 (1984)","journal-title":"J. Philos."},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Fu, J., Topcu, U.: Probably approximately correct MDP learning and control with temporal logic constraints. In: Proceedings of RSS (2014)","DOI":"10.15607\/RSS.2014.X.039"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Governatori, G.: Thou shalt is not you will. In: Proceedings of ICAIL, pp. 63\u201368 (2015)","DOI":"10.1145\/2746090.2746105"},{"key":"5_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-030-00338-8_1","volume-title":"Reasoning Web. Learning, Uncertainty, Streaming, and Scalability","author":"G Governatori","year":"2018","unstructured":"Governatori, G.: Practical normative reasoning with defeasible deontic logic. In: d\u2019Amato, C., Theobald, M. (eds.) Reasoning Web 2018. LNCS, vol. 11078, pp. 1\u201325. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00338-8_1"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Governatori, G., Hashmi, M.: No time for compliance. In: Proceedings of EDOC, pp. 9\u201318. IEEE (2015)","DOI":"10.1109\/EDOC.2015.12"},{"key":"5_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1007\/978-3-540-76928-6_50","volume-title":"AI 2007: Advances in Artificial Intelligence","author":"G Governatori","year":"2007","unstructured":"Governatori, G., Hulstijn, J., Riveret, R., Rotolo, A.: Characterising deadlines in\u00a0temporal\u00a0modal\u00a0defeasible\u00a0logic. In: Orgun, M.A., Thornton, J. (eds.) AI 2007. LNCS (LNAI), vol. 4830, pp. 486\u2013496. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-76928-6_50"},{"issue":"6","key":"5_CR14","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1007\/s10992-013-9295-1","volume":"42","author":"G Governatori","year":"2013","unstructured":"Governatori, G., Olivieri, F., Rotolo, A., Scannapieco, S.: Computing strong and weak permissions in defeasible logic. J. Philos. Logic 42(6), 799\u2013829 (2013)","journal-title":"J. Philos. Logic"},{"issue":"1","key":"5_CR15","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1007\/s10458-008-9030-4","volume":"17","author":"G Governatori","year":"2008","unstructured":"Governatori, G., Rotolo, A.: BIO logical agents: norms, beliefs, intentions in defeasible logic. J. Auton. Agents Multi Agent Syst. 17(1), 36\u201369 (2008)","journal-title":"J. Auton. Agents Multi Agent Syst."},{"key":"5_CR16","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Cautious reinforcement learning with logical constraints. In: Proceedings of AAMAS, pp. 483\u2013491 (2020)"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Hodkinson, I., Reynolds, M.: Temporal logic. In: Blackburn, P., Van Benthem, J., Wolter, F. (eds.) Handbook of Modal Logic, vol. 3, pp. 655\u2013720. Elsevier (2007)","DOI":"10.1016\/S1570-2464(07)80014-0"},{"key":"5_CR18","unstructured":"Jansen, N., K\u00f6nighofer, B., Junges, S., Serban, A., Bloem, R.: Safe Reinforcement Learning Using Probabilistic Shields. In: Proceedings of CONCUR. LIPIcs, vol. 171, pp. 1\u201316 (2020)"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Lam, H.P., Governatori, G.: The making of SPINdle. In: Proc. of RuleML. LNCS, vol. 5858, pp. 315\u2013322 (2009)","DOI":"10.1007\/978-3-642-04985-9_29"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Neufeld, E., Bartocci, E., Ciabattoni, A., Governatori, G.: A normative supervisor for reinforcement learning agents. In: Proceedings of CADE, pp. 565\u2013576 (2021)","DOI":"10.1007\/978-3-030-79876-5_32"},{"key":"5_CR21","doi-asserted-by":"publisher","unstructured":"Neufeld, E.A., Bartocci, E., Ciabattoni, A., Governatori, G.: Enforcing ethical goals over reinforcement-learning policies. J. Ethics Inf. Technol. 24, 43 (2022). https:\/\/doi.org\/10.1007\/s10676-022-09665-8","DOI":"10.1007\/s10676-022-09665-8"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Noothigattu, R., et al.: Teaching AI agents ethical values using reinforcement learning and policy orchestration. In: Proceedings of IJCAI, LNCS, vol. 12158, pp. 217\u2013234 (2019)","DOI":"10.24963\/ijcai.2019\/891"},{"key":"5_CR23","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/978-3-319-07314-9_19","volume-title":"Coordination, Organizations, Institutions, and Norms in Agent Systems IX","author":"S Panagiotidi","year":"2014","unstructured":"Panagiotidi, S., Alvarez-Napagao, S., V\u00e1zquez-Salceda, J.: Towards the norm-aware agent: bridging the gap between deontic specifications and practical mechanisms for norm monitoring and norm-aware planning. In: Balke, T., Dignum, F., van Riemsdijk, M.B., Chopra, A.K. (eds.) COIN 2013. LNCS (LNAI), vol. 8386, pp. 346\u2013363. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-07314-9_19"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Pnueli, A.: The temporal logic of programs. In: Proceedings of FOCS, pp. 46\u201357 (1977)","DOI":"10.1109\/SFCS.1977.32"},{"key":"5_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/978-3-030-88885-5_15","volume-title":"Automated Technology for Verification and Analysis","author":"S Pranger","year":"2021","unstructured":"Pranger, S., K\u00f6nighofer, B., Posch, L., Bloem, R.: TEMPEST - synthesis tool for reactive systems and shields in probabilistic environments. In: Hou, Z., Ganesh, V. (eds.) ATVA 2021. LNCS, vol. 12971, pp. 222\u2013228. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-88885-5_15"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Rodriguez-Soto, M., Lopez-Sanchez, M., Rodriguez Aguilar, J.A.: Multi-objective reinforcement learning for designing ethical environments. In: Proceedings of IJCAI, pp. 545\u2013551 (2021)","DOI":"10.24963\/ijcai.2021\/76"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E.S., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of markov decision processes for linear temporal logic specifications. In: Proceedings of CDC, pp. 1091\u20131096 (2014)","DOI":"10.21236\/ADA623517"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Searle, J.R.: Speech acts: an essay in the philosophy of language. Cambridge University Press, Cambridge, England (1969)","DOI":"10.1017\/CBO9781139173438"},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Sickert, S., Esparza, J., Jaax, S., K\u0159et\u00ednsk\u1ef3, J.: Limit-deterministic b\u00fcchi automata for linear temporal logic. In: Proceedings of CAV, LNCS, vol. 9780, pp. 312\u2013332 (2016)","DOI":"10.1007\/978-3-319-41540-6_17"},{"key":"5_CR30","unstructured":"Watkins, C.J.C.H.: Learning from Delayed Rewards. Ph.D. thesis, King\u2019s College, Cambridge, UK (1989). https:\/\/www.cs.rhul.ac.uk\/~chrisw\/thesis.pdf"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Wen, M., Ehlers, R., Topcu, U.: Correct-by-synthesis reinforcement learning with temporal logic constraints. In: Procedings of IROS, pp. 4983\u20134990. IEEE (2015)","DOI":"10.1109\/IROS.2015.7354078"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Wu, Y.H., Lin, S.D.: A low-cost ethics shaping approach for designing reinforcement learning agents. In: Proceedings of AAAI, pp. 1687\u20131694 (2018)","DOI":"10.1609\/aaai.v32i1.11498"}],"container-title":["Lecture Notes in Computer Science","PRIMA 2022: Principles and Practice of Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-21203-1_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,11]],"date-time":"2022-11-11T07:35:52Z","timestamp":1668152152000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-21203-1_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,12]]},"ISBN":["9783031212024","9783031212031"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-21203-1_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022,11,12]]},"assertion":[{"value":"12 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRIMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Principles and Practice of Multi-Agent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Valencia","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 November 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"prima2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prima2022.webs.upv.es\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"100","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"15","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1 (demo paper)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}