{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T14:36:53Z","timestamp":1762094213526,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2105007"],"award-info":[{"award-number":["2105007"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,30]]},"DOI":"10.1145\/3605764.3623913","type":"proceedings-article","created":{"date-parts":[[2023,11,21]],"date-time":"2023-11-21T12:12:17Z","timestamp":1700568737000},"page":"139-148","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Task-Agnostic Safety for Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9020-2789","authenticated-orcid":false,"given":"Md Asifur","family":"Rahman","sequence":"first","affiliation":[{"name":"Wake Forest University, Winston-Salem, NC, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4572-7150","authenticated-orcid":false,"given":"Sarra","family":"Alqahtani","sequence":"additional","affiliation":[{"name":"Wake Forest University, Winston-Salem, NC, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,11,26]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International conference on machine learning. PMLR, 22-- 31","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam, David Held, Aviv Tamar, and Pieter Abbeel. 2017. Constrained policy optimization. In International conference on machine learning. PMLR, 22-- 31."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--319--44482--6_10"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2017.8263977"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.23919\/ACC50511.2021.9483182"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1186\/s42400-019-0027-x"},{"key":"e_1_3_2_1_7_1","volume-title":"Safe multi-agent reinforcement learning via shielding. arXiv preprint arXiv:2101.11196","author":"ElSayed-Aly Ingy","year":"2021","unstructured":"Ingy ElSayed-Aly, Suda Bharadwaj, Christopher Amato, R\u00fcdiger Ehlers, Ufuk Topcu, and Lu Feng. 2021. Safe multi-agent reinforcement learning via shielding. arXiv preprint arXiv:2101.11196 (2021)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACSOS55765.2022.00023"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794107"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/11871842_63"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"e_1_3_2_1_12_1","unstructured":"Seyed Kamyar Seyed Ghasemipour Shane Gu and Richard Zemel. 2019. Understanding the relation between maximum-entropy inverse reinforcement learning and behaviour cloning. (2019)."},{"key":"e_1_3_2_1_13_1","volume-title":"Know Your Boundaries: The Necessity of Explicit Behavioral Cloning in Offline RL. arXiv preprint arXiv:2206.00695","author":"Goo Wonjoon","year":"2022","unstructured":"Wonjoon Goo and Scott Niekum. 2022. Know Your Boundaries: The Necessity of Explicit Behavioral Cloning in Offline RL. arXiv preprint arXiv:2206.00695 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"IDES: Self-adaptive Software with Online Policy Evolution Extended from Rainbow.","author":"Gu Xiaodong","year":"2012","unstructured":"Xiaodong Gu. 2012. IDES: Self-adaptive Software with Online Policy Evolution Extended from Rainbow."},{"key":"e_1_3_2_1_15_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861-- 1870."},{"key":"e_1_3_2_1_16_1","unstructured":"Tuomas Haarnoja Aurick Zhou Kristian Hartikainen George Tucker Sehoon Ha Jie Tan Vikash Kumar Henry Zhu Abhishek Gupta Pieter Abbeel et al. 2018. Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)."},{"key":"e_1_3_2_1_17_1","volume-title":"Anton Maximilian Sch\u00e4fer, and Steffen Udluft","author":"Hans Alexander","year":"2008","unstructured":"Alexander Hans, Daniel Schneega\u00df, Anton Maximilian Sch\u00e4fer, and Steffen Udluft. 2008. Safe exploration for reinforcement learning.. In ESANN. Citeseer, 143--148."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-66723-8_19"},{"key":"e_1_3_2_1_19_1","volume-title":"International Workshop on the Foundations of Trustworthy AI Integrating Learning, Optimization and Reasoning. Springer, 123--139","author":"Kim Youngmin","year":"2020","unstructured":"Youngmin Kim, Richard Allmendinger, and Manuel L\u00f3pez-Ib\u00e1\u00f1ez. 2020. Safe learning and optimization techniques: Towards a survey of the state of the art. In International Workshop on the Foundations of Trustworthy AI Integrating Learning, Optimization and Reasoning. Springer, 123--139."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5887"},{"key":"e_1_3_2_1_21_1","volume-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review. arXiv preprint arXiv:1805.00909","author":"Levine Sergey","year":"2018","unstructured":"Sergey Levine. 2018. Reinforcement learning and control as probabilistic inference: Tutorial and review. arXiv preprint arXiv:1805.00909 (2018)."},{"key":"e_1_3_2_1_22_1","volume-title":"Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection. The International journal of robotics research 37, 4--5","author":"Levine Sergey","year":"2018","unstructured":"Sergey Levine, Peter Pastor, Alex Krizhevsky, Julian Ibarz, and Deirdre Quillen. 2018. Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection. The International journal of robotics research 37, 4--5 (2018), 421--436."},{"key":"e_1_3_2_1_23_1","volume-title":"Realizing self-adaptive systems via online reinforcement learning and feature-model-guided exploration. Computing (03","author":"Metzger Andreas","year":"2022","unstructured":"Andreas Metzger, Cl\u00e9ment Quinton, Zoltan Mann, Luciano Baresi, and Klaus Pohl. 2022. Realizing self-adaptive systems via online reinforcement learning and feature-model-guided exploration. Computing (03 2022). https:\/\/doi.org\/10. 1007\/s00607-022-01052-x"},{"key":"e_1_3_2_1_24_1","volume-title":"Risk-sensitive reinforcement learning. Machine learning 49, 2","author":"Mihatsch Oliver","year":"2002","unstructured":"Oliver Mihatsch and Ralph Neuneier. 2002. Risk-sensitive reinforcement learning. Machine learning 49, 2 (2002), 267--290."},{"volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"Puterman Martin L","key":"e_1_3_2_1_25_1","unstructured":"Martin L Puterman. 2014. Markov decision processes: discrete stochastic dynamic programming. John Wiley & Sons."},{"key":"e_1_3_2_1_26_1","unstructured":"Alex Ray Joshua Achiam and Dario Amodei. 2019. Benchmarking Safe Exploration in Deep Reinforcement Learning. (2019)."},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR","author":"Schulman John","year":"2015","unstructured":"John Schulman, Sergey Levine, Pieter Abbeel, Michael Jordan, and Philipp Moritz. 2015. Trust region policy optimization. In International conference on machine learning. PMLR, 1889--1897."},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR, 387--395","author":"Silver David","year":"2014","unstructured":"David Silver, Guy Lever, Nicolas Heess, Thomas Degris, Daan Wierstra, and Martin Riedmiller. 2014. Deterministic policy gradient algorithms. In International conference on machine learning. PMLR, 387--395."},{"key":"e_1_3_2_1_29_1","volume-title":"Learning to be safe: Deep rl with a safety critic. arXiv preprint arXiv:2010.14603","author":"Srinivasan Krishnan","year":"2020","unstructured":"Krishnan Srinivasan, Benjamin Eysenbach, Sehoon Ha, Jie Tan, and Chelsea Finn. 2020. Learning to be safe: Deep rl with a safety critic. arXiv preprint arXiv:2010.14603 (2020)."},{"key":"e_1_3_2_1_30_1","volume-title":"Barto","author":"Sutton Richard S.","year":"2018","unstructured":"Richard S. Sutton and Andrew G. Barto. 2018. Reinforcement Learning: An Introduction. A Bradford Book, Cambridge, MA, USA."},{"key":"e_1_3_2_1_31_1","volume-title":"International Conference on Machine Learning. PMLR, 6215--6224","author":"Tessler Chen","year":"2019","unstructured":"Chen Tessler, Yonathan Efroni, and Shie Mannor. 2019. Action robust reinforcement learning and applications in continuous control. In International Conference on Machine Learning. PMLR, 6215--6224."},{"key":"e_1_3_2_1_32_1","volume-title":"Reward constrained policy optimization. arXiv preprint arXiv:1805.11074","author":"Tessler Chen","year":"2018","unstructured":"Chen Tessler, Daniel J Mankowitz, and Shie Mannor. 2018. Reward constrained policy optimization. arXiv preprint arXiv:1805.11074 (2018)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3070252"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2976272"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-031-"}],"event":{"name":"CCS '23: ACM SIGSAC Conference on Computer and Communications Security","sponsor":["SIGSAC ACM Special Interest Group on Security, Audit, and Control"],"location":"Copenhagen Denmark","acronym":"CCS '23"},"container-title":["Proceedings of the 16th ACM Workshop on Artificial Intelligence and Security"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605764.3623913","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605764.3623913","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:37:40Z","timestamp":1755913060000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605764.3623913"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,26]]},"references-count":35,"alternative-id":["10.1145\/3605764.3623913","10.1145\/3605764"],"URL":"https:\/\/doi.org\/10.1145\/3605764.3623913","relation":{},"subject":[],"published":{"date-parts":[[2023,11,26]]},"assertion":[{"value":"2023-11-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}