{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,7]],"date-time":"2025-08-07T09:20:59Z","timestamp":1754558459262,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,8]],"date-time":"2024-04-08T00:00:00Z","timestamp":1712534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Bavarian Ministry for Economic Affairs, Regional Development and Energy"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,8]]},"DOI":"10.1145\/3605098.3635931","type":"proceedings-article","created":{"date-parts":[[2024,5,21]],"date-time":"2024-05-21T17:59:16Z","timestamp":1716314356000},"page":"1569-1578","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Can you trust your Agent? The Effect of Out-of-Distribution Detection on the Safety of Reinforcement Learning Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6786-0361","authenticated-orcid":false,"given":"Tom","family":"Haider","sequence":"first","affiliation":[{"name":"Fraunhofer IKS, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9458-104X","authenticated-orcid":false,"given":"Karsten","family":"Roscher","sequence":"additional","affiliation":[{"name":"Fraunhofer IKS, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6439-8845","authenticated-orcid":false,"given":"Benjamin","family":"Herd","sequence":"additional","affiliation":[{"name":"Fraunhofer IKS, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6900-6601","authenticated-orcid":false,"given":"Felippe","family":"Schmoeller Roza","sequence":"additional","affiliation":[{"name":"Fraunhofer IKS, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9040-8752","authenticated-orcid":false,"given":"Simon","family":"Burton","sequence":"additional","affiliation":[{"name":"Fraunhofer IKS, Munich, Germany"}]}],"member":"320","published-online":{"date-parts":[[2024,5,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International conference on machine learning. PMLR, 22--31","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam, David Held, Aviv Tamar, and Pieter Abbeel. 2017. Constrained policy optimization. In International conference on machine learning. PMLR, 22--31."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"e_1_3_2_1_3_1","volume-title":"OpenAI Pieter Abbeel, and Wojciech Zaremba","author":"Andrychowicz Marcin","year":"2017","unstructured":"Marcin Andrychowicz, Filip Wolski, Alex Ray, Jonas Schneider, Rachel Fong, Peter Welinder, Bob McGrew, Josh Tobin, OpenAI Pieter Abbeel, and Wojciech Zaremba. 2017. Hindsight experience replay. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2020.3022030"},{"key":"e_1_3_2_1_5_1","volume-title":"Jun Gong, Marlos C Machado, Subhodeep Moitra, Sameera S Ponda, and Ziyu Wang.","author":"Bellemare Marc G","year":"2020","unstructured":"Marc G Bellemare, Salvatore Candido, Pablo Samuel Castro, Jun Gong, Marlos C Machado, Subhodeep Moitra, Sameera S Ponda, and Ziyu Wang. 2020. Autonomous navigation of stratospheric balloons using reinforcement learning. Nature 588, 7836 (2020), 77--82."},{"key":"e_1_3_2_1_6_1","volume-title":"A Markovian decision process. Journal of mathematics and mechanics","author":"Bellman Richard","year":"1957","unstructured":"Richard Bellman. 1957. A Markovian decision process. Journal of mathematics and mechanics (1957), 679--684."},{"key":"e_1_3_2_1_7_1","volume-title":"Safe model-based reinforcement learning with stability guarantees. Advances in neural information processing systems 30","author":"Berkenkamp Felix","year":"2017","unstructured":"Felix Berkenkamp, Matteo Turchetta, Angela Schoellig, and Andreas Krause. 2017. Safe model-based reinforcement learning with stability guarantees. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/LES.2019.2953253"},{"volume-title":"Pattern recognition and machine learning","author":"Bishop Christopher M","key":"e_1_3_2_1_9_1","unstructured":"Christopher M Bishop and Nasser M Nasrabadi. 2006. Pattern recognition and machine learning. Vol. 4. Springer."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"e_1_3_2_1_11_1","volume-title":"A causal model of safety assurance for machine learning. arXiv preprint arXiv:2201.05451","author":"Burton Simon","year":"2022","unstructured":"Simon Burton. 2022. A causal model of safety assurance for machine learning. arXiv preprint arXiv:2201.05451 (2022)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-66284-8_1"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.3389\/fcomp.2023.1132580"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2017.2738640"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1541880.1541882"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"e_1_3_2_1_17_1","volume-title":"Seungjae Ryan Lee, and Jordan Terry.","author":"de Lazcano Rodrigo","year":"2023","unstructured":"Rodrigo de Lazcano, Kallinteris Andreas, Jun Jet Tai, Seungjae Ryan Lee, and Jordan Terry. 2023. Gymnasium Robotics. http:\/\/github.com\/Farama-Foundation\/Gymnasium-Robotics"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2015.199"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 5th Machine Learning for Healthcare Conference. PMLR, 508--536","author":"Fox Ian","year":"2020","unstructured":"Ian Fox, Joyce Lee, Rodica Pop-Busui, and Jenna Wiens. 2020. Deep Reinforcement Learning for Closed-Loop Blood Glucose Control. In Proceedings of the 5th Machine Learning for Healthcare Conference. PMLR, 508--536."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"e_1_3_2_1_21_1","unstructured":"Marek Grzes. 2017. Reward shaping in episodic reinforcement learning. (2017)."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 22nd International Conference on Autonomous Agents and Multiagent Systems.","author":"Haider Tom","year":"2023","unstructured":"Tom Haider, Karsten Roscher, Felippe Schmoeller Roza, and Stephan G\u00fcnnemann. 2023. Out-of-Distribution Detection for Reinforcement Learning Agents with Probabilistic Dynamics Models. In Proceedings of the 22nd International Conference on Autonomous Agents and Multiagent Systems."},{"key":"e_1_3_2_1_23_1","volume-title":"Dirk Eilers, Karsten Roscher, and Stephan G\u00fcnnemann.","author":"Haider Tom","year":"2021","unstructured":"Tom Haider, Felippe Schmoeller Roza, Dirk Eilers, Karsten Roscher, and Stephan G\u00fcnnemann. 2021. Domain Shifts in Reinforcement Learning: Identifying Disturbances in Environments.. In AISafety@ IJCAI."},{"key":"e_1_3_2_1_24_1","volume-title":"Dotan Di Castro, and Shie Mannor","author":"Hallak Assaf","year":"2015","unstructured":"Assaf Hallak, Dotan Di Castro, and Shie Mannor. 2015. Contextual markov decision processes. arXiv preprint arXiv:1502.02259 (2015)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-85729-133-2_1"},{"key":"e_1_3_2_1_26_1","volume-title":"A baseline for detecting misclassified and out-of-distribution examples in neural networks. arXiv preprint arXiv:1610.02136","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. A baseline for detecting misclassified and out-of-distribution examples in neural networks. arXiv preprint arXiv:1610.02136 (2016)."},{"key":"e_1_3_2_1_27_1","volume-title":"Seyed Eghbal Ghobadi, Ahmed Hammam, et al.","author":"Houben Sebastian","year":"2022","unstructured":"Sebastian Houben, Stephanie Abrecht, Maram Akila, Andreas B\u00e4r, Felix Brockherde, Patrick Feifel, Tim Fingscheidt, Sujan Sai Gannamaneni, Seyed Eghbal Ghobadi, Ahmed Hammam, et al. 2022. Inspect, understand, overcome: A survey of practical methods for ai safety. In Deep Neural Networks and Data for Automated Driving: Robustness, Uncertainty Quantification, and Insights Towards Safety. Springer International Publishing Cham, 3--78."},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Machine Learning. PMLR, 5556--5566","author":"Kuznetsov Arsenii","year":"2020","unstructured":"Arsenii Kuznetsov, Pavel Shvechikov, Alexander Grishin, and Dmitry Vetrov. 2020. Controlling overestimation bias with truncated mixture of continuous distributional quantile critics. In International Conference on Machine Learning. PMLR, 5556--5566."},{"key":"e_1_3_2_1_29_1","volume-title":"Thirty-fifth conference on neural information processing systems datasets and benchmarks track (round 1).","author":"Lai Kwei-Herng","year":"2021","unstructured":"Kwei-Herng Lai, Daochen Zha, Junjie Xu, Yue Zhao, Guanchu Wang, and Xia Hu. 2021. Revisiting time series outlier detection: Definitions and benchmarks. In Thirty-fifth conference on neural information processing systems datasets and benchmarks track (round 1)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems. 1799--1803","author":"M\u00fcller Robert","year":"2022","unstructured":"Robert M\u00fcller, Steffen Illium, Thomy Phan, Tom Haider, and Claudia Linnhoff-Popien. 2022. Towards Anomaly Detection in Reinforcement Learning. In Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems. 1799--1803."},{"key":"e_1_3_2_1_31_1","volume-title":"Deep Dynamics Models for Learning Dexterous Manipulation. arXiv:1909.11652 [cs] (Sept","author":"Nagabandi Anusha","year":"2019","unstructured":"Anusha Nagabandi, Kurt Konoglie, Sergey Levine, and Vikash Kumar. 2019. Deep Dynamics Models for Learning Dexterous Manipulation. arXiv:1909.11652 [cs] (Sept. 2019)."},{"volume-title":"Computer Safety, Reliability, and Security: 41st International Conference","author":"Osborne Matt","key":"e_1_3_2_1_32_1","unstructured":"Matt Osborne, Richard Hawkins, and John McDermid. 2022. Analysing the Safety of Decision-Making in Autonomous Systems. In Computer Safety, Reliability, and Security: 41st International Conference, SAFECOMP. Springer, 3--16."},{"key":"e_1_3_2_1_33_1","volume-title":"An analysis of ISO 26262: Using machine learning safely in automotive software. arXiv preprint arXiv:1709.02435","author":"Salay Rick","year":"2017","unstructured":"Rick Salay, Rodrigo Queiroz, and Krzysztof Czarnecki. 2017. An analysis of ISO 26262: Using machine learning safely in automotive software. arXiv preprint arXiv:1709.02435 (2017)."},{"key":"e_1_3_2_1_34_1","volume-title":"Archana Sapkota, and Terrance E Boult.","author":"Scheirer Walter J","year":"2012","unstructured":"Walter J Scheirer, Anderson de Rezende Rocha, Archana Sapkota, and Terrance E Boult. 2012. Toward open set recognition. IEEE transactions on pattern analysis and machine intelligence 35, 7 (2012), 1757--1772."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.20378\/irb-47275"},{"key":"e_1_3_2_1_36_1","volume-title":"Uncertainty-based out-of-distribution detection in deep reinforcement learning. arXiv preprint arXiv:1901.02219","author":"Sedlmeier Andreas","year":"2019","unstructured":"Andreas Sedlmeier, Thomas Gabor, Thomy Phan, Lenz Belzner, and Claudia Linnhoff-Popien. 2019. Uncertainty-based out-of-distribution detection in deep reinforcement learning. arXiv preprint arXiv:1901.02219 (2019)."},{"key":"e_1_3_2_1_37_1","volume-title":"Learning to be safe: Deep rl with a safety critic. arXiv preprint arXiv:2010.14603","author":"Srinivasan Krishnan","year":"2020","unstructured":"Krishnan Srinivasan, Benjamin Eysenbach, Sehoon Ha, Jie Tan, and Chelsea Finn. 2020. Learning to be safe: Deep rl with a safety critic. arXiv preprint arXiv:2010.14603 (2020)."},{"key":"e_1_3_2_1_38_1","volume-title":"Generalized out-of-distribution detection: A survey. arXiv preprint arXiv:2110.11334","author":"Yang Jingkang","year":"2021","unstructured":"Jingkang Yang, Kaiyang Zhou, Yixuan Li, and Ziwei Liu. 2021. Generalized out-of-distribution detection: A survey. arXiv preprint arXiv:2110.11334 (2021)."}],"event":{"name":"SAC '24: 39th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Avila Spain","acronym":"SAC '24"},"container-title":["Proceedings of the 39th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605098.3635931","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605098.3635931","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:14Z","timestamp":1750178174000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605098.3635931"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,8]]},"references-count":38,"alternative-id":["10.1145\/3605098.3635931","10.1145\/3605098"],"URL":"https:\/\/doi.org\/10.1145\/3605098.3635931","relation":{},"subject":[],"published":{"date-parts":[[2024,4,8]]},"assertion":[{"value":"2024-05-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}