{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T20:31:49Z","timestamp":1743021109991,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031159077"},{"type":"electronic","value":"9783031159084"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-15908-4_16","type":"book-chapter","created":{"date-parts":[[2022,9,2]],"date-time":"2022-09-02T13:04:24Z","timestamp":1662123864000},"page":"190-204","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Task Independent Safety Assessment for\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Mark","family":"Jocas","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Firas","family":"Zoghlami","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philip","family":"Kurrek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mario","family":"Gianni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vahid","family":"Salehi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,9,1]]},"reference":[{"issue":"13","key":"16_CR1","doi-asserted-by":"publisher","first-page":"1608","DOI":"10.1177\/0278364910371999","volume":"29","author":"P Abbeel","year":"2010","unstructured":"Abbeel, P., Coates, A., Ng, A.Y.: Autonomous helicopter aerobatics through apprenticeship learning. Int. J. Robot. Res. 29(13), 1608\u20131639 (2010)","journal-title":"Int. J. Robot. Res."},{"key":"16_CR2","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. arXiv preprint arXiv:1705.10528 (2017)"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. arXiv preprint arXiv:1708.08611 (2017)","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Arulkumaran, K., Deisenroth, M.P., Brundage, M., Bharath, A.A.: A brief survey of deep reinforcement learning. arXiv preprint arXiv:1708.05866 (2017)","DOI":"10.1109\/MSP.2017.2743240"},{"key":"16_CR5","unstructured":"Babcock, J., Kramar, J., Yampolskiy, R.V.: Guidelines for artificial intelligence containment. arXiv preprint arXiv:1707.08476 (2017)"},{"key":"16_CR6","volume-title":"Systems and Software Verification: Model-Checking Techniques and Tools","author":"B B\u00e9rard","year":"2013","unstructured":"B\u00e9rard, B., Bidoit, M., Finkel, A., Laroussinie, F., Petit, A., Petrucci, L., Schnoebelen, P.: Systems and Software Verification: Model-Checking Techniques and Tools. Springer, Heidelberg (2013)"},{"key":"16_CR7","unstructured":"Berkenkamp, F., Turchetta, M., Schoellig, A.P., Krause, A.: Safe model-based reinforcement learning with stability guarantees. arXiv preprint arXiv:1705.08551 (2017)"},{"key":"16_CR8","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","volume":"5","author":"L Brunke","year":"2021","unstructured":"Brunke, L., et al.: Safe learning in robotics: from learning-based control to safe reinforcement learning. Ann. Rev. Control Robot. Auton. Syst. 5, 411\u2013444 (2021)","journal-title":"Ann. Rev. Control Robot. Auton. Syst."},{"key":"16_CR9","unstructured":"Gao, Y., Lin, J., Yu, F., Levine, S., Darrell, T., et al.: Reinforcement learning from imperfect demonstrations. arXiv preprint arXiv:1802.05313 (2018)"},{"key":"16_CR10","unstructured":"Garcia, J., Fernandez, F.: Safe exploration of state and action spaces in reinforcement learning. CoRR abs\/1402.0560 (2014). http:\/\/arxiv.org\/abs\/1402.0560"},{"issue":"1","key":"16_CR11","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(1), 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR12","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1613\/jair.1666","volume":"24","author":"P Geibel","year":"2005","unstructured":"Geibel, P., Wysotzki, F.: Risk-sensitive reinforcement learning applied to control under constraints. J. Artif. Intell. Res. 24, 81\u2013108 (2005)","journal-title":"J. Artif. Intell. Res."},{"key":"16_CR13","unstructured":"Ha, D., Schmidhuber, J.: World models. arXiv preprint arXiv:1803.10122 (2018)"},{"key":"16_CR14","unstructured":"Ha, S., Xu, P., Tan, Z., Levine, S., Tan, J.: Learning to walk in the real world with minimal human effort. arXiv preprint arXiv:2002.08550 (2020)"},{"key":"16_CR15","unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 97, pp. 2555\u20132565. PMLR, 09\u201315 June 2019. http:\/\/proceedings.mlr.press\/v97\/hafner19a.html"},{"key":"16_CR16","unstructured":"Hans, A., Schneega\u00df, D., Sch\u00e4fer, A.M., Udluft, S.: Safe exploration for reinforcement learning. In: ESANN, pp. 143\u2013148 (2008)"},{"key":"16_CR17","unstructured":"Juliani, A., et al.: Unity: a general platform for intelligent agents. arXiv preprint arXiv:1809.02627 (2018)"},{"key":"16_CR18","unstructured":"Kaiser, L., et al.: Model-based reinforcement learning for Atari. arXiv preprint arXiv:1903.00374 (2019)"},{"key":"16_CR19","doi-asserted-by":"publisher","unstructured":"Kurrek, P., Jocas, M., Zoghlami, F., Stoelen, M., Salehi, V.: AI motion control - a generic approach to develop control policies for robotic manipulation tasks. In: Proceedings of the Design Society: International Conference on Engineering Design, vol. 1, no. 1, pp. 3561\u20133570 (2019). https:\/\/doi.org\/10.1017\/dsi.2019.363","DOI":"10.1017\/dsi.2019.363"},{"key":"16_CR20","unstructured":"Menda, K., Driggs-Campbell, K., Kochenderfer, M.J.: DropoutDAgger: a Bayesian approach to safe imitation learning. arXiv preprint arXiv:1709.06166 (2017)"},{"key":"16_CR21","unstructured":"Moldovan, T.M., Abbeel, P.: Safe exploration in Markov decision processes. arXiv preprint arXiv:1205.4810 (2012)"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Osborne, M., Shin, H.S., Tsourdos, A.: A review of safe online learning for nonlinear control systems** this work has been jointly funded by the EPSRC and BAE systems under an industrial case studentship. In: 2021 International Conference on Unmanned Aircraft Systems (ICUAS), pp. 794\u2013803. IEEE (2021). The authors would also like to thank the following researchers for their kind assistance. Sumeet Singh, Ian Manchester and Johan L\u00f6fberg","DOI":"10.1109\/ICUAS51884.2021.9476765"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Pan, F., et al.: Policy optimization with model-based explorations. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 4675\u20134682 (2019)","DOI":"10.1609\/aaai.v33i01.33014675"},{"key":"16_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/978-3-030-55754-6_6","volume-title":"NASA Formal Methods","author":"DT Phan","year":"2020","unstructured":"Phan, D.T., Grosu, R., Jansen, N., Paoletti, N., Smolka, S.A., Stoller, S.D.: Neural simplex architecture. In: Lee, R., Jha, S., Mavridou, A., Giannakopoulou, D. (eds.) NFM 2020. LNCS, vol. 12229, pp. 97\u2013114. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-55754-6_6"},{"key":"16_CR25","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1613\/jair.1.11436","volume":"67","author":"R Ramakrishnan","year":"2020","unstructured":"Ramakrishnan, R., Kamar, E., Dey, D., Horvitz, E., Shah, J.: Blind spot detection for safe sim-to-real transfer. J. Artif. Intell. Res. 67, 191\u2013234 (2020)","journal-title":"J. Artif. Intell. Res."},{"key":"16_CR26","unstructured":"Rosenstein, M.T., Barto, A.G., Si, J., Barto, A., Powell, W.: Supervised actor-critic reinforcement learning. In: Learning and Approximate Dynamic Programming: Scaling Up to the Real World, pp. 359\u2013380 (2004)"},{"key":"16_CR27","unstructured":"Saunders, W., Sastry, G., Stuhlmueller, A., Evans, O.: Trial without error: towards safe reinforcement learning via human intervention. arXiv preprint arXiv:1707.05173 (2017)"},{"key":"16_CR28","unstructured":"Stooke, A., Lee, K., Abbeel, P., Laskin, M.: Decoupling representation learning from reinforcement learning. In: International Conference on Machine Learning, pp. 9870\u20139879. PMLR (2021)"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Tambon, F., et al.: How to certify machine learning based safety-critical systems? A systematic literature review. arXiv preprint arXiv:2107.12045 (2021)","DOI":"10.1007\/s10515-022-00337-x"},{"key":"16_CR30","unstructured":"Thomas, P., Theocharous, G., Ghavamzadeh, M.: High confidence policy improvement. In: Proceedings of the 32nd International Conference on Machine Learning (ICML 2015), pp. 2380\u20132388 (2015)"},{"key":"16_CR31","doi-asserted-by":"publisher","unstructured":"Zoghlami, F., Kurrek, P., Jocas, M., Masala, G., Salehi, V.: Usage identification of anomaly detection in an industrial context. In: Proceedings of the Design Society: International Conference on Engineering Design, vol. 1, no. 1, pp. 3761\u20133770 (2019). https:\/\/doi.org\/10.1017\/dsi.2019.383","DOI":"10.1017\/dsi.2019.383"}],"container-title":["Lecture Notes in Computer Science","Towards Autonomous Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-15908-4_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,2]],"date-time":"2022-09-02T13:06:35Z","timestamp":1662123995000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-15908-4_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031159077","9783031159084"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-15908-4_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TAROS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Annual Conference Towards Autonomous Robotic Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Culham","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"taros2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ukaeaevents.com\/23rd-taros\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OCS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"38","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}