{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T22:10:06Z","timestamp":1778364606877,"version":"3.51.4"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032190987","type":"print"},{"value":"9783032190994","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-19099-4_40","type":"book-chapter","created":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T22:06:23Z","timestamp":1778364383000},"page":"584-597","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Guidelines for\u00a0Safe and\u00a0Robust Reinforcement Learning: From\u00a0Definitions to\u00a0Design"],"prefix":"10.1007","author":[{"given":"Taku","family":"Yamagata","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ra\u00fal","family":"Santos-Rodr\u00edguez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,1]]},"reference":[{"key":"40_CR1","unstructured":"Agarwal, R., Schuurmans, D., Norouzi, M.: An optimistic perspective on offline reinforcement learning. In: International Conference on Machine Learning, pp. 104\u2013114. PMLR (2020)"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Alberti, K.G.M.M., Zimmet, P.Z.: Definition, diagnosis and classification of diabetes mellitus and its complications. Part 1: diagnosis and classification of diabetes mellitus. Provisional report of a who consultation. Diabetic Med. 15, 539\u2013553 (1998)","DOI":"10.1002\/(SICI)1096-9136(199807)15:7<539::AID-DIA668>3.0.CO;2-S"},{"key":"40_CR3","volume-title":"Constrained Markov Decision Processes","author":"E Altman","year":"1999","unstructured":"Altman, E.: Constrained Markov Decision Processes, vol. 7. CRC Press, Boca Raton (1999)"},{"key":"40_CR4","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","volume":"5","author":"L Brunke","year":"2022","unstructured":"Brunke, L., et al.: Safe learning in robotics: from learning-based control to safe reinforcement learning. Ann. Rev. Control Robot. Auton. Syst. 5, 411\u2013444 (2022)","journal-title":"Ann. Rev. Control Robot. Auton. Syst."},{"key":"40_CR5","unstructured":"Casper, S., et\u00a0al.: Open problems and fundamental limitations of reinforcement learning from human feedback. arXiv preprint arXiv:2307.15217 (2023)"},{"key":"40_CR6","doi-asserted-by":"publisher","unstructured":"Chen, S., Li, Y.: An overview of robust reinforcement learning. In: 2020 IEEE International Conference on Networking, Sensing and Control (ICNSC), pp.\u00a01\u20136 (2020). https:\/\/doi.org\/10.1109\/ICNSC48988.2020.9238129","DOI":"10.1109\/ICNSC48988.2020.9238129"},{"key":"40_CR7","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/12549.001.0001","volume-title":"AI Ethics","author":"M Coeckelbergh","year":"2020","unstructured":"Coeckelbergh, M.: AI Ethics. The MIT Press, Cambridge (2020)"},{"key":"40_CR8","doi-asserted-by":"publisher","first-page":"476","DOI":"10.2337\/dc14-1952","volume":"38","author":"AK Davis","year":"2015","unstructured":"Davis, A.K., et al.: Prevalence of detectable c-peptide according to age at diagnosis and duration of type 1 diabetes. Diabetes Care 38, 476\u2013481 (2015)","journal-title":"Diabetes Care"},{"key":"40_CR9","unstructured":"Donnot, B.: Grid2op-a testbed platform to model sequential decision making in power systems. GitHub repository (2020)"},{"key":"40_CR10","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: 34th International Conference on Machine Learning, ICML 2017, vol. 3, pp. 1856\u20131868 (2017)"},{"key":"40_CR11","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(42), 1437\u20131480 (2015). http:\/\/jmlr.org\/papers\/v16\/garcia15a.html"},{"issue":"1","key":"40_CR12","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(1), 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"40_CR13","unstructured":"Gu, S., et al.: A review of safe reinforcement learning: methods, theory and applications. arXiv preprint arXiv:2205.10330 (2022)"},{"key":"40_CR14","unstructured":"Hadfield-Menell, D., Russell, S.J., Abbeel, P., Dragan, A.: Cooperative inverse reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"issue":"1","key":"40_CR15","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s11023-020-09517-8","volume":"30","author":"T Hagendorff","year":"2020","unstructured":"Hagendorff, T.: The ethics of AI ethics: an evaluation of guidelines. Mind. Mach. 30(1), 99\u2013120 (2020). https:\/\/doi.org\/10.1007\/s11023-020-09517-8","journal-title":"Mind. Mach."},{"key":"40_CR16","series-title":"Springer Proceedings in Advanced Robotics","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1007\/978-3-030-44051-0_19","volume-title":"Algorithmic Foundations of Robotics XIII","author":"J Harrison","year":"2020","unstructured":"Harrison, J., Sharma, A., Pavone, M.: Meta-learning priors for efficient online Bayesian regression. In: Morales, M., Tapia, L., S\u00e1nchez-Ante, G., Hutchinson, S. (eds.) WAFR 2018. SPAR, vol. 14, pp. 318\u2013337. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-44051-0_19"},{"key":"40_CR17","unstructured":"High-Level Expert Group on AI: Ethics guidelines for trustworthy ai. Report, European Commission, Brussels (2019). https:\/\/ec.europa.eu\/digital-single-market\/en\/news\/ethics-guidelines-trustworthy-ai"},{"issue":"2","key":"40_CR18","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1287\/moor.1040.0129","volume":"30","author":"GN Iyengar","year":"2005","unstructured":"Iyengar, G.N.: Robust dynamic programming. Math. Oper. Res. 30(2), 257\u2013280 (2005)","journal-title":"Math. Oper. Res."},{"key":"40_CR19","doi-asserted-by":"publisher","first-page":"1537868","DOI":"10.3389\/frwa.2025.1537868","volume":"7","author":"L K\u00e5ge","year":"2025","unstructured":"K\u00e5ge, L., Mili\u0107, V., Andersson, M., Wall\u00e9n, M.: Reinforcement learning applications in water resource management: a systematic literature review. Front. Water 7, 1537868 (2025)","journal-title":"Front. Water"},{"key":"40_CR20","doi-asserted-by":"publisher","first-page":"1655","DOI":"10.2337\/diacare.20.11.1655","volume":"20","author":"BP Kovatchev","year":"1997","unstructured":"Kovatchev, B.P., Cox, D.J., Gonder-Frederick, L.A., Clarke, W.: Symmetrization of the blood glucose measurement scale and its applications. Diabetes Care 20, 1655\u20131658 (1997). https:\/\/doi.org\/10.2337\/diacare.20.11.1655","journal-title":"Diabetes Care"},{"key":"40_CR21","unstructured":"Krajna, A., Brcic, M., Lipic, T., Doncevic, J.: Explainability in reinforcement learning: perspective and position. arXiv preprint arXiv:2203.11547 (2022)"},{"key":"40_CR22","unstructured":"Lawrence, N.D.: Data readiness levels. arXiv preprint arXiv:1705.02245 (2017)"},{"key":"40_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Y., Halev, A., Liu, X.: Policy learning with constraints in model-free reinforcement learning: a survey. In: The 30th International Joint Conference on Artificial Intelligence (IJCAI) (2021)","DOI":"10.24963\/ijcai.2021\/614"},{"key":"40_CR24","doi-asserted-by":"crossref","unstructured":"Madaio, M., Chen, J., Wallach, H., Wortman\u00a0Vaughan, J.: Tinker, tailor, configure, customize: the articulation work of customizing ai fairness checklists. In: Computer-Supported Cooperative Work (2024)","DOI":"10.1145\/3653705"},{"key":"40_CR25","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1177\/1932296813514502","volume":"8","author":"CD Man","year":"2014","unstructured":"Man, C.D., Micheletto, F., Lv, D., Breton, M., Kovatchev, B., Cobelli, C.: The UVA\/PADOVA type 1 diabetes simulator: new features. J. Diabetes Sci. Technol. 8, 26\u201334 (2014). https:\/\/doi.org\/10.1177\/1932296813514502","journal-title":"J. Diabetes Sci. Technol."},{"key":"40_CR26","unstructured":"Marot, A., et al.: Learning to run a power network challenge: a retrospective analysis. In: NeurIPS 2020 Competition and Demonstration Track, pp. 112\u2013132. PMLR (2021)"},{"key":"40_CR27","doi-asserted-by":"crossref","unstructured":"Milani, S., Topin, N., Veloso, M., Fang, F.: Explainable reinforcement learning: a survey and comparative review. ACM Comput. Surv. (2023)","DOI":"10.1145\/3616864"},{"issue":"2\u20133","key":"40_CR28","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1177\/1357034X04042932","volume":"10","author":"A Mol","year":"2004","unstructured":"Mol, A., Law, J.: Embodied action, enacted bodies: the example of hypoglycaemia. Body Soc. 10(2\u20133), 43\u201362 (2004)","journal-title":"Body Soc."},{"key":"40_CR29","doi-asserted-by":"publisher","unstructured":"Moos, J., Hansel, K., Abdulsamad, H., Stark, S., Clever, D., Peters, J.: Robust reinforcement learning: a review of foundations and recent advances. Mach. Learn. Knowl. Extraction 4(1), 276\u2013315 (2022). https:\/\/doi.org\/10.3390\/make4010013. https:\/\/www.mdpi.com\/2504-4990\/4\/1\/13","DOI":"10.3390\/make4010013"},{"issue":"2","key":"40_CR30","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1162\/0899766053011528","volume":"17","author":"J Morimoto","year":"2005","unstructured":"Morimoto, J., Doya, K.: Robust reinforcement learning. Neural Comput. 17(2), 335\u2013359 (2005)","journal-title":"Neural Comput."},{"key":"40_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2024.100937","volume":"57","author":"IS Mughal","year":"2024","unstructured":"Mughal, I.S., Patan\u00e8, L., Caponetto, R.: A comprehensive review of models and nonlinear control strategies for blood glucose regulation in artificial pancreas. Annu. Rev. Control. 57, 100937 (2024)","journal-title":"Annu. Rev. Control."},{"key":"40_CR32","doi-asserted-by":"crossref","unstructured":"Mynatt, E.D., Abowd, G.D., Mamykina, L., Kientz, J.A.: Understanding the potential of ubiquitous computing for chronic disease management. In: Health Informatics: A Patient-Centered Approach to Diabetes. Health Informatics, pp. 85\u2013106 (2010)","DOI":"10.7551\/mitpress\/9780262014328.003.0003"},{"issue":"4","key":"40_CR33","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1007\/s10676-022-09665-8","volume":"24","author":"EA Neufeld","year":"2022","unstructured":"Neufeld, E.A., Bartocci, E., Ciabattoni, A., Governatori, G.: Enforcing ethical goals over reinforcement-learning policies. Ethics Inf. Technol. 24(4), 43 (2022)","journal-title":"Ethics Inf. Technol."},{"key":"40_CR34","unstructured":"NHS Choices: Type 1 diabetes (2018). https:\/\/www.nhs.uk\/conditions\/type-1-diabetes\/"},{"key":"40_CR35","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"40_CR36","unstructured":"Pan, A., Bhatia, K., Steinhardt, J.: The effects of reward misspecification: mapping and mitigating misaligned models. arXiv preprint arXiv:2201.03544 (2022)"},{"issue":"1","key":"40_CR37","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1162\/neco.1991.3.1.88","volume":"3","author":"DA Pomerleau","year":"1991","unstructured":"Pomerleau, D.A.: Efficient training of artificial neural networks for autonomous navigation. Neural Comput. 3(1), 88\u201397 (1991)","journal-title":"Neural Comput."},{"key":"40_CR38","unstructured":"Ross, S., Gordon, G., Bagnell, D.: A reduction of imitation learning and structured prediction to no-regret online learning. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 627\u2013635. JMLR Workshop and Conference Proceedings (2011)"},{"key":"40_CR39","doi-asserted-by":"publisher","first-page":"153171","DOI":"10.1109\/ACCESS.2021.3126658","volume":"9","author":"E Salvato","year":"2021","unstructured":"Salvato, E., Fenu, G., Medvet, E., Pellegrino, F.A.: Crossing the reality gap: a survey on sim-to-real transferability of robot controllers in reinforcement learning. IEEE Access 9, 153171\u2013153187 (2021)","journal-title":"IEEE Access"},{"key":"40_CR40","doi-asserted-by":"crossref","unstructured":"Sokol, K., Flach, P.: Explainability fact sheets: a framework for systematic assessment of explainable approaches. In: Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency, pp. 56\u201367 (2020)","DOI":"10.1145\/3351095.3372870"},{"key":"40_CR41","volume-title":"Reinforcement Learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning. The MIT Press, Cambridge (1998)"},{"key":"40_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2021.108682","volume":"206","author":"X Wang","year":"2022","unstructured":"Wang, X., Thomas, J.D., Piechocki, R.J., Kapoor, S., Santos-Rodr\u00edguez, R., Parekh, A.: Self-play learning strategies for resource assignment in open-ran networks. Comput. Netw. 206, 108682 (2022)","journal-title":"Comput. Netw."},{"issue":"3410","key":"40_CR43","doi-asserted-by":"publisher","first-page":"1355","DOI":"10.1126\/science.131.3410.1355","volume":"131","author":"N Wiener","year":"1960","unstructured":"Wiener, N.: Some moral and technical consequences of automation: as machines learn they may develop unforeseen strategies at rates that baffle their programmers. Science 131(3410), 1355\u20131358 (1960)","journal-title":"Science"},{"key":"40_CR44","unstructured":"Xie, J.: Simglucose v0.2.1 (2018). https:\/\/github.com\/jxx123\/simglucose"},{"key":"40_CR45","unstructured":"Yamagata, T., McConville, R., Santos-Rodriguez, R.: Reinforcement learning with feedback from multiple humans with diverse skills. In: NeurIPS 2021 Workshop on Safe and Robust Control of Uncertain Systems, SafeRL 2021 (2021)"},{"key":"40_CR46","unstructured":"Yamagata, T., et al.: Model-based reinforcement learning for type 1 diabetes blood glucose control. In: CEUR Workshop Proceedings, vol. 2820, pp. 72\u201377 (2020)"},{"key":"40_CR47","unstructured":"Yamagata, T., Santos-Rodriguez, R.: Safe and robust reinforcement learning: principles and practice. arXiv:2403.18539 (2024)"},{"issue":"1","key":"40_CR48","doi-asserted-by":"publisher","first-page":"66","DOI":"10.3390\/signals3010006","volume":"3","author":"T Yamagata","year":"2022","unstructured":"Yamagata, T., Santos-Rodr\u00edguez, R., Flach, P.: Continuous adaptation with online meta-learning for non-stationary target regression tasks. Signals 3(1), 66\u201385 (2022)","journal-title":"Signals"},{"key":"40_CR49","doi-asserted-by":"crossref","unstructured":"Zhao, W., Queralta, J.P., Westerlund, T.: Sim-to-real transfer in deep reinforcement learning for robotics: a survey. In: 2020 IEEE Symposium Series on Computational Intelligence (SSCI), pp. 737\u2013744. IEEE (2020)","DOI":"10.1109\/SSCI47803.2020.9308468"}],"container-title":["Communications in Computer and Information Science","Machine Learning and Principles and Practice of Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-19099-4_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T22:06:32Z","timestamp":1778364392000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-19099-4_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032190987","9783032190994"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-19099-4_40","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"1 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that\u00a0are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}