{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T14:49:23Z","timestamp":1782312563974,"version":"3.54.5"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032061058","type":"print"},{"value":"9783032061065","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T00:00:00Z","timestamp":1759449600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T00:00:00Z","timestamp":1759449600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06106-5_8","type":"book-chapter","created":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T10:08:55Z","timestamp":1759399735000},"page":"129-145","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Viability of\u00a0Future Actions: Robust Safety in\u00a0Reinforcement Learning via\u00a0Entropy Regularization"],"prefix":"10.1007","author":[{"given":"Pierre-Fran\u00e7ois","family":"Massiani","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alexander","family":"von Rohr","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lukas","family":"Haverbeck","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sebastian","family":"Trimpe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,10,3]]},"reference":[{"issue":"1","key":"8_CR1","doi-asserted-by":"publisher","first-page":"276","DOI":"10.3390\/make4010013","volume":"4","author":"J Moos","year":"2022","unstructured":"Moos, J.: Robust reinforcement learning: a review of foundations and recent advances. Mach. Learn. Knowl. Extraction 4(1), 276\u2013315 (2022). https:\/\/doi.org\/10.3390\/make4010013","journal-title":"Mach. Learn. Knowl. Extraction"},{"key":"8_CR2","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International conference on machine learning, pp. 1861\u20131870. PMLR (2018)"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Massiani, P.F., Heim, S., Solowjow, F., Trimpe, S.: Safe Value Functions. IEEE Trans. Autom. Control (2023)","DOI":"10.1109\/TAC.2022.3200948"},{"key":"8_CR4","doi-asserted-by":"publisher","unstructured":"Haarnoja, T., et al.: Composable deep reinforcement learning for robotic manipulation. In: IEEE International Conference on Robotics and Automation, pp. 6244\u20136251 (2018). https:\/\/doi.org\/10.1109\/ICRA.2018.8460756","DOI":"10.1109\/ICRA.2018.8460756"},{"key":"8_CR5","doi-asserted-by":"publisher","unstructured":"Haarnoja, T., et al.: Learning to walk via deep reinforcement learning. In: Proceedings of Robotics: Science and Systems (2019). https:\/\/doi.org\/10.15607\/RSS.2019.XV.011","DOI":"10.15607\/RSS.2019.XV.011"},{"key":"8_CR6","unstructured":"Eysenbach, B., Levine, S.: Maximum entropy RL (provably) solves some robust RL problems. In: International Conference on Learning Representations (2022)"},{"key":"8_CR7","unstructured":"Geist, M., Scherrer, B., Pietquin, O.: A theory of regularized Markov decision processes. In: Proceedings of the 36th International Conference on Machine Learning, vol.\u00a097, pp. 2160\u20132169 (2019)"},{"issue":"4","key":"8_CR8","doi-asserted-by":"publisher","first-page":"939","DOI":"10.1109\/TRO.2019.2910739","volume":"35","author":"S Heim","year":"2019","unstructured":"Heim, S., Badri-Spr\u00f6witz, A.: Beyond basins of attraction: quantifying robustness of natural dynamics. IEEE Trans. Rob. 35(4), 939\u2013952 (2019)","journal-title":"IEEE Trans. Rob."},{"key":"8_CR9","unstructured":"Heim, S., Rohr, A., Trimpe, S., Badri-Spr\u00f6witz, A.: A Learnable Safety Measure. In: Conference on Robot Learning, pp. 627\u2013639. PMLR (2020)"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Aubin, J.P., Bayen, A.M., Saint-Pierre, P.: Viability theory: new directions. Springer Science and Business Media (2011)","DOI":"10.1007\/978-3-642-16684-6"},{"key":"8_CR11","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","volume":"5","author":"L Brunke","year":"2022","unstructured":"Brunke, L.: Safe learning in robotics: from learning-based control to safe reinforcement learning. Ann. Rev. Control Robot. Auton. Syst. 5, 411\u2013444 (2022)","journal-title":"Ann. Rev. Control Robot. Auton. Syst."},{"key":"8_CR12","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: International conference on machine learning, pp. 22\u201331. PMLR (2017)"},{"key":"8_CR13","unstructured":"Kerrigan, E.C., Maciejowski, J.M.: Soft constraints and exact penalty functions in model predictive control. In: Control 2000 Conference, Cambridge, pp. 2319\u20132327 (2000)"},{"issue":"4","key":"8_CR14","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1002\/oca.4660100404","volume":"10","author":"AQ Xing","year":"1989","unstructured":"Xing, A.Q., Wang, C.L.: Applications of the exterior penalty method in constrained optimal control problems. Optimal Control Appl. Methods 10(4), 333\u2013345 (1989)","journal-title":"Optimal Control Appl. Methods"},{"key":"8_CR15","unstructured":"Zhou, K., Doyle, J., Glover, K.: Robust and optimal control. Prentice Hall (1996)"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"Gr\u00fcne, L., Pannek, J.: Nonlinear Model Predictive Control. Springer, 2nd edn. (2017)","DOI":"10.1007\/978-3-319-46024-6"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Limon, D., et al.: Input-to-state stability: a unifying framework for robust model predictive control. Nonlinear Model Predictive Control: Towards New Challenging Applications, pp. 1\u201326 (2009)","DOI":"10.1007\/978-3-642-01094-1_1"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Bansal, S., Chen, M., Herbert, S., Tomlin, C.J.: Hamilton-jacobi reachability: a brief overview and recent advances. In: Conference on Decision and Control, pp. 2242\u20132253 (2017)","DOI":"10.1109\/CDC.2017.8263977"},{"issue":"5","key":"8_CR19","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1109\/TAC.2006.875041","volume":"51","author":"GC Calafiore","year":"2006","unstructured":"Calafiore, G.C., Campi, M.C.: The scenario approach to robust control design. IEEE Trans. Autom. Control 51(5), 742\u2013753 (2006)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"2","key":"8_CR20","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1162\/0899766053011528","volume":"17","author":"J Morimoto","year":"2005","unstructured":"Morimoto, J., Doya, K.: Robust reinforcement learning. Neural Comput. 17(2), 335\u2013359 (2005). https:\/\/doi.org\/10.1162\/0899766053011528","journal-title":"Neural Comput."},{"key":"8_CR21","unstructured":"Pinto, L., Davidson, J., Sukthankar, R., Gupta, A.: Robust adversarial reinforcement learning. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a070, pp. 2817\u20132826 (2017)"},{"key":"8_CR22","unstructured":"Tessler, C., Efroni, Y., Mannor, S.: Action robust reinforcement learning and applications in continuous control. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. vol.\u00a097, pp. 6215\u20136224 (2019)"},{"key":"8_CR23","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT press (2018)"},{"key":"8_CR24","unstructured":"Fox, R., Pakman, A., Tishby, N.: Taming the noise in reinforcement learning via soft updates. In: 32nd Conference on Uncertainty in Artificial Intelligence, pp. 202\u2013211 (2016)"},{"key":"8_CR25","unstructured":"Nachum, O., Norouzi, M., Xu, K., Schuurmans, D.: Bridging the gap between value and policy based reinforcement learning. Advances Neural Inf. Process. Syst. 30 (2017)"},{"key":"8_CR26","unstructured":"Massiani, P.F., Heim, S., Trimpe, S.: On exploration requirements for learning safety constraints. In: Learning for Dynamics and Control, pp. 905\u2013916. PMLR (2021)"},{"key":"8_CR27","doi-asserted-by":"publisher","unstructured":"Towers, M., et al.: Gymnasium (2023). https:\/\/doi.org\/10.5281\/zenodo.8127026, https:\/\/zenodo.org\/record\/8127025","DOI":"10.5281\/zenodo.8127026"},{"key":"8_CR28","unstructured":"Haarnoja, T., et\u00a0al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)"},{"key":"8_CR29","unstructured":"Huang, S., et al.: CleanRL: high-quality single-file implementations of deep reinforcement learning algorithms. J. Mach. Learn. Res. 23(274), 1\u201318 (2022). http:\/\/jmlr.org\/papers\/v23\/21-1342.html"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06106-5_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T14:13:01Z","timestamp":1782310381000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06106-5_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,3]]},"ISBN":["9783032061058","9783032061065"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06106-5_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,3]]},"assertion":[{"value":"3 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}