{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T14:25:12Z","timestamp":1760711112094,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031229527"},{"type":"electronic","value":"9783031229534"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-22953-4_4","type":"book-chapter","created":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T17:35:40Z","timestamp":1674495340000},"page":"74-98","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Safe Policy Improvement Approaches and\u00a0Their Limitations"],"prefix":"10.1007","author":[{"given":"Philipp","family":"Scholl","sequence":"first","affiliation":[]},{"given":"Felix","family":"Dietrich","sequence":"additional","affiliation":[]},{"given":"Clemens","family":"Otte","sequence":"additional","affiliation":[]},{"given":"Steffen","family":"Udluft","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,1,20]]},"reference":[{"key":"4_CR1","unstructured":"Brafman, R.I., Tennenholtz, M.: R-MAX - a general polynomial time algorithm for near-optimal reinforcement learning. J. Mach. Learn. Res. 3 (2003)"},{"key":"4_CR2","unstructured":"Chow, Y., Tamar, A., Mannor, S., Pavone, M.: Risk-sensitive and robust decision-making: a CVaR optimization approach. In: Proceedings of the 28th International Conference on Neural Information Processing Systems (2015)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Dantzig, G.B.: Linear Programming and Extensions. RAND Corporation, Santa Monica (1963)","DOI":"10.7249\/R366"},{"key":"4_CR4","unstructured":"Fujimoto, S., Meger, D., Precup, D.: Off-policy deep reinforcement learning without exploration. In: Proceedings of the 36th International Conference on Machine Learning (2019)"},{"key":"4_CR5","unstructured":"Garc\u00eda, J., Fernandez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16 (2015)"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Hans, A., Duell, S., Udluft, S.: Agent self-assessment: determining policy quality without execution. In: IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (2011)","DOI":"10.1109\/ADPRL.2011.5967358"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Hans, A., Udluft, S.: Efficient uncertainty propagation for reinforcement learning with limited data. In: Artificial Neural Networks - ICANN, vol. 5768 (2009)","DOI":"10.1007\/978-3-642-04274-4_8"},{"issue":"301","key":"4_CR8","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1080\/01621459.1963.10500830","volume":"58","author":"W Hoeffding","year":"1963","unstructured":"Hoeffding, W.: Probability inequalities for sums of bounded random variables. J. Am. Stat. Assoc. 58(301), 13\u201330 (1963)","journal-title":"J. Am. Stat. Assoc."},{"key":"4_CR9","series-title":"Adaptation, Learning, and Optimization","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/978-3-642-27645-3_2","volume-title":"Reinforcement Learning","author":"S Lange","year":"2012","unstructured":"Lange, S., Gabel, T., Riedmiller, M.: Batch reinforcement learning. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning. ALO, vol. 12, pp. 45\u201373. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3_2"},{"key":"4_CR10","unstructured":"Laroche, R., Trichelair, P., Tachet des Combes, R.: Safe policy improvement with baseline bootstrapping. In: Proceedings of the 36th International Conference on Machine Learning (2019)"},{"key":"4_CR11","unstructured":"Leurent, E.: Safe and efficient reinforcement learning for behavioural planning in autonomous driving. Theses, Universit\u00e9 de Lille (2020)"},{"key":"4_CR12","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: tutorial, review, and perspectives on open problems. CoRR abs\/2005.01643 (2020)"},{"key":"4_CR13","unstructured":"Maurer, A., Pontil, M.: Empirical Bernstein bounds and sample-variance penalization. In: COLT (2009)"},{"key":"4_CR14","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/978-3-030-46133-1_4","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"K Nadjahi","year":"2020","unstructured":"Nadjahi, K., Laroche, R., Tachet des Combes, R.: Safe policy improvement with soft baseline bootstrapping. In: Brefeld, U., Fromont, E., Hotho, A., Knobbe, A., Maathuis, M., Robardet, C. (eds.) ECML PKDD 2019. LNCS (LNAI), vol. 11908, pp. 53\u201368. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-46133-1_4"},{"key":"4_CR15","unstructured":"Nilim, A., El Ghaoui, L.: Robustness in Markov decision problems with uncertain transition matrices. In: Proceedings of the 16th International Conference on Neural Information Processing Systems (2003)"},{"key":"4_CR16","unstructured":"Petrik, M., Ghavamzadeh, M., Chow, Y.: Safe policy improvement by minimizing robust baseline regret. In: Proceedings of the 30th International Conference on Neural Information Processing Systems, NIPS 2016, Curran Associates Inc., Red Hook (2016)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Schaefer, A.M., Schneegass, D., Sterzing, V., Udluft, S.: A neural reinforcement learning approach to gas turbine control. In: International Joint Conference on Neural Networks (2007)","DOI":"10.1109\/IJCNN.2007.4371212"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Schneegass, D., Hans, A., Udluft, S.: Uncertainty in reinforcement learning - awareness, quantisation, and control. In: Robot Learning. Sciyo (2010)","DOI":"10.5772\/10250"},{"key":"4_CR19","unstructured":"Scholl, P.: Evaluation of safe policy improvement with soft baseline bootstrapping. Master\u2019s thesis, Technical University of Munich (2021)"},{"key":"4_CR20","doi-asserted-by":"publisher","unstructured":"Scholl, P., Dietrich, F., Otte, C., Udluft, S.: Safe policy improvement approaches on discrete Markov decision processes. In: Proceedings of the 14th International Conference on Agents and Artificial Intelligence, ICAART, vol. 2, pp. 142\u2013151. INSTICC, SciTePress (2022). https:\/\/doi.org\/10.5220\/0010786600003116","DOI":"10.5220\/0010786600003116"},{"key":"4_CR21","unstructured":"Sim\u00e3o, T.D., Laroche, R., Tachet des Combes, R.: Safe policy improvement with an estimated baseline policy. In: Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems (2020)"},{"key":"4_CR22","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"4_CR23","unstructured":"Thomas, P.S.: Safe reinforcement learning. Doctoral dissertations. University of Massachusetts (2015)"},{"key":"4_CR24","unstructured":"Wang, R., Foster, D., Kakade, S.M.: What are the statistical limits of offline RL with linear function approximation? In: International Conference on Learning Representations (2021)"}],"container-title":["Lecture Notes in Computer Science","Agents and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-22953-4_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T17:38:42Z","timestamp":1674495522000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-22953-4_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031229527","9783031229534"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-22953-4_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"20 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAART","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Agents and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 February 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 February 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaart2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.icaart.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"PRIMORIS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"302","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"81","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"114","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}