{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:30:45Z","timestamp":1743096645800,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031258909"},{"type":"electronic","value":"9783031258916"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-25891-6_16","type":"book-chapter","created":{"date-parts":[[2023,3,9]],"date-time":"2023-03-09T14:03:34Z","timestamp":1678370614000},"page":"193-218","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enforcing Hard State-Dependent Action Bounds on\u00a0Deep Reinforcement Learning Policies"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4843-3342","authenticated-orcid":false,"given":"Bram","family":"De Cooman","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8846-6352","authenticated-orcid":false,"given":"Johan","family":"Suykens","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2555-4515","authenticated-orcid":false,"given":"Andreas","family":"Ortseifen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,10]]},"reference":[{"key":"16_CR1","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: 34th International Conference on Machine Learning, ICML 2017, vol. 1, pp. 30\u201347 (2017)"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: 32nd AAAI Conference on Artificial Intelligence, AAAI 2018, pp. 2669\u20132678 (2018)","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"16_CR3","unstructured":"Berkenkamp, F., Turchetta, M., Schoellig, A.P., Krause, A.: Safe model-based reinforcement learning with stability guarantees. In: Advances in Neural Information Processing Systems, vol. 30, pp. 909\u2013919 (2017)"},{"key":"16_CR4","unstructured":"Burkardt, J.: The truncated normal distribution, department of scientific computing. Fla. State Univ. 1\u201335 (2014)"},{"key":"16_CR5","doi-asserted-by":"publisher","unstructured":"Chandak, Y., Theocharous, G., Metevier, B., Thomas, P.S.: Reinforcement learning when all actions are not always available. In: AAAI 2020\u201334th AAAI Conference on Artificial Intelligence, pp. 3381\u20133388 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i04.5740","DOI":"10.1609\/aaai.v34i04.5740"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, R., Orosz, G., Murray, R.M., Burdick, J.W.: End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks. In: 33rd AAAI Conference on Artificial Intelligence, AAAI 2019, pp. 3387\u20133395 (2019)","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"16_CR7","unstructured":"Chow, Y., Nachum, O., Faust, A., Duenez-Guzman, E., Ghavamzadeh, M.: Lyapunov-based safe policy optimization for continuous control (2019)"},{"key":"16_CR8","unstructured":"Dalal, G., Dvijotham, K., Vecerik, M., Hester, T., Paduraru, C., Tassa, Y.: Safe exploration in continuous action spaces (2018)"},{"key":"16_CR9","unstructured":"De Cooman, B., Suykens, J., Ortseifen, A.: Improving temporal smoothness of deterministic reinforcement learning policies with continuous actions. In: 33rd Benelux Conference on Artificial Intelligence, BNAIC 2021, pp. 217\u2013240 (2021)"},{"key":"16_CR10","unstructured":"De Cooman, B., Suykens, J., Ortseifen, A., Subramanya, N.: Method for autonomous driving of a vehicle, a data processing circuit, a computer program, and a computer-readable medium, E.U. Patent Application EP22151063.9. 11 Jan 2022"},{"key":"16_CR11","unstructured":"Figurnov, M., Mohamed, S., Mnih, A.: Implicit reparameterization gradients. In: Advances in Neural Information Processing Systems, vol. 31, pp. 441\u2013452 (2018)"},{"key":"16_CR12","unstructured":"Fujimoto, S., Van Hoof, H., Meger, D.: addressing function approximation error in actor-critic methods. In: 35th International Conference on Machine Learning, ICML 2018, vol. 4, pp. 2587\u20132601 (2018)"},{"key":"16_CR13","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning (2015)"},{"key":"16_CR14","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications (2018)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Hoel, C.J., Driggs-Campbell, K., Wolff, K., Laine, L., Kochenderfer, M.J.: Combining planning and deep reinforcement learning in tactical decision making for autonomous driving. Technical report (2019)","DOI":"10.1109\/TIV.2019.2955905"},{"key":"16_CR16","unstructured":"Kalweit, G., Huegle, M., Werling, M., Boedecker, J.: Deep constrained Q-learning (2020)"},{"key":"16_CR17","doi-asserted-by":"publisher","first-page":"86","DOI":"10.3141\/1999-10","volume":"1999","author":"A Kesting","year":"2007","unstructured":"Kesting, A., Treiber, M., Helbing, D.: General lane-changing model MOBIL for car-following models. Transp. Res. Rec. 1999, 86\u201394 (2007). https:\/\/doi.org\/10.3141\/1999-10","journal-title":"Transp. Res. Rec."},{"key":"16_CR18","unstructured":"Kingma, D.P., Salimans, T., Welling, M.: Variational dropout and the local reparameterization trick. In: Advances in Neural Information Processing Systems, vol. 28, pp. 2575\u20132583 (2015)"},{"key":"16_CR19","doi-asserted-by":"publisher","unstructured":"Koller, T., Berkenkamp, F., Turchetta, M., Krause, A.: Learning-based model predictive control for safe exploration. In: Proceedings of the IEEE Conference on Decision and Control, pp. 6059\u20136066 (2019). https:\/\/doi.org\/10.1109\/CDC.2018.8619572","DOI":"10.1109\/CDC.2018.8619572"},{"key":"16_CR20","unstructured":"Lillicrap, T.P.: Continuous control with deep reinforcement learning. In: 4th International Conference on Learning Representations, ICLR 2016 - Conference Track Proceedings (2016)"},{"key":"16_CR21","doi-asserted-by":"publisher","unstructured":"Mirchevska, B., Pek, C., Werling, M., Althoff, M., Boedecker, J.: High-level decision making for safe and reasonable autonomous lane changing using reinforcement learning. In: IEEE Conference on Intelligent Transportation Systems, Proceedings, ITSC, pp. 2156\u20132162 (2018). https:\/\/doi.org\/10.1109\/ITSC.2018.8569448","DOI":"10.1109\/ITSC.2018.8569448"},{"key":"16_CR22","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-1433-9","volume-title":"Vehicle Dynamics and Control","author":"R Rajamani","year":"2012","unstructured":"Rajamani, R.: Vehicle Dynamics and Control, 2nd edn. Springer, Cham (2012). https:\/\/doi.org\/10.1007\/978-1-4614-1433-9","edition":"2"},{"key":"16_CR23","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: 31st International Conference on Machine Learning, ICML 2014. vol. 1, pp. 605\u2013619 (2014)"},{"key":"16_CR24","volume-title":"Reinforcement Learning: An Introduction, A Bradford book","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, A Bradford book, vol. 258, 1st edn. MIT Press, Cambridge (1998)","edition":"1"},{"key":"16_CR25","unstructured":"Tessler, C., Mankowitz, D.J., Mannor, S.: Reward constrained policy optimization. In: 7th International Conference on Learning Representations, ICLR 2019 (2019)"},{"issue":"2","key":"16_CR26","doi-asserted-by":"publisher","first-page":"1805","DOI":"10.1103\/PhysRevE.62.1805","volume":"62","author":"M Treiber","year":"2000","unstructured":"Treiber, M., Hennecke, A., Helbing, D.: Congested traffic states in empirical observations and microscopic simulations. Phys. Rev. E Stat. Phys. Plasmas Fluids Relat Interdiscip. Topics 62(2), 1805\u20131824 (2000). https:\/\/doi.org\/10.1103\/PhysRevE.62.1805","journal-title":"Phys. Rev. E Stat. Phys. Plasmas Fluids Relat Interdiscip. Topics"},{"key":"16_CR27","unstructured":"Wachi, A., Sui, Y.: Safe reinforcement learning in constrained markov decision processes. In: 37th International Conference on Machine Learning, ICML 2020, vol. Part F16814, pp. 9739\u20139748 (2020)"},{"key":"16_CR28","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-981-15-4095-0_3","volume-title":"Deep Reinforcement Learning","author":"H Zhang","year":"2020","unstructured":"Zhang, H., Yu, T.: Taxonomy of reinforcement learning algorithms. In: Dong, H., Ding, Z., Zhang, S. (eds.) Deep Reinforcement Learning, pp. 125\u2013133. Springer, Singapore (2020). https:\/\/doi.org\/10.1007\/978-981-15-4095-0_3"}],"container-title":["Lecture Notes in Computer Science","Machine Learning, Optimization, and Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-25891-6_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,5]],"date-time":"2023-04-05T10:18:07Z","timestamp":1680689887000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-25891-6_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031258909","9783031258916"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-25891-6_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"10 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"LOD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Machine Learning, Optimization, and Data Science","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Certosa di Pontignano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"lod2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/lod2022.icas.cc\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"226","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"38% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.6","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}