{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T20:55:30Z","timestamp":1775940930211,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000780","name":"European Union","doi-asserted-by":"publisher","award":["825619"],"award-info":[{"award-number":["825619"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9533996","type":"proceedings-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T21:27:41Z","timestamp":1632173261000},"page":"1-8","source":"Crossref","is-referenced-by-count":3,"title":["Constraint-Guided Reinforcement Learning: Augmenting the Agent-Environment-Interaction"],"prefix":"10.1109","author":[{"given":"Helge","family":"Spieker","sequence":"first","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Generative Adversarial Imitation Learning","author":"ho","year":"2016","journal-title":"NeurIPS"},{"key":"ref11","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref12","article-title":"Asynchronous Methods for 
Deep Reinforcement Learning","author":"mnih","year":"2016","journal-title":"ICML"},{"key":"ref13","author":"rossi","year":"2006","journal-title":"Handbook of Constraint Programming (Foundations of Artificial Intelligence)"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.12107","article-title":"Safe Reinforcement Learning via Formal Methods: Towards Safe Control Through Proof and Learning","author":"fulton","year":"2018","journal-title":"AAAI"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196867"},{"key":"ref17","article-title":"Constrained Policy Optimization","author":"achiam","year":"2017","journal-title":"ICML"},{"key":"ref18","article-title":"A lyapunov-based approach to safe reinforcement learning","author":"chow","year":"2018","journal-title":"NeurIPS"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019785"},{"key":"ref4","article-title":"Learning Combinatorial Optimization Algorithms over Graphs","author":"dai","year":"2017","journal-title":"NeurIPS"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref6","article-title":"Bayesian Reinforcement Learning: A Survey","author":"ghavamzadeh","year":"2015","journal-title":"Foundations and Trends\u00ae in Machine Learning"},{"key":"ref5","article-title":"Challenges of Real-World Reinforcement Learning","author":"dulac-arnold","year":"0","journal-title":"ICML Workshop on Real-Life Reinforcement Learning"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11797","article-title":"Safe Reinforcement Learning via Shielding","author":"alshiekh","year":"2018","journal-title":"AAAI"},{"key":"ref7","article-title":"A Comprehensive Survey on Safe Reinforcement Learning","author":"garc\u00eda","year":"2015","journal-title":"JMLR"},{"key":"ref2","article-title":"Deep Learning for Video Game 
Playing","author":"justesen","year":"2019","journal-title":"IEEE Transactions on Games"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1613\/jair.898"},{"key":"ref20","author":"noothigattu","year":"2018","journal-title":"Interpretable Multi-Objective Reinforcement Learning through Policy Orchestration"},{"key":"ref22","article-title":"Constraint acquisition","author":"bessiere","year":"2015","journal-title":"Artificial Intelligence"},{"key":"ref21","article-title":"Learn What Not to Learn: Action Elimination with Deep Reinforcement Learning","author":"zahavy","year":"2018","journal-title":"NeurIPS"},{"key":"ref23","article-title":"Minimalistic Gridworld Environment for OpenAI Gym","author":"chevalier-boisvert","year":"2018","journal-title":"Github Repository"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","location":"Shenzhen, China","start":{"date-parts":[[2021,7,18]]},"end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09533996.pdf?arnumber=9533996","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T19:48:37Z","timestamp":1673293717000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9533996\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9533996","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}