{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T01:44:15Z","timestamp":1729647855412,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T00:00:00Z","timestamp":1607904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T00:00:00Z","timestamp":1607904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T00:00:00Z","timestamp":1607904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,12,14]]},"DOI":"10.1109\/cdc42340.2020.9304411","type":"proceedings-article","created":{"date-parts":[[2021,1,13]],"date-time":"2021-01-13T07:27:32Z","timestamp":1610522852000},"page":"3623-3630","source":"Crossref","is-referenced-by-count":0,"title":["Censored Markov Decision Processes: A Framework for Safe Reinforcement Learning in Collaboration with External Systems"],"prefix":"10.1109","author":[{"given":"Masahiro","family":"Kohjima","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masami","family":"Takahashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroyuki","family":"Toda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44491-2_34"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017940631555"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3761"},{"key":"ref14","first-page":"2067","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","author":"saunders","year":"2018","journal-title":"International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref15","first-page":"803","article-title":"Lyapunov design for safe reinforcement learning","volume":"3","author":"perkins","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7039601"},{"key":"ref17","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garcia","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.24033\/asens.993"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.24033\/asens.1000"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4684-9455-6"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313433"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.2307\/3215344"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"journal-title":"Using a deep reinforcement learning agent for traffic signal control","year":"2016","author":"genders","key":"ref6"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11339","article-title":"Cellular network traffic scheduling with deep reinforcement learning","author":"chinchali","year":"2018","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref8","first-page":"1376","article-title":"Synthesizing chemical plant operation procedures using knowledge, dynamic simulation and deep reinforcement learning","author":"kubosawa","year":"2018","journal-title":"SICE Annual Conference"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220096"},{"journal-title":"Introduction to Reinforcement Learning","year":"1998","author":"sutton","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50021-0"},{"journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming","year":"2005","author":"puterman","key":"ref1"},{"key":"ref20","first-page":"7","article-title":"Processus markoviens et stationnaires. cas denombrable","volume":"18","author":"levy","year":"1958","journal-title":"Ann Inst Poincare"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref21","first-page":"393","article-title":"Reinforcement learning methods for continuous-time markov decision problems","author":"bradtke","year":"1995","journal-title":"Advances in neural information processing systems"},{"key":"ref24","first-page":"1050","article-title":"Multi-time models for temporally abstract planning","author":"precup","year":"1998","journal-title":"Advances in neural information processing systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50072-4"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-8230-0"},{"key":"ref25","article-title":"Hierarchical control and learning for Markov decision processes","author":"parr","year":"1998","journal-title":"PhD thesis"}],"event":{"name":"2020 59th IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2020,12,14]]},"location":"Jeju, Korea (South)","end":{"date-parts":[[2020,12,18]]}},"container-title":["2020 59th IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9303728\/9303729\/09304411.pdf?arnumber=9304411","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,11]],"date-time":"2022-12-11T11:15:01Z","timestamp":1670757301000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9304411\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,14]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/cdc42340.2020.9304411","relation":{},"subject":[],"published":{"date-parts":[[2020,12,14]]}}}