{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:10:42Z","timestamp":1768072242742,"version":"3.49.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,9]],"date-time":"2021-10-09T00:00:00Z","timestamp":1633737600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,10,9]],"date-time":"2021-10-09T00:00:00Z","timestamp":1633737600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,9]],"date-time":"2021-10-09T00:00:00Z","timestamp":1633737600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10,9]]},"DOI":"10.1109\/crc52766.2021.9620159","type":"proceedings-article","created":{"date-parts":[[2021,11,30]],"date-time":"2021-11-30T19:11:00Z","timestamp":1638299460000},"page":"240-249","source":"Crossref","is-referenced-by-count":4,"title":["A Survey of Research on Stability Guarantee of Reinforcement Learning Automatic Control Problem"],"prefix":"10.1109","author":[{"given":"Quan","family":"Gan","sequence":"first","affiliation":[]},{"given":"Fengge","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Junsuo","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.23919\/ACC45564.2020.9147463"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196709"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619829"},{"key":"ref32","article-title":"Safe planning via model predictive shielding","author":"bastani","year":"2019"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.3046193"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196867"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2018.2853698"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2020.3005923"},{"key":"ref35","first-page":"708","article-title":"Learning for safety- critical control with control barrier functions","author":"taylor","year":"2020","journal-title":"Learning for Dynamics and Control"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"ref10","article-title":"Gaussian process optimization in the bandit setting: No regret and experimental design","author":"srinivas","year":"2009"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2668380"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2007.4354030"},{"key":"ref12","first-page":"2361","article-title":"Safe policy search for lifelong reinforcement learning with sublinear regret","author":"ammar","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref13","first-page":"6070","article-title":"Risk- constrained reinforcement learning with percentile risk criteria","volume":"18","author":"chow","year":"2017","journal-title":"The Journal of Machine Learning Research"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989680"},{"key":"ref15","article-title":"Safe exploration in continuous action spaces","author":"dalal","year":"2018"},{"key":"ref16","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref17","article-title":"A lyapunov-based approach to safe reinforcement learning","author":"chow","year":"2018"},{"key":"ref18","article-title":"Safe model-based reinforcement learning with stability guarantees","author":"berkenkamp","year":"2017"},{"key":"ref19","article-title":"Lyapunov-based safe policy optimization for continuous control","author":"chow","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619572"},{"key":"ref4","first-page":"997","article-title":"Safe exploration for optimization with gaussian processes","author":"sui","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2013.02.003"},{"key":"ref3","first-page":"63","article-title":"Gaussian processes in machine learning","author":"rasmussen","year":"2003","journal-title":"Machine Learning Summer School"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3761"},{"key":"ref29","article-title":"Safeinteractivemodel-basedlearning","author":"gallieri","year":"2019"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487170"},{"key":"ref8","first-page":"466","article-title":"The lyapunov neural network: Adaptive stability certification for safe learning of dynamical systems","author":"richards","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref7","first-page":"803","article-title":"Lyapunov design for safe reinforcement learning","volume":"3","author":"perkins","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref2","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref9","article-title":"Neural lyapunov control","author":"chang","year":"2020"},{"key":"ref1","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garc?a","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11797","article-title":"Safe reinforcement learning via shielding","volume":"32","author":"alshiekh","year":"2018","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2018.2876389"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2654539"},{"key":"ref24","article-title":"Bayesian optimization with safety constraints: safe and automatic parameter tuning in robotics","author":"berkenkamp","year":"2016"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7798979"},{"key":"ref26","first-page":"545","article-title":"Stability of controllers for gaussian process forward models","author":"vinogradska","year":"2016","journal-title":"Int Conference on Machine Learning"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ECC.2016.7810344"}],"event":{"name":"2021 6th International Conference on Control, Robotics and Cybernetics (CRC)","location":"Shanghai, China","start":{"date-parts":[[2021,10,9]]},"end":{"date-parts":[[2021,10,11]]}},"container-title":["2021 6th International Conference on Control, Robotics and Cybernetics (CRC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9620057\/9620111\/09620159.pdf?arnumber=9620159","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,16]],"date-time":"2023-01-16T15:05:12Z","timestamp":1673881512000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9620159\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,9]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/crc52766.2021.9620159","relation":{},"subject":[],"published":{"date-parts":[[2021,10,9]]}}}