{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:04:47Z","timestamp":1767261887415,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/cdc.2018.8618996","type":"proceedings-article","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T05:12:50Z","timestamp":1548306770000},"page":"6840-6847","source":"Crossref","is-referenced-by-count":16,"title":["Control-Theoretic Analysis of Smoothness for Stability-Certified Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ming","family":"Jin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Javad","family":"Lavaei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1613\/jair.4818"},{"key":"ref31","article-title":"Transformation of optimal centralized controllers into near-globally optimal static distributed controllers","author":"fattahi","year":"2017","journal-title":"IEEE Transactions on Automatic Control"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2562062"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1666"},{"key":"ref11","first-page":"1451","article-title":"Safe exploration in Markov decision processes","author":"moldovan","year":"2012","journal-title":"Proc of the International Conference on Machine Learning"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1120.0566"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2013.02.003"},{"key":"ref14","first-page":"22","article-title":"Constrained policy optimization","author":"achiam","year":"2017","journal-title":"Proc of the International Conference on Machine Learning"},{"key":"ref15","first-page":"803","article-title":"Lyapunov design for safe reinforcement learning","volume":"3","author":"perkins","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref16","first-page":"908","article-title":"Safe model-based reinforcement learning with stability guarantees","author":"berkenkamp","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2001.945833"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899520"},{"key":"ref19","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc of the International Conference on Machine Learning"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611970777"},{"key":"ref4","volume":"40","author":"zhou","year":"1996","journal-title":"Robust and Optimal Control"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2000.877001"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2008.03.004"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/9.587335"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2361004"},{"key":"ref5","article-title":"Intriguing properties of neural networks","author":"szegedy","year":"2014","journal-title":"International Conference on Learning Representations"},{"key":"ref8","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garcia","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00276494"},{"journal-title":"Deep reinforcement learning An overview","year":"2017","author":"li","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(98)00153-8"},{"key":"ref1","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/089976698300017746"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00928"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/72.165600"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611971309"},{"key":"ref23","first-page":"5769","article-title":"Improved training of Wasserstein GANs","author":"gulrajani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1137\/0306007"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"585","DOI":"10.1016\/j.automatica.2012.11.029","article-title":"On LMI conditions to design observers for lipschitz nonlinear systems","volume":"49","author":"zemouche","year":"2013","journal-title":"Automatica"}],"event":{"name":"2018 IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2018,12,17]]},"location":"Miami Beach, FL","end":{"date-parts":[[2018,12,19]]}},"container-title":["2018 IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8592870\/8618647\/08618996.pdf?arnumber=8618996","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T00:30:10Z","timestamp":1598229010000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8618996\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/cdc.2018.8618996","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}