{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T04:08:01Z","timestamp":1780632481635,"version":"3.54.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100011381","name":"State Key Laboratory of Robotics and System","doi-asserted-by":"publisher","award":["SKLRS-2025-KF-05"],"award-info":[{"award-number":["SKLRS-2025-KF-05"]}],"id":[{"id":"10.13039\/501100011381","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003472","name":"Harbin Institute of Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003472","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62225305"],"award-info":[{"award-number":["62225305"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62527807"],"award-info":[{"award-number":["62527807"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62503507"],"award-info":[{"award-number":["62503507"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Automatica"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.automatica.2026.112964","type":"journal-article","created":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T11:52:55Z","timestamp":1775303575000},"page":"112964","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Reinforcement learning for control with probabilistic stability guarantee: A finite-sample approach"],"prefix":"10.1016","volume":"188","author":[{"given":"Minghao","family":"Han","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lixian","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenliang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhipeng","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei","family":"Pan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"10","key":"10.1016\/j.automatica.2026.112964_b1","doi-asserted-by":"crossref","first-page":"1456","DOI":"10.1109\/9.720508","article-title":"Mean square stochastic stability of linear time-delay system with Markovian jumping parameters","volume":"43","author":"Benjelloun","year":"2002","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2026.112964_b2","series-title":"Advances in neural information processing systems","first-page":"908","article-title":"Safe model-based reinforcement learning with stability guarantees","author":"Berkenkamp","year":"2017"},{"key":"10.1016\/j.automatica.2026.112964_b3","series-title":"Constrained optimization and Lagrange multiplier methods","author":"Bertsekas","year":"2014"},{"issue":"6","key":"10.1016\/j.automatica.2026.112964_b4","doi-asserted-by":"crossref","first-page":"1081","DOI":"10.1016\/j.automatica.2010.03.007","article-title":"Markov jump linear systems with switching transition rates: Mean square stability with dwell-time","volume":"46","author":"Bolzern","year":"2010","journal-title":"Automatica"},{"key":"10.1016\/j.automatica.2026.112964_b5","series-title":"Openai Gym","author":"Brockman","year":"2016"},{"key":"10.1016\/j.automatica.2026.112964_b6","series-title":"2021 IEEE international conference on robotics and automation","first-page":"1803","article-title":"Stabilizing neural control using self-learned almost Lyapunov critics","author":"Chang","year":"2021"},{"key":"10.1016\/j.automatica.2026.112964_b7","article-title":"Neural Lyapunov control","volume":"32","author":"Chang","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2026.112964_b8","first-page":"8223","article-title":"Finite-sample analysis of contractive stochastic approximation using smooth convex envelopes","volume":"33","author":"Chen","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2026.112964_b9","first-page":"21440","article-title":"Finite-sample analysis of off-policy TD-learning via generalized Bellman operators","volume":"34","author":"Chen","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2026.112964_b10","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2022.110623","article-title":"Finite-sample analysis of nonlinear stochastic approximation with applications in reinforcement learning","volume":"146","author":"Chen","year":"2022","journal-title":"Automatica"},{"key":"10.1016\/j.automatica.2026.112964_b11","article-title":"A Lyapunov-based approach to safe reinforcement learning","volume":"31","author":"Chow","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2026.112964_b12","series-title":"Lyapunov-based safe policy optimization for continuous control","author":"Chow","year":"2019"},{"key":"10.1016\/j.automatica.2026.112964_b13","series-title":"Conference on learning theory","first-page":"1199","article-title":"Finite sample analysis of two-timescale stochastic approximation with applications to reinforcement learning","author":"Dalal","year":"2018"},{"key":"10.1016\/j.automatica.2026.112964_b14","series-title":"Conference on robot learning","first-page":"1724","article-title":"Safe nonlinear control using robust neural Lyapunov-barrier functions","author":"Dawson","year":"2022"},{"key":"10.1016\/j.automatica.2026.112964_b15","series-title":"2023 IEEE international conference on robotics and automation","first-page":"2913","article-title":"Learning stabilization control from observations by learning Lyapunov-like proxy models","author":"Ganai","year":"2023"},{"issue":"4","key":"10.1016\/j.automatica.2026.112964_b16","doi-asserted-by":"crossref","first-page":"6217","DOI":"10.1109\/LRA.2020.3011351","article-title":"Actor-critic reinforcement learning for control with stability guarantee","volume":"5","author":"Han","year":"2020","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.automatica.2026.112964_b17","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2023.111490","article-title":"Specified convergence rate guaranteed output tracking of discrete-time systems via reinforcement learning","volume":"161","author":"Huang","year":"2024","journal-title":"Automatica"},{"issue":"3","key":"10.1016\/j.automatica.2026.112964_b18","doi-asserted-by":"crossref","first-page":"1314","DOI":"10.1109\/TAC.2020.2989702","article-title":"Stochastic approximation for risk-aware Markov decision processes","volume":"66","author":"Huang","year":"2020","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"11","key":"10.1016\/j.automatica.2026.112964_b19","doi-asserted-by":"crossref","first-page":"2917","DOI":"10.1109\/TAC.2015.2414811","article-title":"Global adaptive dynamic programming for continuous-time nonlinear systems","volume":"60","author":"Jiang","year":"2015","journal-title":"IEEE Transactions on Automatic Control"},{"issue":"7976","key":"10.1016\/j.automatica.2026.112964_b20","doi-asserted-by":"crossref","first-page":"982","DOI":"10.1038\/s41586-023-06419-4","article-title":"Champion-level drone racing using deep reinforcement learning","volume":"620","author":"Kaufmann","year":"2023","journal-title":"Nature"},{"key":"10.1016\/j.automatica.2026.112964_b21","article-title":"Finite-sample convergence rates for Q-learning and indirect algorithms","volume":"11","author":"Kearns","year":"1998","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2026.112964_b22","series-title":"Stochastic stability of differential equations","author":"Khasminskii","year":"2012"},{"issue":"1","key":"10.1016\/j.automatica.2026.112964_b23","first-page":"3041","article-title":"Finite-sample analysis of least-squares policy iteration","volume":"13","author":"Lazaric","year":"2012","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.automatica.2026.112964_b24","series-title":"Markov chains and mixing times","author":"Levin","year":"2017"},{"key":"10.1016\/j.automatica.2026.112964_b25","series-title":"The general problem of the stability of motion (in Russian)","author":"Lyapunov","year":"1892"},{"key":"10.1016\/j.automatica.2026.112964_b26","series-title":"Markov chains and stochastic stability","author":"Meyn","year":"2012"},{"issue":"Dec","key":"10.1016\/j.automatica.2026.112964_b27","first-page":"803","article-title":"Lyapunov design for safe reinforcement learning","volume":"3","author":"Perkins","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.automatica.2026.112964_b28","series-title":"The 2006 IEEE international joint conference on neural network proceedings","first-page":"5059","article-title":"Construction of neural network based Lyapunov functions","author":"Petridis","year":"2006"},{"issue":"6","key":"10.1016\/j.automatica.2026.112964_b29","doi-asserted-by":"crossref","first-page":"2736","DOI":"10.1109\/TAC.2016.2616644","article-title":"Stability analysis of discrete-time infinite-horizon optimal control with discounted cost","volume":"62","author":"Postoyan","year":"2017","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2026.112964_b30","unstructured":"Richards,\u00a0S. M., Berkenkamp,\u00a0F., & Krause,\u00a0A. (2018). The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems. In Conference on robot learning (pp. 466\u2013476)."},{"key":"10.1016\/j.automatica.2026.112964_b31","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","article-title":"A stochastic approximation method","author":"Robbins","year":"1951","journal-title":"The Annals of Mathematical Statistics"},{"key":"10.1016\/j.automatica.2026.112964_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2024.111886","article-title":"Reinforcement learning-based optimal control for Markov jump systems with completely unknown dynamics","volume":"171","author":"Shi","year":"2025","journal-title":"Automatica"},{"key":"10.1016\/j.automatica.2026.112964_b33","series-title":"Reinforcement learning: An introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.automatica.2026.112964_b34","series-title":"Advances in neural information processing systems","first-page":"1609","article-title":"A convergent o(n) temporal-difference algorithm for off-policy learning with linear function approximation","author":"Sutton","year":"2009"},{"issue":"10","key":"10.1016\/j.automatica.2026.112964_b35","doi-asserted-by":"crossref","first-page":"2435","DOI":"10.1016\/j.automatica.2014.08.006","article-title":"Stability analysis for stochastic hybrid systems: A survey","volume":"50","author":"Teel","year":"2014","journal-title":"Automatica"},{"key":"10.1016\/j.automatica.2026.112964_b36","series-title":"2019 IEEE 58th conference on decision and control","first-page":"3648","article-title":"Finite sample analysis of stochastic system identification","author":"Tsiamis","year":"2019"},{"issue":"6","key":"10.1016\/j.automatica.2026.112964_b37","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1109\/MCS.2023.3310345","article-title":"Statistical learning theory for control: A finite-sample perspective","volume":"43","author":"Tsiamis","year":"2023","journal-title":"IEEE Control Systems Magazine"},{"issue":"3\u20134","key":"10.1016\/j.automatica.2026.112964_b38","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1023\/A:1022672621406","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"Williams","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/j.automatica.2026.112964_b39","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2024.112035","article-title":"Data-driven identifier\u2013actor\u2013critic learning for cooperative spacecraft attitude tracking with orientation constraints","volume":"173","author":"Xia","year":"2025","journal-title":"Automatica"},{"key":"10.1016\/j.automatica.2026.112964_b40","series-title":"2018 IEEE conference on decision and control","first-page":"2759","article-title":"A finite sample analysis of the actor-critic algorithm","author":"Yang","year":"2018"},{"key":"10.1016\/j.automatica.2026.112964_b41","first-page":"10887","article-title":"Sample efficient reinforcement learning with REINFORCE","volume":"vol. 35","author":"Zhang","year":"2021"},{"key":"10.1016\/j.automatica.2026.112964_b42","first-page":"1230","article-title":"Finite sample analysis of average-reward TD learning and Q-Learning","volume":"34","author":"Zhang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.automatica.2026.112964_b43","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2022.110761","article-title":"Linear quadratic tracking control of unknown systems: A two-phase reinforcement learning method","volume":"148","author":"Zhao","year":"2023","journal-title":"Automatica"},{"issue":"7","key":"10.1016\/j.automatica.2026.112964_b44","doi-asserted-by":"crossref","first-page":"1918","DOI":"10.1109\/TAC.2015.2484357","article-title":"The mean-square stability probability of H\u221e control of continuous Markovian jump systems","volume":"61","author":"Zhu","year":"2015","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.automatica.2026.112964_b45","series-title":"Advances in neural information processing systems","first-page":"8665","article-title":"Finite-sample analysis for SARSA with linear function approximation","author":"Zou","year":"2019"}],"container-title":["Automatica"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0005109826001482?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0005109826001482?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T12:58:29Z","timestamp":1777381109000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0005109826001482"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":45,"alternative-id":["S0005109826001482"],"URL":"https:\/\/doi.org\/10.1016\/j.automatica.2026.112964","relation":{},"ISSN":["0005-1098"],"issn-type":[{"value":"0005-1098","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Reinforcement learning for control with probabilistic stability guarantee: A finite-sample approach","name":"articletitle","label":"Article Title"},{"value":"Automatica","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.automatica.2026.112964","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"112964"}}