{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T12:15:22Z","timestamp":1777983322829,"version":"3.51.4"},"reference-count":59,"publisher":"Informa UK Limited","issue":"1","license":[{"start":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T00:00:00Z","timestamp":1670803200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172211"],"award-info":[{"award-number":["62172211"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172210"],"award-info":[{"award-number":["62172210"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172200"],"award-info":[{"award-number":["62172200"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Leading-edge Technology Program of Jiangsu Natural Science Foundation","award":["BK20202001"],"award-info":[{"award-number":["BK20202001"]}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["Connection Science"],"published-print":{"date-parts":[[2022,12,31]]},"DOI":"10.1080\/09540091.2022.2151567","type":"journal-article","created":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T17:43:25Z","timestamp":1670867005000},"page":"2822-2844","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":6,"title":["Safe reinforcement learning for dynamical systems using barrier certificates"],"prefix":"10.1080","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7503-6759","authenticated-orcid":false,"given":"Qingye","family":"Zhao","sequence":"first","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, People's Republic of China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0214-8917","authenticated-orcid":false,"given":"Yi","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, People's Republic of China"}]},{"given":"Xuandong","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, People's Republic of China"}]}],"member":"301","published-online":{"date-parts":[[2022,12,12]]},"reference":[{"key":"CIT0001","unstructured":"Abadi, M., Barham, P., Chen, J., Chen, Z., Davis, A., Dean, J., Devin, M., Ghemawat, S., Irving, G., Isard, M. & Kudlur, M. (2016). Tensorflow: A system for large-scale machine learning. In 12th  {USENIX} symposium on operating systems design and implementation  ({OSDI}16) (pp. 265\u2013283)."},{"key":"CIT0002","doi-asserted-by":"crossref","unstructured":"Akametalu, A. K., Fisac, J. F., Gillula, J. H., Kaynama, S., Zeilinger, M. N. & C. J. Tomlin (2014). Reachability-based safe learning with Gaussian processes. In 53rd IEEE conference on decision and control (pp. 1424\u20131431).","DOI":"10.1109\/CDC.2014.7039601"},{"key":"CIT0003","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"CIT0004","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3496242"},{"key":"CIT0005","unstructured":"Bastani, O. (2019). Safe reinforcement learning via online shielding. Preprint arXiv:1905.10691, 288, 289."},{"key":"CIT0006","unstructured":"Berkenkamp, F., Turchetta, M., Schoellig, A. P. & Krause, A. (2017). Safe model-based reinforcement learning with stability guarantees. Preprint arXiv:1705.08551."},{"key":"CIT0007","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J. & Zaremba, W. (2016). Openai gym. Preprint arXiv:1606.01540."},{"key":"CIT0008","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"CIT0009","doi-asserted-by":"crossref","unstructured":"Clavi\u00e8re, A., Asselin, E., Garion, C. & Pagetti, C. (2020). Safety verification of neural network controlled systems. Preprint arXiv:2011.05174.","DOI":"10.1109\/DSN-W52860.2021.00019"},{"key":"CIT0010","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9303896"},{"key":"CIT0011","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78800-3_24"},{"key":"CIT0012","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD45719.2019.8942130"},{"key":"CIT0013","unstructured":"Dong, K., Luo, Y., Yu, T., Finn, C. & Ma, T. (2020). On the expressivity of neural networks for deep reinforcement learning. In International conference on machine learning (pp. 2627\u20132637)."},{"key":"CIT0014","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2018.08.026"},{"key":"CIT0015","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-77935-5_9"},{"key":"CIT0016","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12107"},{"key":"CIT0017","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-38574-2_14"},{"key":"CIT0018","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"CIT0019","unstructured":"Gu, S., Yang, L., Du, Y., Chen, G., Walter, F., Wang, J., Yang, Y. & Knoll, A. (2022). A review of safe reinforcement learning: Methods, theory and applications. Preprint arXiv:2205.10330."},{"key":"CIT0020","unstructured":"Gurobi Optimization, I. (2022). Gurobi optimizer reference manual. Retrieved from https:\/\/www.gurobi.com"},{"key":"CIT0021","unstructured":"Haarnoja, T., Zhou, A., Hartikainen, K., Tucker, G., Ha, S., Tan, J., Kumar, V., Zhu, H., Gupta, A., Abbeel, P. & Levine, S. (2018). Soft actor-critic algorithms and applications. Preprint arXiv:1812.05905."},{"key":"CIT0022","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9304296"},{"key":"CIT0023","doi-asserted-by":"publisher","DOI":"10.1145\/3358228"},{"key":"CIT0024","doi-asserted-by":"publisher","DOI":"10.1145\/3419742"},{"key":"CIT0025","unstructured":"Jin, W., Wang, Z., Yang, Z. & Mou, S. (2020). Neural certificates for safe control policies. Preprint arXiv:2006.08465."},{"key":"CIT0026","unstructured":"Julian, K. D. & Kochenderfer, M. J. (2019). A reachability method for verifying dynamical systems with deep neural network controllers. Preprint arXiv:1903.00520."},{"key":"CIT0027","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63387-9_5"},{"key":"CIT0028","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D. & Wierstra, D. (2016). Continuous control with deep reinforcement learning. In ICLR (poster)."},{"key":"CIT0029","unstructured":"Lin, L., Gong, S., Li, T. & Peeta, S. (2018). Deep learning-based human-driven vehicle trajectory prediction and its application for platoon control of connected and autonomous vehicles. In The autonomous vehicles symposium (Vol. 2018)."},{"key":"CIT0030","unstructured":"Luo, Y. & Ma, T. (2021). Learning barrier certificates: Towards safe reinforcement learning with zero training-time violations. In Thirty-fifth conference on neural information processing systems."},{"key":"CIT0031","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i5.20478"},{"key":"CIT0032","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2019.2920206"},{"key":"CIT0033","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-72016-2_20"},{"key":"CIT0034","doi-asserted-by":"crossref","unstructured":"Prajna, S. & Jadbabaie, A. (2004). Safety verification of hybrid systems using barrier certificates. In Proceedings of the 7th international workshop on hybrid systems: Computation and control (HSCC) (pp. 477\u2013492). Springer.","DOI":"10.1007\/978-3-540-24743-2_32"},{"key":"CIT0035","doi-asserted-by":"publisher","DOI":"10.3390\/app9091807"},{"key":"CIT0036","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-018-9791-9"},{"key":"CIT0037","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D. & Riedmiller, M. (2014). Deterministic policy gradient algorithms. In International conference on machine learning (pp. 387\u2013395)."},{"key":"CIT0038","doi-asserted-by":"publisher","DOI":"10.1145\/3290354"},{"key":"CIT0039","unstructured":"Srinivasan, K., Eysenbach, B., Ha, S., Tan, J. & Finn, C. (2020). Learning to be safe: Deep rl with a safety critic. Preprint arXiv:2010.14603."},{"key":"CIT0040","unstructured":"Stooke, A., Achiam, J. & Abbeel, P. (2020). Responsive safety in reinforcement learning by PID Lagrangian methods. In International conference on machine learning (pp. 9133\u20139143)."},{"key":"CIT0041","doi-asserted-by":"crossref","unstructured":"Sun, X., Khedr, H. & Shoukry, Y. (2019). Formal verification of neural network controlled autonomous systems. In Proceedings of the 22nd ACM international conference on hybrid systems: Computation and control (pp. 147\u2013156).","DOI":"10.1145\/3302504.3311802"},{"key":"CIT0042","doi-asserted-by":"crossref","unstructured":"Sutton, R. S., Szepesv\u00e1ri, C. & Maei, H. R. (2008). A convergent  O(n) temporal-difference algorithm for off-policy learning with linear function approximation. In Nips.","DOI":"10.1145\/1553374.1553501"},{"key":"CIT0043","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2644871"},{"key":"CIT0044","unstructured":"Taylor, A., Singletary, A., Yue, Y. & Ames, A. (2020). Learning for safety-critical control with control barrier functions. In Learning for dynamics and control (pp. 708\u2013717)."},{"key":"CIT0045","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3070252"},{"key":"CIT0046","unstructured":"Thomas, P. S. (Doctoral Dissertations, 2015). Safe reinforcement learning.\u00a0https:\/\/doi.org\/10.7275\/7529913.0"},{"key":"CIT0047","unstructured":"Wang, S., Pei, K., Whitehouse, J., Yang, J. & Jana, S. (2018). Formal security analysis of neural networks using symbolic intervals. In 27th  {USENIX} security symposium  ({USENIX} security 18) (pp. 1599\u20131614)."},{"key":"CIT0048","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-018-5739-5"},{"key":"CIT0049","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062224"},{"key":"CIT0050","unstructured":"Winterer, F. (2017). iSAT3. Retrieved from https:\/\/projects.informatik.uni-freiburg.de\/projects\/isat3"},{"key":"CIT0051","unstructured":"Wong, E. & Kolter, Z. (2018). Provable defenses against adversarial examples via the convex outer adversarial polytope. In International conference on machine learning (pp. 5286\u20135295)."},{"key":"CIT0052","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2991090"},{"key":"CIT0053","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2018.2868859"},{"key":"CIT0054","unstructured":"Yang, T. Y., Rosca, J., Narasimhan, K. & Ramadge, P. J. (2020). Accelerating safe reinforcement learning with constraint-mismatched policies. Preprint arXiv:2006.11645."},{"key":"CIT0055","doi-asserted-by":"publisher","DOI":"10.1016\/j.jfranklin.2019.12.017"},{"key":"CIT0056","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2021.3075501"},{"key":"CIT0057","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2967871"},{"key":"CIT0058","doi-asserted-by":"crossref","unstructured":"Zhao, H., Zeng, X., Chen, T. & Liu, Z. (2020). Synthesizing barrier certificates using neural networks. In Proceedings of the 23rd international conference on hybrid systems: Computation and control (pp. 1\u201311).","DOI":"10.1145\/3365365.3382222"},{"key":"CIT0059","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Chen, X., Zhao, Z., Zhang, Y., Tang, E. & Li, X. (2022). Verifying neural network controlled systems using neural networks. In 25th ACM international conference on hybrid systems: Computation and control (pp. 1\u201311).","DOI":"10.1145\/3501710.3519511"}],"container-title":["Connection Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/09540091.2022.2151567","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,17]],"date-time":"2023-04-17T15:22:26Z","timestamp":1681744946000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/09540091.2022.2151567"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,12]]},"references-count":59,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,12,31]]}},"alternative-id":["10.1080\/09540091.2022.2151567"],"URL":"https:\/\/doi.org\/10.1080\/09540091.2022.2151567","relation":{},"ISSN":["0954-0091","1360-0494"],"issn-type":[{"value":"0954-0091","type":"print"},{"value":"1360-0494","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,12]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=ccos20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=ccos20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2022-05-29","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2022-11-19","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2022-12-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}