{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T23:53:57Z","timestamp":1780444437610,"version":"3.54.1"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031826993","type":"print"},{"value":"9783031827006","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-82700-6_6","type":"book-chapter","created":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T01:26:29Z","timestamp":1737595589000},"page":"125-147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Formal Verification of\u00a0Probabilistic Deep Reinforcement Learning Policies with\u00a0Abstract Training"],"prefix":"10.1007","author":[{"given":"Junfeng","family":"Yang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1938-2902","authenticated-orcid":false,"given":"Min","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2730-1511","authenticated-orcid":false,"given":"Xin","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qin","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,1,24]]},"reference":[{"issue":"6","key":"6_CR1","doi-asserted-by":"publisher","first-page":"624","DOI":"10.3166\/ejc.16.624-641","volume":"16","author":"A Abate","year":"2010","unstructured":"Abate, A., Katoen, J.P., Lygeros, J., Prandini, M.: Approximate model checking of stochastic hybrid systems. Eur. J. Control. 16(6), 624\u2013641 (2010). https:\/\/doi.org\/10.3166\/ejc.16.624-641","journal-title":"Eur. J. Control."},{"key":"6_CR2","doi-asserted-by":"publisher","unstructured":"Abel, D.: A theory of state abstraction for reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, AAAI 2019, vol.\u00a033, pp. 9876\u20139877 (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.33019876","DOI":"10.1609\/aaai.v33i01.33019876"},{"issue":"6","key":"6_CR3","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"Arulkumaran, K., Deisenroth, M.P., Brundage, M., Bharath, A.A.: Deep reinforcement learning: a brief survey. IEEE Signal Process. Mag. 34(6), 26\u201338 (2017). https:\/\/doi.org\/10.1109\/MSP.2017.2743240","journal-title":"IEEE Signal Process. Mag."},{"key":"6_CR4","doi-asserted-by":"publisher","unstructured":"Bacci, E., Giacobbe, M., Parker, D.: Verifying reinforcement learning up to infinity. In: Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, IJCAI-21 (2021). https:\/\/doi.org\/10.24963\/ijcai.2021\/297","DOI":"10.24963\/ijcai.2021\/297"},{"key":"6_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/978-3-030-57628-8_14","volume-title":"Formal Modeling and Analysis of Timed Systems","author":"E Bacci","year":"2020","unstructured":"Bacci, E., Parker, D.: Probabilistic guarantees for safe deep reinforcement learning. In: Bertrand, N., Jansen, N. (eds.) FORMATS 2020. LNCS, vol. 12288, pp. 231\u2013248. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-57628-8_14"},{"key":"6_CR6","doi-asserted-by":"publisher","unstructured":"Bacci, E., Parker, D.: Verified probabilistic policies for deep reinforcement learning. In: NASA Formal Methods, NFM 2022, vol. 13260, pp. 193\u2013212. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-06773-0_10","DOI":"10.1007\/978-3-031-06773-0_10"},{"key":"6_CR7","unstructured":"Bunel, R.R., Turkaslan, I., Torr, P., Kohli, P., Mudigonda, P.K.: A unified view of piecewise linear neural network verification. In: Advances in Neural Information Processing Systems, NeurIPS 2018, vol.\u00a031 (2018). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2018\/file\/be53d253d6bc3258a8160556dda3e9b2-Paper.pdf"},{"key":"6_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"592","DOI":"10.1007\/978-3-319-63390-9_31","volume-title":"Computer Aided Verification","author":"C Dehnert","year":"2017","unstructured":"Dehnert, C., Junges, S., Katoen, J.-P., Volk, M.: A storm is coming: a modern probabilistic model checker. In: Majumdar, R., Kun\u010dak, V. (eds.) CAV 2017. LNCS, vol. 10427, pp. 592\u2013600. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-63390-9_31"},{"key":"6_CR9","doi-asserted-by":"publisher","unstructured":"Dong, Y., Zhao, X., Huang, X.: Dependability analysis of deep reinforcement learning based robotics and autonomous systems through probabilistic model checking. In: 2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5171\u20135178. IEEE (2022). https:\/\/doi.org\/10.1109\/IROS47612.2022.9981794","DOI":"10.1109\/IROS47612.2022.9981794"},{"key":"6_CR10","doi-asserted-by":"publisher","unstructured":"Eliyahu, T., Kazak, Y., Katz, G., Schapira, M.: Verifying learning-augmented systems. In: Proceedings of the 2021 ACM SIGCOMM 2021 Conference, SIGCOMM\u201921, pp. 305\u2013318 (2021). https:\/\/doi.org\/10.1145\/3452296.3472936","DOI":"10.1145\/3452296.3472936"},{"key":"6_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/978-3-642-21455-4_3","volume-title":"Formal Methods for Eternal Networked Software Systems","author":"V Forejt","year":"2011","unstructured":"Forejt, V., Kwiatkowska, M., Norman, G., Parker, D.: Automated verification techniques for probabilistic systems. In: Bernardo, M., Issarny, V. (eds.) SFM 2011. LNCS, vol. 6659, pp. 53\u2013113. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-21455-4_3"},{"issue":"3","key":"6_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3310090","volume":"13","author":"J Garc\u00eda","year":"2019","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: Probabilistic policy reuse for safe reinforcement learning. ACM Trans. Auton. Adapt. Syst. (TAAS) 13(3), 1\u201324 (2019). https:\/\/doi.org\/10.1145\/3310090","journal-title":"ACM Trans. Auton. Adapt. Syst. (TAAS)"},{"key":"6_CR13","doi-asserted-by":"publisher","unstructured":"Ghavamzadeh, M., Mannor, S., Pineau, J., Tamar, A., et\u00a0al.: Bayesian reinforcement learning: a survey. Found. Trends\u00ae Mach. Learn. 8(5-6), 359\u2013483 (2015). https:\/\/doi.org\/10.1561\/2200000049","DOI":"10.1561\/2200000049"},{"key":"6_CR14","doi-asserted-by":"publisher","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE (2017). https:\/\/doi.org\/10.1109\/ICRA.2017.7989385","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"6_CR15","doi-asserted-by":"publisher","unstructured":"Guttman, A.: R-trees: a dynamic index structure for spatial searching. In: Proceedings of the 1984 ACM SIGMOD International Conference on Management of Data, SIGMOD 1984, pp. 47\u201357 (1984). https:\/\/doi.org\/10.1145\/602259.602266","DOI":"10.1145\/602259.602266"},{"key":"6_CR16","doi-asserted-by":"publisher","unstructured":"Haarnoja, T., et\u00a0al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018). https:\/\/doi.org\/10.48550\/arXiv.1812.05905","DOI":"10.48550\/arXiv.1812.05905"},{"issue":"5s","key":"6_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3358228","volume":"18","author":"C Huang","year":"2019","unstructured":"Huang, C., Fan, J., Li, W., Chen, X., Zhu, Q.: Reachnn: reachability analysis of neural-network controlled systems. ACM Trans. Embed. Comput. Syst. (TECS) 18(5s), 1\u201322 (2019). https:\/\/doi.org\/10.1145\/3358228","journal-title":"ACM Trans. Embed. Comput. Syst. (TECS)"},{"key":"6_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-3-030-81685-8_11","volume-title":"Computer Aided Verification","author":"R Ivanov","year":"2021","unstructured":"Ivanov, R., Carpenter, T., Weimer, J., Alur, R., Pappas, G., Lee, I.: Verisig 2.0: verification of neural network controllers using taylor model preconditioning. In: Silva, A., Leino, K.R.M. (eds.) CAV 2021. LNCS, vol. 12759, pp. 249\u2013262. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-81685-8_11"},{"key":"6_CR19","doi-asserted-by":"publisher","unstructured":"Jin, P., Tian, J., Zhi, D., Wen, X., Zhang, M.: Trainify: a CEGAR-driven training and verification framework for safe deep reinforcement learning. In: Computer Aided Verification, CAV 2022, vol. 13371, pp. 193\u2013218. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-13185-1_10","DOI":"10.1007\/978-3-031-13185-1_10"},{"key":"6_CR20","doi-asserted-by":"publisher","unstructured":"Johnson, T.T., et al.: Arch-comp20 category report: artificial intelligence and neural network control systems (AINNCS) for continuous and hybrid systems plants. EPiC Ser. Comput. 74 (2020).https:\/\/doi.org\/10.29007\/9xgv. https:\/\/par.nsf.gov\/biblio\/10195492","DOI":"10.29007\/9xgv"},{"issue":"6","key":"6_CR21","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TITS.2021.3054625","volume":"23","author":"BR Kiran","year":"2021","unstructured":"Kiran, B.R., et al.: Deep reinforcement learning for autonomous driving: a survey. IEEE Trans. Intell. Transp. Syst. 23(6), 4909\u20134926 (2021). https:\/\/doi.org\/10.1109\/TITS.2021.3054625","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"6_CR22","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/978-3-319-57685-5_3","volume-title":"Formal System Verification","author":"M Kwiatkowska","year":"2018","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: Probabilistic model checking: advances and applications. In: Drechsler, R. (ed.) Formal System Verification, pp. 73\u2013121. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-57685-5_3"},{"key":"6_CR23","doi-asserted-by":"publisher","unstructured":"Li, J., Monroe, W., Ritter, A., Galley, M., Gao, J., Jurafsky, D.: Deep reinforcement learning for dialogue generation. arXiv preprint arXiv:1606.01541 (2016).https:\/\/doi.org\/10.48550\/arXiv.1606.01541","DOI":"10.48550\/arXiv.1606.01541"},{"key":"6_CR24","doi-asserted-by":"publisher","unstructured":"Mnih, V., et\u00a0al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236","DOI":"10.1038\/nature14236"},{"key":"6_CR25","unstructured":"Papoudakis, G., Christianos, F., Albrecht, S.: Agent modelling under partial observability for deep reinforcement learning. In: Advances in Neural Information Processing Systems, NeurIPS 2021, vol. 34, pp. 19210\u201319222 (2021). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/a03caec56cd82478bf197475b48c05f9-Paper.pdf"},{"issue":"4","key":"6_CR26","doi-asserted-by":"publisher","first-page":"886","DOI":"10.1007\/s10489-017-0999-8","volume":"48","author":"S Pathak","year":"2017","unstructured":"Pathak, S., Pulina, L., Tacchella, A.: Verification and repair of control policies for safe reinforcement learning. Appl. Intell. 48(4), 886\u2013908 (2017). https:\/\/doi.org\/10.1007\/s10489-017-0999-8","journal-title":"Appl. Intell."},{"key":"6_CR27","doi-asserted-by":"publisher","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017). https:\/\/doi.org\/10.48550\/arXiv.1707.06347","DOI":"10.48550\/arXiv.1707.06347"},{"issue":"5","key":"6_CR28","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1109\/tnn.1998.712192","volume":"9","author":"R Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement learning: an introduction. IEEE Trans. Neural Networks 9(5), 1054 (1998). https:\/\/doi.org\/10.1109\/tnn.1998.712192","journal-title":"IEEE Trans. Neural Networks"},{"key":"6_CR29","unstructured":"Tian, J., Zhi, D., Liu, S., Wang, P., Chen, C., Zhang, M.: Boosting verification of deep reinforcement learning via piece-wise linear decision neural networks. In: Advances in Neural Information Processing Systems, NeurIPS 2023, vol. 36, pp. 10022\u201310037 (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/1f96b24df4b06f5d68389845a9a13ed9-Paper-Conference.pdf"},{"key":"6_CR30","doi-asserted-by":"publisher","unstructured":"Tian, J., Zhi, D., Liu, S., Wang, P., Katz, G., Zhang, M.: Taming reachability analysis of DNN-controlled systems via abstraction-based training. In: Verification, Model Checking, and Abstract Interpretation, VMCAI 2024, vol. 14500, pp. 73\u201397. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-50521-8_4","DOI":"10.1007\/978-3-031-50521-8_4"},{"key":"6_CR31","doi-asserted-by":"publisher","unstructured":"Wang, Y., Huang, C., Wang, Z., Wang, Z., Zhu, Q.: Verification in the loop: correct-by-construction control learning with reach-avoid guarantees. arXiv preprint arXiv:2106.03245 (2021).https:\/\/doi.org\/10.48550\/arXiv.2106.03245","DOI":"10.48550\/arXiv.2106.03245"},{"key":"6_CR32","doi-asserted-by":"publisher","unstructured":"Xiong, Z., Jagannathan, S.: Scalable synthesis of verified controllers in deep reinforcement learning. arXiv preprint arXiv:2104.10219 (2021). https:\/\/doi.org\/10.48550\/arXiv.2104.10219","DOI":"10.48550\/arXiv.2104.10219"},{"issue":"1","key":"6_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3477600","volume":"55","author":"C Yu","year":"2021","unstructured":"Yu, C., Liu, J., Nemati, S., Yin, G.: Reinforcement learning in healthcare: a survey. ACM Comput. Surv. (CSUR) 55(1), 1\u201336 (2021). https:\/\/doi.org\/10.1145\/3477600","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et al.: Boosting verified training for robust image classifications via abstraction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16251\u201316260 (2023). https:\/\/openaccess.thecvf.com\/content\/CVPR2023\/html\/Zhang_Boosting_Verified_Training_for_Robust_Image_Classifications_via_Abstraction_CVPR_2023_paper.html","DOI":"10.1109\/CVPR52729.2023.01559"}],"container-title":["Lecture Notes in Computer Science","Verification, Model Checking, and Abstract Interpretation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-82700-6_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T01:26:38Z","timestamp":1737595598000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-82700-6_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031826993","9783031827006"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-82700-6_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"VMCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Verification, Model Checking, and Abstract Interpretation","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denver, CO","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"vmcai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conf.researchr.org\/home\/VMCAI-2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}