{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T17:05:35Z","timestamp":1773075935649,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319955810","type":"print"},{"value":"9783319955827","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-95582-7_27","type":"book-chapter","created":{"date-parts":[[2018,7,11]],"date-time":"2018-07-11T14:31:17Z","timestamp":1531319477000},"page":"456-465","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":37,"title":["Falsification of Cyber-Physical Systems Using Deep Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Takumi","family":"Akazaki","sequence":"first","affiliation":[]},{"given":"Shuang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yoriyuki","family":"Yamagata","sequence":"additional","affiliation":[]},{"given":"Yihai","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,7,12]]},"reference":[{"key":"27_CR1","unstructured":"The ChainerRL Library. 
https:\/\/github.com\/chainer\/chainerrl"},{"issue":"2s","key":"27_CR2","doi-asserted-by":"publisher","first-page":"95:1","DOI":"10.1145\/2465787.2465797","volume":"12","author":"H Abbas","year":"2013","unstructured":"Abbas, H., Fainekos, G., Sankaranarayanan, S., Ivan\u010di\u0107, F., Gupta, A.: Probabilistic temporal logic falsification of cyber-physical systems. ACM Trans. Embed. Comput. Syst. 12(2s), 95:1\u201395:30 (2013)","journal-title":"ACM Trans. Embed. Comput. Syst."},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Abbas, H., Fainekos, G.E.: Convergence proofs for simulated annealing falsification of safety properties. In: 50th Annual Allerton Conference on Communication, Control, and Computing, Allerton 2012, Allerton Park & Retreat Center, Monticello, IL, USA, 1\u20135 October 2012, pp. 1594\u20131601. IEEE (2012)","DOI":"10.1109\/Allerton.2012.6483411"},{"key":"27_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1007\/978-3-319-46982-9_27","volume-title":"Runtime Verification","author":"T Akazaki","year":"2016","unstructured":"Akazaki, T.: Falsification of conditional safety properties for cyber-physical systems with gaussian process regression. In: Falcone, Y., S\u00e1nchez, C. (eds.) RV 2016. LNCS, vol. 10012, pp. 439\u2013446. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46982-9_27"},{"key":"27_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1007\/978-3-642-19835-9_21","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"Y Annpureddy","year":"2011","unstructured":"Annpureddy, Y., Liu, C., Fainekos, G., Sankaranarayanan, S.: S-TaLiRo: a tool for temporal logic falsification for hybrid systems. In: Abdulla, P.A., Leino, K.R.M. (eds.) TACAS 2011. LNCS, vol. 6605, pp. 254\u2013257. Springer, Heidelberg (2011). 
https:\/\/doi.org\/10.1007\/978-3-642-19835-9_21"},{"key":"27_CR6","unstructured":"Bardh Hoxha, H.A., Fainekos, G.: Benchmarks for temporal logic requirements for automotive systems. In: Proceedings of Applied Verification for Continuous and Hybrid Systems (2014)"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Bartocci, E., Bortolussi, L., Nenzi, L., Sanguinetti, G.: On the robustness of temporal properties for stochastic models. In: Dang, T., Piazza, C. (eds.) Proceedings Second International Workshop on Hybrid Systems and Biology, HSB 2013. EPTCS, Taormina, Italy, 2nd September 2013, vol. 125, pp. 3\u201319 (2013)","DOI":"10.4204\/EPTCS.125.1"},{"key":"27_CR8","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.tcs.2015.02.046","volume":"587","author":"E Bartocci","year":"2015","unstructured":"Bartocci, E., Bortolussi, L., Nenzi, L., Sanguinetti, G.: System design of stochastic models using robustness of temporal properties. Theor. Comput. Sci. 587, 3\u201325 (2015)","journal-title":"Theor. Comput. Sci."},{"key":"27_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/978-3-319-75632-5_5","volume-title":"Lectures on Runtime Verification","author":"E Bartocci","year":"2018","unstructured":"Bartocci, E., et al.: Specification-based monitoring of cyber-physical systems: a survey on theory, tools and applications. In: Bartocci, E., Falcone, Y. (eds.) Lectures on Runtime Verification. LNCS, vol. 10457, pp. 135\u2013175. Springer, Cham (2018). 
https:\/\/doi.org\/10.1007\/978-3-319-75632-5_5"},{"key":"27_CR10","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., Zaremba, W.: OpenAI gym (2016)"},{"key":"27_CR11","unstructured":"Cook, J.D.: Basic properties of the soft maximum (2011)"},{"key":"27_CR12","volume-title":"Nonparametric Statistics: A Step-by-Step Approach","author":"GW Corder","year":"2014","unstructured":"Corder, G.W., Foreman, D.I.: Nonparametric Statistics: A Step-by-Step Approach. Wiley, Hoboken (2014)"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Ding, X.C., Smith, S.L., Belta, C., Rus, D.: MDP optimal control under temporal logic constraints. In: Proceedings of the 50th IEEE Conference on Decision and Control and European Control Conference, CDC-ECC 2011, Orlando, FL, USA, 12\u201315 December 2011, pp. 532\u2013538. IEEE (2011)","DOI":"10.1109\/CDC.2011.6161122"},{"issue":"5","key":"27_CR14","doi-asserted-by":"publisher","first-page":"1244","DOI":"10.1109\/TAC.2014.2298143","volume":"59","author":"XC Ding","year":"2014","unstructured":"Ding, X.C., Smith, S.L., Belta, C., Rus, D.: Optimal control of markov decision processes with linear temporal logic constraints. IEEE Trans. Autom. Control 59(5), 1244\u20131257 (2014)","journal-title":"IEEE Trans. Autom. Control"},{"key":"27_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/978-3-642-14295-6_17","volume-title":"Computer Aided Verification","author":"A Donz\u00e9","year":"2010","unstructured":"Donz\u00e9, A.: Breach, a toolbox for verification and parameter synthesis of hybrid systems. In: Touili, T., Cook, B., Jackson, P. (eds.) CAV 2010. LNCS, vol. 6174, pp. 167\u2013170. Springer, Heidelberg (2010). 
https:\/\/doi.org\/10.1007\/978-3-642-14295-6_17"},{"key":"27_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1007\/978-3-642-15297-9_9","volume-title":"Formal Modeling and Analysis of Timed Systems","author":"A Donz\u00e9","year":"2010","unstructured":"Donz\u00e9, A., Maler, O.: Robust satisfaction of temporal logic over real-valued signals. In: Chatterjee, K., Henzinger, T.A. (eds.) FORMATS 2010. LNCS, vol. 6246, pp. 92\u2013106. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15297-9_9"},{"key":"27_CR17","unstructured":"Gu, S., Lillicrap, T., Sutskever, I., Levine, S.: Continuous deep q-learning with model-based acceleration. In: Balcan, M.F., Weinberger, K.Q. (eds.) Proceedings of The 33rd International Conference on Machine Learning, Proceedings of Machine Learning Research, PMLR, New York, USA, 20\u201322 June 2016, vol. 48, pp. 2829\u20132838 (2016)"},{"key":"27_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1007\/978-3-319-11164-3_15","volume-title":"Runtime Verification","author":"H-M Ho","year":"2014","unstructured":"Ho, H.-M., Ouaknine, J., Worrell, J.: Online monitoring of metric temporal logic. In: Bonakdarpour, B., Smolka, S.A. (eds.) RV 2014. LNCS, vol. 8734, pp. 178\u2013192. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-11164-3_15"},{"key":"27_CR19","unstructured":"Hoxha, B., Abbas, H., Fainekos, G.E.: Using S-TaLiRo on industrial size automotive models. In: Frehse, G., Althoff, M. (eds.) 1st and 2nd International Workshop on Applied Verification for Continuous and Hybrid Systems, ARCH@CPSWeek 2014. EPiC Series in Computing, Berlin, Germany, 14 April 2014\/ARCH@CPSWeek 2015, Seattle, WA, USA, 13 April 2015, vol. 34, pp. 113\u2013119. 
EasyChair (2014)"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Li, X., Ma, Y., Belta, C.: A policy search method for temporal logic specified reinforcement learning tasks. CoRR, abs\/1709.09611 (2017)","DOI":"10.23919\/ACC.2018.8431181"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Li, X., Vasile, C.I., Belta, C.: Reinforcement learning with temporal logic rewards. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems, IROS 2017, Vancouver, BC, Canada, 24\u201328 September 2017, pp. 3834\u20133839. IEEE (2017)","DOI":"10.1109\/IROS.2017.8206234"},{"key":"27_CR22","series-title":"Springer Tracts in Advanced Robotics","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/978-3-319-16595-0_20","volume-title":"Algorithmic Foundations of Robotics XI","author":"R Luna","year":"2015","unstructured":"Luna, R., Lahijanian, M., Moll, M., Kavraki, L.E.: Asymptotically optimal stochastic motion planning with temporal goals. In: Akin, H.L., Amato, N.M., Isler, V., van der Stappen, A.F. (eds.) Algorithmic Foundations of Robotics XI. STAR, vol. 107, pp. 335\u2013352. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-16595-0_20"},{"key":"27_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1007\/978-3-540-30206-3_12","volume-title":"Formal Techniques, Modelling and Analysis of Timed and Fault-Tolerant Systems","author":"O Maler","year":"2004","unstructured":"Maler, O., Nickovic, D.: Monitoring temporal properties of continuous signals. In: Lakhnech, Y., Yovine, S. (eds.) FORMATS\/FTRTFT -2004. LNCS, vol. 3253, pp. 152\u2013166. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-30206-3_12"},{"key":"27_CR24","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T.P., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning, vol. 
48 (2016)"},{"issue":"7540","key":"27_CR25","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E.S., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of Markov decision processes for linear temporal logic specifications. In: 53rd IEEE Conference on Decision and Control, CDC 2014, Los Angeles, CA, USA, 15\u201317 December 2014, pp. 1091\u20131096. IEEE (2014)","DOI":"10.21236\/ADA623517"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Sankaranarayanan, S., Fainekos, G.E.: Falsification of temporal properties of hybrid systems using the cross-entropy method. In: Dang, T., Mitchell, I.M. (eds.) Hybrid Systems: Computation and Control (part of CPS Week 2012), HSCC 2012, Beijing, China, 17\u201319 April 2012, pp. 125\u2013134. ACM (2012)","DOI":"10.1145\/2185632.2185653"},{"key":"27_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-319-66845-1_1","volume-title":"Integrated Formal Methods","author":"S Silvetti","year":"2017","unstructured":"Silvetti, S., Policriti, A., Bortolussi, L.: An active learning approach to the falsification of black box cyber-physical systems. In: Polikarpova, N., Schneider, S. (eds.) IFM 2017. LNCS, vol. 10510, pp. 3\u201317. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66845-1_1"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Soudjani, S.E.Z., Majumdar, R.: Controller synthesis for reward collecting Markov processes in continuous space. 
In: Frehse, G., Mitra, S. (eds.) Proceedings of the 20th International Conference on Hybrid Systems: Computation and Control, HSCC 2017, Pittsburgh, PA, USA, 18\u201320 April 2017, pp. 45\u201354. ACM (2017)","DOI":"10.1145\/3049797.3049827"}],"container-title":["Lecture Notes in Computer Science","Formal Methods"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-95582-7_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,5]],"date-time":"2025-07-05T18:22:47Z","timestamp":1751739767000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-95582-7_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319955810","9783319955827"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-95582-7_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]}}}