{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T21:06:32Z","timestamp":1776373592293,"version":"3.51.2"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,1]],"date-time":"2021-12-01T00:00:00Z","timestamp":1638316800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1836214"],"award-info":[{"award-number":["U1836214"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802275"],"award-info":[{"award-number":["61802275"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61702362"],"award-info":[{"award-number":["61702362"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Application Foundation and Advanced Technology","award":["16JCQNJC00100"],"award-info":[{"award-number":["16JCQNJC00100"]}]},{"name":"Artificial Intelligence of Tianjin Municipal Science and Technology Commission","award":["17ZXRGGX00150"],"award-info":[{"award-number":["17ZXRGGX00150"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IIEEE Trans. Software Eng."],"published-print":{"date-parts":[[2021,12,1]]},"DOI":"10.1109\/tse.2020.2969178","type":"journal-article","created":{"date-parts":[[2020,1,23]],"date-time":"2020-01-23T21:27:00Z","timestamp":1579814820000},"page":"2823-2840","source":"Crossref","is-referenced-by-count":41,"title":["Falsification of Cyber-Physical Systems Using Deep Reinforcement Learning"],"prefix":"10.1109","volume":"47","author":[{"given":"Yoriyuki","family":"Yamagata","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8766-7235","authenticated-orcid":false,"given":"Shuang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Takumi","family":"Akazaki","sequence":"additional","affiliation":[]},{"given":"Yihai","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/11940197_12"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10575-8"},{"key":"ref33","article-title":"Decision-Making with Non-Markovian rewards: From LTL to automata-based reward shaping","year":"2017"},{"key":"ref32","first-page":"159","article-title":"Non-Markovian rewards expressed in LTL: guiding search via reward shaping","author":"camacho","year":"2017","journal-title":"Proc 10th Int Symp Combinatorial Search"},{"key":"ref31","article-title":"Reinforcement learning for LTLf\/LDLf goals","volume":"abs 1807 6333","author":"de giacomo","year":"2018","journal-title":"CoRR"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-95582-7_27"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11164-3_15"},{"key":"ref36","article-title":"Robust satisfaction of temporal logic specifications via reinforcement learning","volume":"abs 1510 6460","author":"jones","year":"2015","journal-title":"CoRR"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799279"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206234"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00768-2_31"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3126521"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-17524-9_10"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2465787.2465797"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton.2012.6483411"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IECON.2010.5675195"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2018.08.018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24953-7_35"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46982-9_27"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14295-6_17"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-66845-1_1"},{"key":"ref25","author":"polikarpova","year":"2017","journal-title":"An Active Learning Approach to the Falsification of Black Box Cyber-Physical Systems"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/2465787.2465797"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v064.i09"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/2562059.2562140"},{"key":"ref55","first-page":"25","article-title":"Benchmarks for temporal logic requirements for automotive systems","author":"hoxha","year":"2014","journal-title":"Proc Appl Verification Continuous Hybrid Syst"},{"key":"ref54","year":"0","journal-title":"Modeling An Automatic Transmission Controller"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-46430-1_16"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.4324\/9781315806730"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-01702-5_16"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24372-1_1"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.2200\/S00268ED1V01Y201005AIM009"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21236\/ADA531406"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-31980-1_18"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36580-X_22"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1755952.1755983"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30206-3_12"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15297-9_9"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19835-9_21"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2012.6315384"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/BF01995674"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"254","DOI":"10.1007\/978-3-642-19835-9_21","article-title":"S-TaLiRo: A tool for temporal logic falsification for hybrid systems","author":"annpureddy","year":"2011","journal-title":"Proc 17th Int Conf Tools Algorithms Construction Anal Syst"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2185632.2185653"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"152","DOI":"10.1007\/978-3-540-30206-3_12","article-title":"Monitoring temporal properties of continuous signals","author":"maler","year":"2004","journal-title":"Proc Int Symp Formal Techn Modelling Anal Timed Fault-Tolerant Syst"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver et","year":"2016","journal-title":"Nature"},{"key":"ref49","article-title":"ChainerRL: A deep reinforcement learning library","author":"fujita","year":"0","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.ic.2006.05.002"},{"key":"ref46","article-title":"Basic properties of the soft maximum","author":"cook","year":"2011"},{"key":"ref45","article-title":"Asynchronous methods for deep reinforcement learning","volume":"48","author":"mnih","year":"2016"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11164-3_19"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"ref42","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref41","author":"sutton","year":"2011","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref44","first-page":"2829","article-title":"Continuous deep Q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref43","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv 1312 5602"}],"container-title":["IEEE Transactions on Software Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/32\/9646454\/08967146.pdf?arnumber=8967146","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:50:07Z","timestamp":1652194207000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8967146\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,1]]},"references-count":56,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tse.2020.2969178","relation":{},"ISSN":["0098-5589","1939-3520","2326-3881"],"issn-type":[{"value":"0098-5589","type":"print"},{"value":"1939-3520","type":"electronic"},{"value":"2326-3881","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,12,1]]}}}