{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T12:49:07Z","timestamp":1754484547208},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610510","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"10503-10509","source":"Crossref","is-referenced-by-count":3,"title":["Synthesis of Temporally-Robust Policies for Signal Temporal Logic Tasks using Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Siqi","family":"Wang","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Automation and Key Laboratory of System Control and Information Processing,Shanghai,China,200240"}]},{"given":"Shaoyuan","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Automation and Key Laboratory of System Control and Information Processing,Shanghai,China,200240"}]},{"given":"Li","family":"Yin","sequence":"additional","affiliation":[{"name":"Macau University of Science and Technology,Institute of Systems Engineering,Taipa,China"}]},{"given":"Xiang","family":"Yin","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Department of Automation and Key Laboratory of System Control and Information Processing,Shanghai,China,200240"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1007\/978-3-540-30206-3_12"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1007\/978-3-642-39799-8_19"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1016\/j.automatica.2023.111445"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/LRA.2021.3068114"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1016\/j.jprocont.2018.05.011"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/JIOT.2021.3069943"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1145\/3359986.3361203"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1007\/978-3-642-15297-9_9"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/LCSYS.2022.3209928"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1145\/3550072"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1145\/3501710.3519504"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/CDC49753.2023.10384038"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1007\/s10703-016-0261-8"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1007\/978-3-319-21668-3_21"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1016\/j.ifacol.2020.12.2606"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/CDC.2014.7039363"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/LCSYS.2022.3172857"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/LRA.2022.3146951"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/CDC49753.2023.10383500"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/LCSYS.2018.2853182"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/TCNS.2020.3014602"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.23919\/ACC50511.2021.9483028"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/CDC.2016.7799279"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1145\/3302504.3313355"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/CDC45484.2021.9683444"},{"key":"ref26","first-page":"308","article-title":"Tractable reinforcement learning of signal temporal logic objectives","author":"Venkataraman","year":"2020","journal-title":"Learning for Dynamics and Control"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/ACCESS.2022.3218216"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/CDC45484.2021.9683477"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/CDC51059.2022.9992914"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref30"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610510.pdf?arnumber=10610510","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:50:17Z","timestamp":1723269017000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610510\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610510","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}