{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T10:04:30Z","timestamp":1767261870273,"version":"3.44.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/iros40897.2019.8968254","type":"proceedings-article","created":{"date-parts":[[2020,1,30]],"date-time":"2020-01-30T23:53:51Z","timestamp":1580428431000},"page":"3481-3486","source":"Crossref","is-referenced-by-count":37,"title":["Structured Reward Shaping using Signal Temporal Logic specifications"],"prefix":"10.1109","author":[{"given":"Anand","family":"Balakrishnan","sequence":"first","affiliation":[{"name":"University of Southern California,Cyber-Physical Systems: Verification, Intelligence, Design and Analysis (CPS-VIDA) Group,Los Angeles,California"}]},{"given":"Jyotirmoy V.","family":"Deshmukh","sequence":"additional","affiliation":[{"name":"University of Southern California,Cyber-Physical Systems: Verification, Intelligence, Design and Analysis (CPS-VIDA) Group,Los Angeles,California"}]}],"member":"263","reference":[{"key":"ref30","first-page":"264","article-title":"Efficient Robust Monitoring for STL","author":"donz\u00e9","year":"2013","journal-title":"Computer Aided Verification Ser Lecture Notes in Computer Science"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s10703-017-0286-7"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2009.06.021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799279"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206234"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref15","first-page":"278","article-title":"Policy Invariance Under Reward Transformations: Theory and Application to Reward Shaping","author":"ng","year":"1999","journal-title":"Proceedings of the Sixteenth International Conference on Machine Learning ser ICML &#x2019;99"},{"key":"ref16","first-page":"429","article-title":"Learning from Demonstration for Shaping Through Inverse Reinforcement Learning","author":"suay","year":"2016","journal-title":"Proceedings of the 2016 International Conference on Autonomous Agents & Multiagent Systems ser AAMAS &#x2019;16 Richland SC International Foundation for Autonomous Agents and Multiagent Systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MRS.2017.8250926"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1007\/978-3-642-15297-9_9","article-title":"Robust satisfaction of temporal logic over real-valued signals","author":"donze","year":"2010","journal-title":"Proceedings of the 6th International Conference on Formal Modeling and Analysis of Timed Systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10703-018-0319-x"},{"key":"ref28","article-title":"Deep Reinforcement Learning with Double Q-learning","author":"van hasselt","year":"2015","journal-title":"arXiv 1509 06461 [cs]"},{"key":"ref4","article-title":"AI Safety Gridworlds","author":"leike","year":"2017","journal-title":"arXiv 1711 09883 [cs]"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2720851"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref3"},{"key":"ref6","first-page":"565","article-title":"Reward Shaping in Episodic Reinforcement Learning","author":"grzes","year":"2017","journal-title":"Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems ser AAMAS &#x2019;17 Richland SC International Foundation for Autonomous Agents and Multiagent Systems"},{"key":"ref29","article-title":"Proximal Policy Optimization Algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref5","article-title":"Concrete Problems in AI Safety","author":"amodei","year":"2016","journal-title":"arXiv 1606 06565"},{"key":"ref8","article-title":"ST-Lib: A Library for Specifying and Classifying Model Behaviors","author":"kapinski","year":"2016","journal-title":"SAE International Warrendale PA SAE Technical Paper 2016-01-0621"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2015.7171970"},{"key":"ref2","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv 1509 02971 [cs stat]"},{"key":"ref9","first-page":"135","article-title":"Specification-Based Monitoring of Cyber-Physical Systems: A Survey on Theory, Tools and Applications","author":"bartocci","year":"2018","journal-title":"Runtime Verification"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref20","article-title":"Temporal Logic Robustness for General Signal Classes","author":"houssam abbas","year":"2019","journal-title":"To Appear in the Proc of Hybrid Systems Computation and Control"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"356","DOI":"10.1007\/978-3-319-21668-3_21","article-title":"Time Robustness in MTL and Expressivity in Hybrid System Falsification","author":"akazaki","year":"2015","journal-title":"Computer Aided Verification Ser Lecture Notes in Computer Science"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/2883817.2883839"},{"journal-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01090-4_16"},{"key":"ref26","article-title":"Openai gym","volume":"abs 1606 1540","author":"brockman","year":"2016","journal-title":"CoRR"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"}],"event":{"name":"2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2019,11,3]]},"location":"Macau, China","end":{"date-parts":[[2019,11,8]]}},"container-title":["2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8957008\/8967518\/08968254.pdf?arnumber=8968254","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T19:22:21Z","timestamp":1756754541000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8968254\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/iros40897.2019.8968254","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}