{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,26]],"date-time":"2026-06-26T14:39:58Z","timestamp":1782484798222,"version":"3.54.5"},"reference-count":46,"publisher":"American Association for the Advancement of Science (AAAS)","issue":"37","funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1723995"],"award-info":[{"award-number":["IIS-1723995"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CMMI-1400167"],"award-info":[{"award-number":["CMMI-1400167"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Sci. Robot."],"published-print":{"date-parts":[[2019,12,18]]},"abstract":"<jats:p>A formal methods approach to reinforcement learning generates rewards from a formal language and guarantees safety.<\/jats:p>","DOI":"10.1126\/scirobotics.aay6276","type":"journal-article","created":{"date-parts":[[2019,12,19]],"date-time":"2019-12-19T00:06:06Z","timestamp":1576713966000},"source":"Crossref","is-referenced-by-count":96,"title":["A formal methods approach to interpretable reinforcement learning for robotic planning"],"prefix":"10.1126","volume":"4","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0756-7483","authenticated-orcid":true,"given":"Xiao","family":"Li","sequence":"first","affiliation":[{"name":"Department of Mechanical Engineering, Boston University, Boston, MA, USA."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0975-2204","authenticated-orcid":true,"given":"Zachary","family":"Serlin","sequence":"additional","affiliation":[{"name":"Department of Mechanical Engineering, Boston University, Boston, MA, USA."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guang","family":"Yang","sequence":"additional","affiliation":[{"name":"Division of Systems Engineering, Boston University, Boston, MA, USA."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Calin","family":"Belta","sequence":"additional","affiliation":[{"name":"Department of Mechanical Engineering, Boston University, Boston, MA, USA."},{"name":"Division of Systems Engineering, Boston University, Boston, MA, USA."}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"221","reference":[{"key":"e_1_3_2_2_2","unstructured":"D. Amodei C. Olah J. Steinhardt P. Christiano J. Schulman D. Man\u00e9 Concrete problems in AI safety. arXiv:1606.06565 [cs.AI] (2016)."},{"key":"e_1_3_2_3_2","unstructured":"T. Arnold D. Kasenberg M. Scheutz Value Alignment or Misalignment\u2014What Will Keep Systems Accountable? AAAI Workshops (2017)."},{"key":"e_1_3_2_4_2","unstructured":"A. Y. Ng D. Harada S. J. Russell Policy invariance under reward transformations: Theory and application to reward shaping in Proceedings of the Sixteenth International Conference on Machine Learning (ICML 1999) pp. 278\u2013287."},{"key":"e_1_3_2_5_2","unstructured":"P. F. Christiano M. Abate D. Amodei Supervising strong learners by amplifying weak experts. arXiv:1810.08575 [cs.LG] (2018)."},{"key":"e_1_3_2_6_2","unstructured":"D. Hadfield-Menell S. J. Russell P. Abbeel A. Dragan Cooperative inverse reinforcement learning in Proceedings of Advances in Neural Information Processing Systems (NeurIPS 2016) pp. 3909\u20133917."},{"key":"e_1_3_2_7_2","unstructured":"J. Leike D. Krueger T. Everitt M. Martic V. Maini S. Legg Scalable agent alignment via reward modeling: A research direction. arXiv:1811.07871 [cs.LG] (2018)."},{"key":"e_1_3_2_8_2","doi-asserted-by":"crossref","unstructured":"G. Mason R. Calinescu D. Kudenko A. Banks Assured reinforcement learning with formally verified abstract policies in Proceedings of the 9th International Conference on Agents and Artificial Intelligence (ICAART 2017) Porto Portugal pp. 105\u2013117.","DOI":"10.5220\/0006156001050117"},{"key":"e_1_3_2_9_2","unstructured":"O. Bastani Y. Pu A. Solar-Lezama Verifiable reinforcement learning via policy extraction in Proceedings of Advances in Neural Information Processing Systems (NeurIPS 2018) pp. 2494\u20132504."},{"key":"e_1_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2870052"},{"key":"e_1_3_2_11_2","doi-asserted-by":"crossref","unstructured":"G. De Giacomo L. Iocchi M. Favorito F. Patrizi Foundations for restraining bolts: Reinforcement learning with LTLf\/LDLf restraining specifications in Proceedings of the International Conference on Automated Planning and Scheduling (ICAPS 2019) pp. 128\u2013136.","DOI":"10.1609\/icaps.v29i1.3549"},{"key":"e_1_3_2_12_2","doi-asserted-by":"crossref","unstructured":"A. Camacho O. Chen S. Sanner S. A. Mcllraith Non-Markovian rewards expressed in LTL: Guiding search via reward shaping in The 10th Annual Symposium on Combinatorial Search (SoCS 2017) pp. 159\u2013160.","DOI":"10.1609\/socs.v8i1.18421"},{"key":"e_1_3_2_13_2","doi-asserted-by":"crossref","unstructured":"D. Aksaray A. Jones Z. Kong M. Schwager C. Belta Q-learning for robust satisfaction of signal temporal logic specifications in Proceedings of the IEEE 55th Conference on Decision and Control (CDC 2016) pp. 6565\u20136570.","DOI":"10.1109\/CDC.2016.7799279"},{"key":"e_1_3_2_14_2","doi-asserted-by":"crossref","unstructured":"A. Balakrishnan J. Deshmukh Structured reward functions using STL in Proceedings of the 22nd ACM International Conference on Hybrid Systems: Computation and Control (HSCC 2019) pp. 270\u2013271.","DOI":"10.1145\/3302504.3313355"},{"key":"e_1_3_2_15_2","doi-asserted-by":"crossref","unstructured":"M. Wen I. Papusha U. Topcu Learning from demonstrations with high-level side information in Proceedings of the 26th International Joint Conference on Artificial Intelligence (IJCAI 2017) pp. 3055\u20133061.","DOI":"10.24963\/ijcai.2017\/426"},{"key":"e_1_3_2_16_2","doi-asserted-by":"crossref","unstructured":"M. Alshiekh R. Bloem R. Ehlers B. K\u00f6nighofer S. Niekum U. Topcu Safe reinforcement learning via shielding in Proceedings of the 32nd AAAI Conference on Artificial Intelligence (AAAI 2018) pp. 2669\u20132678.","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"e_1_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Q. Gao D. Hajinezhad Y. Zhang Y Kantaros M Zavlanos Reduced variance deep reinforcement learning with temporal logic specifications in Proceedings of the 10th ACM\/IEEE International Conference on Cyber-Physical Systems (ICCPS 2019) pp. 237\u2013248.","DOI":"10.1145\/3302509.3311053"},{"key":"e_1_3_2_18_2","unstructured":"M. L. Littman U. Topcu J. Fu C. l. Isbell M. Wen J. MacGlashan Environment-independent task specifications via GLTL. arXiv:1704.04341 [cs.AI] (2017)."},{"key":"e_1_3_2_19_2","doi-asserted-by":"crossref","unstructured":"E. M. Hahn M. Perez S. Schewe F. Somenzi A. Trivedi D. Wojtczak Omega-regular objectives in model-free reinforcement learning in International Conference on Tools and Algorithms for the Construction and Analysis of Systems (TACAS Springer 2019) pp. 395\u2013412.","DOI":"10.1007\/978-3-030-17462-0_27"},{"key":"e_1_3_2_20_2","unstructured":"R. Toro Icarte T. Q. Klassen R. A. Valenzano S. A. Mcllraith Using reward machines for high-level task specification and decomposition in reinforcement learning in International Conference on Machine Learning (ICML 2018) pp. 2112\u20132121."},{"key":"e_1_3_2_21_2","doi-asserted-by":"crossref","unstructured":"B. Araki K. Vodrahalli T. Leech C. I. Vasile M. Donahue D. Rus Learning to plan with logical automata in Robotic: Science and Systems (RSS 2019) pp. 1\u20139.","DOI":"10.15607\/RSS.2019.XV.064"},{"key":"e_1_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1676"},{"key":"e_1_3_2_23_2","unstructured":"F. Bacchus C. Boutilier A. J. Grove Structured solution methods for non-Markovian decision processes in Proceedings of the Fourteenth National Conference on Artificial Intelligence and Ninth Conference on Innovative Applications of Artificial Intelligence (AAAI 1997) pp. 112\u2013117."},{"key":"e_1_3_2_24_2","unstructured":"F. Bacchus C. Boutilier A. Grove Rewarding behaviors in Proceedings of the Thirteenth National Conference on Artificial Intelligence (AAAI 1996) pp. 1160\u20131167."},{"key":"e_1_3_2_25_2","doi-asserted-by":"crossref","unstructured":"A. D. Ames J. W. Grizzle P. Tabuada Control barrier function based quadratic programs with application to adaptive cruise control in Proceedings of the 53rd IEEE Conference on Decision and Control (IEEE 2014) pp. 6271\u20136278.","DOI":"10.1109\/CDC.2014.7040372"},{"key":"e_1_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2019.2920206"},{"key":"e_1_3_2_27_2","doi-asserted-by":"crossref","unstructured":"R. Cheng G. Orosz R. M. Murray J. W. Burdick End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks in The Thirty-Third AAAI Conference on Artificial Intelligence (AAAI 2019) pp. 3387\u20133395.","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"e_1_3_2_28_2","doi-asserted-by":"crossref","unstructured":"P. Nilsson A. D. Ames Barrier functions: Bridging the gap between planning from specifications and safety critical control in IEEE Conference on Decision and Control (CDC 2018) pp. 765\u2013772.","DOI":"10.1109\/CDC.2018.8619142"},{"key":"e_1_3_2_29_2","doi-asserted-by":"crossref","unstructured":"X. Li C.-I. Vasile C. Belta Reinforcement learning with temporal logic rewards in Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2017) pp. 3834\u20133839.","DOI":"10.1109\/IROS.2017.8206234"},{"key":"e_1_3_2_30_2","doi-asserted-by":"crossref","unstructured":"M. F. E. Rohmer S. P. N. Singh V-REP: A versatile and scalable robot simulation framework in Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2013) pp. 1321\u20131326.","DOI":"10.1109\/IROS.2013.6696520"},{"key":"e_1_3_2_31_2","unstructured":"J. Ho S. Ermon Generative adversarial imitation learning in Proceedings of Advances in Neural Information Processing Systems (NeurIPS 2016) pp. 4565\u20134573."},{"key":"e_1_3_2_32_2","unstructured":"P. Dhariwal C. Hesse O. Klimov A. Nichol M. Plappert A. Radford J. Schulman S. Sidor Y. Wu P. Zhokhov Openai baselines (2017); https:\/\/github.com\/openai\/baselines."},{"key":"e_1_3_2_33_2","doi-asserted-by":"crossref","unstructured":"X. C. Ding C. Belta C. G. Cassandras Receding horizon surveillance with temporal logic specifications in Proceedings of the 49th IEEE Conference on Descision and Control (CDC 2010) pp. 256\u2013261.","DOI":"10.1109\/CDC.2010.5717131"},{"key":"e_1_3_2_34_2","doi-asserted-by":"crossref","unstructured":"R. S. Sutton A. G. Barto Reinforcement Learning: An Introduction (MIT Press 1998).","DOI":"10.1109\/TNN.1998.712192"},{"key":"e_1_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_36_2","unstructured":"R. S. Sutton D. A. McAllester S. P. Singh Y. Mansour Policy gradient methods for reinforcement learning with function approximation in Proceedings of the 12th International Conference on Neural Information Processing Systems (NeurIPS 1999) pp. 1057\u20131063."},{"key":"e_1_3_2_37_2","unstructured":"T. Degris M. White R. S. Sutton Off-policy actor-critic. arXiv:1205.4839 [cs.LG] (2012)."},{"key":"e_1_3_2_38_2","unstructured":"J. Schulman F. Wolski P. Dhariwal A. Radford O. Klimov Proximal policy optimization algorithms. arXiv:1707.06347 [cs.LG] (2017)."},{"key":"e_1_3_2_39_2","unstructured":"C. Baier J. P. Katoen Principles of Model Checking (MIT Press 2008)."},{"key":"e_1_3_2_40_2","doi-asserted-by":"crossref","unstructured":"R. Cheng G. Orosz R. M. Murray J. W. Burdick End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks. arXiv:1903.08792 [cs.LG] (2019).","DOI":"10.1609\/aaai.v33i01.33013387"},{"key":"e_1_3_2_41_2","doi-asserted-by":"crossref","unstructured":"A. Agrawal K. Sreenath Discrete control barrier functions for safety-critical control of discrete systems with application to bipedal robot navigation in Robotics: Science and Systems (2017).","DOI":"10.15607\/RSS.2017.XIII.073"},{"key":"e_1_3_2_42_2","unstructured":"Gurobi Optimization Gurobi optimizer reference manual (2018)."},{"key":"e_1_3_2_43_2","doi-asserted-by":"crossref","unstructured":"T. Latvala Efficient model checking of safety properties in International SPIN Workshop on Model Checking of Software (Springer 2003) pp. 74\u201388.","DOI":"10.1007\/3-540-44829-2_5"},{"key":"e_1_3_2_44_2","unstructured":"C. Vasile A. Ulusoy LTL Optimal Multi-Agent Planner (LOMAP) Github repository (2017); https:\/\/github.com\/wasserfeder\/lomap."},{"key":"e_1_3_2_45_2","doi-asserted-by":"crossref","unstructured":"O. Maler D. Nickovic Monitoring temporal properties of continuous signals in Formal Techniques Modelling and Analysis of Timed and Fault-Tolerant Systems (Springer 2004) pp. 152\u2013166.","DOI":"10.1007\/978-3-540-30206-3_12"},{"key":"e_1_3_2_46_2","unstructured":"K. Y. Rozier \u201cExplicit or symbolic translation of linear temporal logic to automata \u201d thesis Rice University (2013)."},{"key":"e_1_3_2_47_2","unstructured":"M. Bjelonic YOLO ROS: Real-time object detection for ROS Github repository (2016\u20132018); https:\/\/github.com\/leggedrobotics\/darknet_ros."}],"container-title":["Science Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/syndication.highwire.org\/content\/doi\/10.1126\/scirobotics.aay6276","content-type":"unspecified","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/www.science.org\/doi\/pdf\/10.1126\/scirobotics.aay6276","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,16]],"date-time":"2024-01-16T11:21:22Z","timestamp":1705404082000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.science.org\/doi\/10.1126\/scirobotics.aay6276"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,18]]},"references-count":46,"journal-issue":{"issue":"37","published-print":{"date-parts":[[2019,12,18]]}},"alternative-id":["10.1126\/scirobotics.aay6276"],"URL":"https:\/\/doi.org\/10.1126\/scirobotics.aay6276","relation":{},"ISSN":["2470-9476"],"issn-type":[{"value":"2470-9476","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,12,18]]},"article-number":"eaay6276"}}