{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:24:34Z","timestamp":1766067874066,"version":"3.37.3"},"reference-count":22,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T00:00:00Z","timestamp":1706745600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Google and SERB Research Grants"},{"name":"ARTPARK"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1109\/lra.2023.3341775","type":"journal-article","created":{"date-parts":[[2023,12,12]],"date-time":"2023-12-12T18:57:55Z","timestamp":1702407475000},"page":"1373-1379","source":"Crossref","is-referenced-by-count":8,"title":["Funnel-Based Reward Shaping for Signal Temporal Logic Tasks in Reinforcement Learning"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8400-5092","authenticated-orcid":false,"given":"Naman","family":"Saxena","sequence":"first","affiliation":[{"name":"Department of Computer Science and Automation, Indian Institute of Science, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5892-5270","authenticated-orcid":false,"given":"Sandeep","family":"Gorantla","sequence":"additional","affiliation":[{"name":"Robert Bosch Center for Cyber-Physical Systems, Indian Institute of Science, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5452-8850","authenticated-orcid":false,"given":"Pushpak","family":"Jagtap","sequence":"additional","affiliation":[{"name":"Robert Bosch Center for Cyber-Physical Systems, Indian Institute of Science, Bangalore, India"}]}],"member":"263","reference":[{"volume-title":"Principles of Model Checking","year":"2008","author":"Baier","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2018.8431181"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206234"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30206-3_12"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2015.2398883"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799279"},{"issue":"3","key":"ref7","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"ref8","first-page":"308","article-title":"Tractable reinforcement learning of signal temporal logic objectives","volume-title":"Proc. Learn. Dyn. Control","author":"Venkataraman","year":"2020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8618746"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2017.8264095"},{"article-title":"Multi-agent reinforcement learning guided by signal temporal logic specifications","year":"2023","author":"Wang","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3218216"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/s0927-0507(05)80172-0"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15297-9_9"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2019.05.013"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2019.8814999"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2023.3341775"},{"article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","year":"2018","author":"Plappert","key":"ref19"},{"key":"ref20","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2018"},{"article-title":"A policy gradient approach for finite horizon constrained Markov decision processes","year":"2022","author":"Guin","key":"ref21"},{"key":"ref22","first-page":"10555","article-title":"A finite-time analysis of Q-learning with neural network function approximation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu","year":"2020"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10360389\/10354421.pdf?arnumber=10354421","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T20:12:29Z","timestamp":1705090349000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10354421\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2]]},"references-count":22,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/lra.2023.3341775","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"type":"electronic","value":"2377-3766"},{"type":"electronic","value":"2377-3774"}],"subject":[],"published":{"date-parts":[[2024,2]]}}}