{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T19:59:46Z","timestamp":1773777586687,"version":"3.50.1"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100002241","name":"Japan Science and Technology Agency, Core Research for Evolutionary Science and Technology (JST CREST), Japan","doi-asserted-by":"publisher","award":["JPMJCR2012"],"award-info":[{"award-number":["JPMJCR2012"]}],"id":[{"id":"10.13039\/501100002241","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science (JSPS) KAKENHI, Japan","doi-asserted-by":"publisher","award":["JP21J10780"],"award-info":[{"award-number":["JP21J10780"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/access.2022.3218216","type":"journal-article","created":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T01:12:58Z","timestamp":1667524378000},"page":"114814-114828","source":"Crossref","is-referenced-by-count":12,"title":["Deep Reinforcement Learning Under Signal Temporal Logic Constraints Using Lagrangian Relaxation"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0145-3381","authenticated-orcid":false,"given":"Junya","family":"Ikemoto","sequence":"first","affiliation":[{"name":"Graduate School of Engineering Science, Osaka University, Toyonaka, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4009-270X","authenticated-orcid":false,"given":"Toshimitsu","family":"Ushio","sequence":"additional","affiliation":[{"name":"Graduate School of Engineering Science, Osaka University, Toyonaka, Japan"}]}],"member":"263","reference":[{"key":"ref32","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref31","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref30","article-title":"Auto-encoding variational Bayes","author":"kingma","year":"2013","journal-title":"arXiv 1312 6114"},{"key":"ref10","article-title":"Modular deep reinforcement learning with temporal logic specifications","author":"yuan","year":"2019","journal-title":"arXiv 1909 11591"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3101544"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30206-3_12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2009.06.021"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2014.7039363"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2018.2853182"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799279"},{"key":"ref17","article-title":"Tractable reinforcement learning of signal temporal logic objectives","author":"venkataraman","year":"2020","journal-title":"arXiv 2001 09467"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968254"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ETFA52439.2022.9921505"},{"key":"ref28","article-title":"Learning to walk in the real world with minimal human effort","author":"ha","year":"2020","journal-title":"arXiv 2002 08550"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref27","author":"bertsekas","year":"2014","journal-title":"Constrained Optimization and Lagrange Multiplier Methods"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2019.2962625"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"ref8","author":"baier","year":"2008","journal-title":"Principles of Model Checking"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-50763-7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0"},{"key":"ref9","article-title":"Logically-constrained reinforcement learning","author":"hasanbeig","year":"2018","journal-title":"arXiv 1801 08099"},{"key":"ref1","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv 1509 02971"},{"key":"ref22","author":"altman","year":"1999","journal-title":"Constrained Markov Decision Processes"},{"key":"ref21","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"arXiv 1812 05905"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683444"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/614"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3092676"},{"key":"ref25","article-title":"Learning from demonstrations using signal temporal logic","author":"puranic","year":"2021","journal-title":"arXiv 2102 07730"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9668973\/09933457.pdf?arnumber=9933457","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,28]],"date-time":"2022-11-28T20:05:15Z","timestamp":1669665915000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9933457\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/access.2022.3218216","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}