{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T22:12:35Z","timestamp":1740175955791,"version":"3.37.3"},"reference-count":20,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61903215"],"award-info":[{"award-number":["61903215"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/lra.2021.3091885","type":"journal-article","created":{"date-parts":[[2021,7,21]],"date-time":"2021-07-21T20:17:23Z","timestamp":1626898643000},"page":"6601-6607","source":"Crossref","is-referenced-by-count":5,"title":["Learning to Discover Task-Relevant Features for Interpretable Reinforcement Learning"],"prefix":"10.1109","volume":"6","author":[{"given":"Qiyuan","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoteng","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8748-1964","authenticated-orcid":false,"given":"Yiqin","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenghao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9386-5825","authenticated-orcid":false,"given":"Jun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9107-6390","authenticated-orcid":false,"given":"Yu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8591-8843","authenticated-orcid":false,"given":"Bin","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"12350","article-title":"Towards interpretable reinforcement learning using attention augmented agents","author":"mott","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref11","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref12","first-page":"7487","article-title":"Stabilizing transformers for reinforcement learning","author":"parisotto","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","year":"2018","author":"levine","key":"ref13"},{"key":"ref14","first-page":"361","article-title":"Stochastic structured variational inference","author":"hoffman","year":"2015","journal-title":"Proc 18th Int Conf Artif Intell Statist"},{"key":"ref15","article-title":"Recurrent experience replay in distributed reinforcement learning","author":"kapturowski","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","year":"2014","author":"chung","key":"ref16"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref19","article-title":"Torcs, the open racing car simulator","volume":"4","author":"wymann","year":"2000"},{"article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","author":"berner","key":"ref4"},{"key":"ref3","first-page":"4295","article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"article-title":"Representation learning with contrastive predictive coding","year":"2018","author":"oord","key":"ref6"},{"article-title":"Stochastic latent actor-critic: Deep reinforcement learning with a latent variable model","year":"2019","author":"lee","key":"ref5"},{"article-title":"Learning invariant representations for reinforcement learning without reconstruction","year":"2020","author":"zhang","key":"ref8"},{"key":"ref7","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref2","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref9","first-page":"1792","article-title":"Visualizing and understanding atari agents","author":"greydanus","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref20","article-title":"Variational recurrent models for solving partially observable control tasks","author":"han","year":"2019","journal-title":"Proc Int Conf Learn Representations"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/9475905\/09463791.pdf?arnumber=9463791","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:54:11Z","timestamp":1652194451000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9463791\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":20,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/lra.2021.3091885","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"type":"electronic","value":"2377-3766"},{"type":"electronic","value":"2377-3774"}],"subject":[],"published":{"date-parts":[[2021,10]]}}}