{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T16:33:26Z","timestamp":1729614806963,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/icme52920.2022.9859768","type":"proceedings-article","created":{"date-parts":[[2022,8,26]],"date-time":"2022-08-26T15:45:18Z","timestamp":1661528718000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["Learn Effective Representation for Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Yuan","family":"Zhan","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiwei","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guoliang","family":"Fan","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Data-efficient learning of feedback policies from image pixels using deep dynamical models","volume":"abs 1510 2173","author":"assael","year":"2015","journal-title":"ArXiv"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v35i12.17276","article-title":"Improving sample efficiency in model-free reinforcement learning from images","author":"yarats","year":"2021","journal-title":"AAAI"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2017.8080408"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref14","article-title":"Representation learning with contrastive predictive coding","volume":"abs 1807 3748","author":"van den oord","year":"2018","journal-title":"ArXiv"},{"key":"ref15","article-title":"Bootstrap latent-predictive representations for multi-task reinforcement learning","author":"guo","year":"2020","journal-title":"ICML"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35289-8_14"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref19","article-title":"Avoiding latent variable collapse with generative skip models","author":"dieng","year":"2019","journal-title":"AISTATS"},{"key":"ref28","article-title":"Mutual information neural estimation","author":"belghazi","year":"2018","journal-title":"ICML"},{"key":"ref4","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","author":"watter","year":"2015","journal-title":"NeurIPS"},{"key":"ref27","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"ICML"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.07.006"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref5","article-title":"Reinforcement learning with unsupervised auxiliary tasks","volume":"abs 1611 5397","author":"jaderberg","year":"2017","journal-title":"ArXiv"},{"key":"ref8","article-title":"Towards characterizing divergence in deep q-learning","volume":"abs 1903 8894","author":"achiam","year":"2019","journal-title":"ArXiv"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref9","article-title":"D2rl: Deep dense architectures in reinforcement learning","volume":"abs 2010 9163","author":"sinha","year":"2020","journal-title":"ArXiv"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref22","article-title":"Proximal policy optimization algorithms","volume":"abs 1707 6347","author":"schulman","year":"2017","journal-title":"ArXiv"},{"key":"ref21","article-title":"Can increasing input dimensionality improve deep reinforcement learning?","author":"ota","year":"2020","journal-title":"ICML"},{"key":"ref24","article-title":"Xception: Deep learning with depth-wise separable convolutions","author":"chollet","year":"2017","journal-title":"CVPR"},{"key":"ref23","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"key":"ref26","article-title":"Deep variational information bottleneck","volume":"abs 1612 410","author":"alemi","year":"2017","journal-title":"ArXiv"},{"key":"ref25","article-title":"The information bottleneck method","volume":"physics 4057","author":"tishby","year":"2000","journal-title":"ArXiv"}],"event":{"name":"2022 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2022,7,18]]},"location":"Taipei, Taiwan","end":{"date-parts":[[2022,7,22]]}},"container-title":["2022 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9859562\/9858923\/09859768.pdf?arnumber=9859768","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,15]],"date-time":"2023-02-15T22:20:48Z","timestamp":1676499648000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9859768\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icme52920.2022.9859768","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}