{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:11:21Z","timestamp":1740100281884,"version":"3.37.3"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100017412","name":"BD","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100017412","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561799","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"10786-10792","source":"Crossref","is-referenced-by-count":0,"title":["Zero-shot Policy Learning with Spatial Temporal Reward Decomposition on Contingency-aware Observation"],"prefix":"10.1109","author":[{"given":"Huazhe","family":"Xu","sequence":"first","affiliation":[]},{"given":"Boyuan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Trevor","family":"Darrell","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Assessing generalization in deep reinforcement learning","year":"2018","author":"packer","key":"ref39"},{"article-title":"Learning to act by predicting the future","year":"2016","author":"dosovitskiy","key":"ref38"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"article-title":"Rudder: Return decomposition for delayed rewards","year":"2018","author":"arjona-medina","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCKE.2012.6395362"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2010.01.001"},{"key":"ref37","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"NeurIPS"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref35","first-page":"2753","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"NeurIPS"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"222","DOI":"10.7551\/mitpress\/3115.003.0030","article-title":"A possibility for implementing curiosity and boredom in model-building neural controllers","author":"schmidhuber","year":"1991","journal-title":"Proc of the International Conference on Simulation of Adaptive Behavior From Animals to Animats"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461196"},{"key":"ref40","article-title":"Super Mario Bros for OpenAI Gym","author":"kauten","year":"2018","journal-title":"Github"},{"key":"ref11","first-page":"9804","article-title":"Object-oriented dynamics predictor","author":"zhu","year":"2018","journal-title":"Advances in neural information processing systems"},{"article-title":"Task-agnostic dynamics priors for deep reinforcement learning","year":"2019","author":"du","key":"ref12"},{"article-title":"Strategic object oriented reinforcement learning","year":"2018","author":"keramati","key":"ref13"},{"key":"ref14","article-title":"Optimizing object-based perception and control by free-energy principle","author":"li","year":"2019","journal-title":"CoRR"},{"key":"ref15","first-page":"1480","article-title":"Darla: Improving zero-shot transfer in reinforcement learning","author":"higgins","year":"2017","journal-title":"ICML"},{"key":"ref16","first-page":"7156","article-title":"Hierarchical reinforcement learning for zero-shot generalization with subtask dependencies","author":"sohn","year":"2018","journal-title":"NeurIPS"},{"key":"ref17","first-page":"2661","article-title":"Zero-shot task generalization with multi-task deep reinforcement learning","author":"oh","year":"2017","journal-title":"ICML"},{"key":"ref18","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"2000","journal-title":"ICML"},{"key":"ref19","first-page":"1837","article-title":"Multi-robot inverse reinforcement learning under occlusion with state transition estimation","author":"bogert","year":"2015","journal-title":"AAMAS"},{"key":"ref28","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.4324\/9781315740218"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968063"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref3"},{"article-title":"Contingency-aware exploration in reinforcement learning","year":"2018","author":"choi","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273572"},{"article-title":"Investigating human priors for playing video games","year":"2018","author":"dubey","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794224"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"789","DOI":"10.1016\/S0005-1098(99)00214-9","article-title":"Constrained model predictive control: Stability and optimality","volume":"36","author":"mayne","year":"2000","journal-title":"Automatica"},{"article-title":"High-dimensional continuous control using generalized advantage estimation","year":"2015","author":"schulman","key":"ref2"},{"article-title":"Grasp2vec: Learning object representations from self-supervised grasping","year":"2018","author":"jang","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","year":"2018","author":"plappert","key":"ref46"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2002.1023403"},{"article-title":"Exploration by random network distillation","year":"2018","author":"burda","key":"ref45"},{"key":"ref22","first-page":"691","article-title":"Inverse reinforcement learning in partially observable environments","volume":"12","author":"choi","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref21","first-page":"1034","article-title":"Expectation-maximization for inverse reinforcement learning with hidden data","author":"bogert","year":"2016","journal-title":"AAMAS"},{"key":"ref42","first-page":"305","article-title":"Alvinn: An autonomous land vehicle in a neural network","author":"pomerleau","year":"1989","journal-title":"Advances in neural information processing systems"},{"key":"ref24","first-page":"1071","article-title":"Learning neural network policies with guided policy search under unknown dynamics","author":"levine","year":"2014","journal-title":"NIPS"},{"article-title":"Mapping state space using landmarks for universal goal reaching","year":"2019","author":"huang","key":"ref41"},{"key":"ref23","first-page":"1449","article-title":"A game-theoretic approach to apprentice-ship learning","author":"syed","year":"2008","journal-title":"Advances in neural information processing systems"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref44"},{"key":"ref26","first-page":"78","article-title":"Feature selection, l 1 vs. l 2 regularization, and rotational invariance","author":"ng","year":"2004","journal-title":"Proceedings of the twenty-first international conference on Machine learning"},{"key":"ref43","first-page":"103","article-title":"A framework for behavioural cloning","volume":"15","author":"bain","year":"1995","journal-title":"Machine Intelligence"},{"key":"ref25","first-page":"1153","article-title":"Boosting structured prediction for imitation learning","author":"bagnell","year":"2007","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2021,5,30]]},"location":"Xi'an, China","end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561799.pdf?arnumber=9561799","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T06:15:39Z","timestamp":1725948939000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561799\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561799","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}