{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:11:14Z","timestamp":1740100274320,"version":"3.37.3"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561402","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"6635-6641","source":"Crossref","is-referenced-by-count":4,"title":["DisCo RL: Distribution-Conditioned Reinforcement Learning for General-Purpose Policies"],"prefix":"10.1109","author":[{"given":"Soroush","family":"Nasiriany","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vitchyr H.","family":"Pong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashvin","family":"Nair","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Khazatsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Glen","family":"Berseth","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref38","first-page":"362","article-title":"Expectation propagation for approximate bayesian inference","author":"minka","year":"2001","journal-title":"UAI"},{"article-title":"Density estimation using real nvp","year":"2016","author":"dinh","key":"ref33"},{"article-title":"Contextual markov decision processes","year":"2015","author":"hallak","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref30","article-title":"Efficient exploration via state marginal matching","author":"lee","year":"2019","journal-title":"ICLRE"},{"key":"ref37","article-title":"Stochastic Backpropagation and Approximate Inference in Deep Generative Models","author":"rezende","year":"2014","journal-title":"ICML"},{"key":"ref36","article-title":"Auto-Encoding Variational Bayes","author":"kingma","year":"2014","journal-title":"ICLRE"},{"key":"ref35","first-page":"10 215","article-title":"Glow: Generative flow with invertible 1x1 convolutions","author":"kingma","year":"2018","journal-title":"NeurIPS"},{"key":"ref34","first-page":"4790","article-title":"Conditional image generation with pixelcnn decoders","author":"van den oord","year":"2016","journal-title":"NeurIPS"},{"key":"ref10","article-title":"Sub-goal trees&#x2013;a framework for goal-directed trajectory prediction and optimization","author":"jurgenson","year":"2020","journal-title":"ICML"},{"key":"ref40","article-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","author":"haarnoja","year":"2018","journal-title":"ICML"},{"key":"ref11","article-title":"Hindsight Experience Replay","author":"andrychowicz","year":"2017","journal-title":"NeurIPS"},{"article-title":"Hierarchical Actor-Critic","year":"2017","author":"levy","key":"ref12"},{"key":"ref13","article-title":"Hindsight policy gradients","author":"rauber","year":"2017","journal-title":"CoRR"},{"article-title":"Automatic goal generation for reinforcement learning agents","year":"0","author":"florensa","key":"ref14"},{"article-title":"Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research","year":"2018","author":"plappert","key":"ref15"},{"key":"ref16","first-page":"1331","article-title":"Curious: intrinsically motivated modular multi-goal reinforcement learning","author":"colas","year":"2019","journal-title":"ICML"},{"key":"ref17","first-page":"487","article-title":"Curiosity driven exploration of learned disentangled goal spaces","author":"laversanne-finot","year":"2018","journal-title":"CoRL"},{"key":"ref18","first-page":"530","article-title":"Contextual imagined goals for self-supervised robotic learning","author":"nair","year":"2020","journal-title":"CoRL"},{"key":"ref19","article-title":"Goal-aware prediction: Learning to model what matters","author":"nair","year":"2020","journal-title":"ICML"},{"key":"ref28","article-title":"Universal successor features approximators","author":"borsa","year":"2018","journal-title":"ICLRE"},{"article-title":"Floyd-warshall reinforcement learning: Learning from past experiences to reach new goals","year":"2018","author":"dhiman","key":"ref4"},{"key":"ref27","first-page":"4055","article-title":"Successor features for transfer in reinforcement learning","author":"barreto","year":"2017","journal-title":"NeurIPS"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00278"},{"key":"ref6","article-title":"Data-Efficient Hierarchical Reinforcement Learning","author":"nachum","year":"2018","journal-title":"NeurIPS"},{"key":"ref29","first-page":"501","article-title":"Transfer in deep reinforcement learning using successor features and generalised policy improvement","author":"barreto","year":"2018","journal-title":"ICML"},{"key":"ref5","article-title":"Temporal Difference Models: Model-Free Deep RL For Model-Based Control","author":"pong","year":"2018","journal-title":"ICLRE"},{"key":"ref8","article-title":"Unsupervised Control Through Non-Parametric Discriminative Rewards","author":"warde-farley","year":"2019","journal-title":"ICLRE"},{"key":"ref7","article-title":"Visual Reinforcement Learning with Imagined Goals","author":"nair","year":"2018","journal-title":"NeurIPS"},{"key":"ref2","article-title":"Universal Value Function Approximators","author":"schaul","year":"2015","journal-title":"ICML"},{"key":"ref9","article-title":"Skew-fit: State-covering self-supervised reinforcement learning","author":"pong","year":"2020","journal-title":"ICML"},{"key":"ref1","first-page":"1094","article-title":"Learning to achieve goals","volume":"2","author":"kaelbling","year":"1993","journal-title":"IJCAI"},{"article-title":"Generative adversarial imitation from observation","year":"2018","author":"torabi","key":"ref20"},{"key":"ref45","first-page":"13 485","article-title":"Exploration via hindsight goal generation","author":"ren","year":"2019","journal-title":"NeurIPS"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/687"},{"key":"ref21","first-page":"8538","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","author":"fu","year":"2018","journal-title":"NeurIPS"},{"article-title":"IKEA furniture assembly environment for long-horizon complex manipulation tasks","year":"2019","author":"lee","key":"ref42"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462901"},{"key":"ref41","article-title":"Rewriting history with inverse rl: Hindsight inference for policy improvement","author":"eysenbach","year":"2020","journal-title":"NeurIPS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462891"},{"key":"ref44","article-title":"Go-explore: a new approach for hard-exploration problems","author":"ecoffet","year":"2019","journal-title":"CoRR"},{"article-title":"Deep successor reinforcement learning","year":"2016","author":"kulkarni","key":"ref26"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114729"},{"key":"ref25","first-page":"1755","article-title":"Imitating latent policies from observation","author":"edwards","year":"2019","journal-title":"ICML"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2021,5,30]]},"location":"Xi'an, China","end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561402.pdf?arnumber=9561402","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:47:08Z","timestamp":1652197628000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561402\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561402","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}