{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:22:43Z","timestamp":1768072963616,"version":"3.49.0"},"reference-count":57,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["W911NF-17-2-0181"],"award-info":[{"award-number":["W911NF-17-2-0181"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"ARO","doi-asserted-by":"publisher","award":["W911NF-21-1-0097"],"award-info":[{"award-number":["W911NF-21-1-0097"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000181","name":"AFOSR","doi-asserted-by":"publisher","award":["FA9550-22-1-0273"],"award-info":[{"award-number":["FA9550-22-1-0273"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9981999","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:38:15Z","timestamp":1672083495000},"page":"4076-4083","source":"Crossref","is-referenced-by-count":15,"title":["Planning to Practice: Efficient Online Fine-Tuning by Composing Goals in Latent Space"],"prefix":"10.1109","author":[{"given":"Kuan","family":"Fang","sequence":"first","affiliation":[{"name":"University of California,Berkeley"}]},{"given":"Patrick","family":"Yin","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}]},{"given":"Ashvin","family":"Nair","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[{"name":"University of California,Berkeley"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"issue":"12","key":"ref3","article-title":"Conservative Data Sharing for Multi-Task Offline Reinforcement Learning","volume":"34","author":"Yu","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561692"},{"key":"ref5","volume":"4","author":"Chebotar","year":"2021","journal-title":"Actionable Models: Unsupervised Offline Reinforcement Learning of Robotic Skills"},{"key":"ref6","author":"Kalashnikov","year":"2021","journal-title":"MT-Opt: Continuous Multi-Task Robotic Reinforcement Learning at Scale"},{"key":"ref7","article-title":"Learning Structured Output Representation using Deep Conditional Generative Models","author":"Sohn","year":"2015","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"ref9","first-page":"2052","article-title":"Off-Policy Deep Reinforcement Learning Without Exploration","volume-title":"International Conference on Machine Learning (ICML)","author":"Fujimoto","year":"2019"},{"key":"ref10","article-title":"Stabilizing Off-Policy Q-Learning via Bootstrapping Error Reduction","author":"Kumar","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","author":"Zhang","year":"2021","journal-title":"BRAC+: Going deeper with behavior regularized offline reinforcement learning"},{"key":"ref12","article-title":"Conservative Q-Learning for Offline Reinforcement Learning","author":"Kumar","year":"2020","journal-title":"arXiv preprint"},{"key":"ref13","article-title":"A Minimalist Approach to Offline Reinforcement Learning","author":"Fujimoto","year":"2021","journal-title":"arXiv preprint"},{"key":"ref14","article-title":"COG: Connecting New Skills to Past Experience with Offline Reinforcement Learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Singh","year":"2020"},{"key":"ref15","author":"Nair","year":"2020","journal-title":"AWAC: Accelerating Online Reinforcement Learning with Offline Datasets"},{"key":"ref16","author":"Villaflor","year":"2020","journal-title":"Fine-tuning Offline Reinforcement Learning With Model-Based Policy Optimization"},{"key":"ref17","author":"Lu","year":"2021","journal-title":"AW-Opt: Learning Robotic Skills with Imitation and Reinforcement at Scale"},{"key":"ref18","author":"Lee","year":"2021","journal-title":"Offline-to-Online Reinforcement Learning via Balanced Replay and Pessimistic Q-Ensemble"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-022-1383-7"},{"key":"ref20","first-page":"651","article-title":"Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation","volume-title":"Conference on Robot Learning","author":"Kalashnikov","year":"2018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.076"},{"key":"ref22","article-title":"Learning to Achieve Goals","author":"Kaelbling","year":"1993","journal-title":"IJCAI"},{"key":"ref23","article-title":"Universal Value Function Approximators","volume-title":"International Conference on Machine Learning (ICML)","author":"Schaul","year":"2015"},{"key":"ref24","article-title":"C-Learning: Learning to Achieve Goals via Recursive Classification","volume":"abs\/2011.08909","author":"Eysenbach","year":"2021","journal-title":"ArXiv"},{"key":"ref25","article-title":"Hindsight Experience Replay","author":"Andrychowicz","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref26","article-title":"Skew-Fit: State-Covering Self-Supervised Reinforcement Learning","volume":"abs\/1903.03698","author":"Pong","year":"2020","journal-title":"ArXiv"},{"key":"ref27","article-title":"Curriculum-guided Hindsight Experience Replay","author":"Fang","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref28","article-title":"Goal-conditioned Imitation Learning","author":"Ding","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref29","article-title":"Relay Policy Learning: Solving Long-Horizon Tasks via Imitation and Reinforcement Learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Gupta","year":"2019"},{"key":"ref30","article-title":"Policy Continuation with Hindsight Inverse Dynamics","author":"Sun","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref31","article-title":"Rewriting History with Inverse RL: Hindsight Inference for Policy Improvement","volume":"abs\/2002.11089","author":"Eysenbach","year":"2020","journal-title":"ArXiv"},{"key":"ref32","article-title":"Learning to Reach Goals via Iterated Supervised Learning","author":"Ghosh","year":"2021","journal-title":"arXiv: Learning"},{"key":"ref33","article-title":"Visual Reinforcement Learning with Imagined Goals","author":"Nair","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref34","article-title":"Contextual Imagined Goals for Self-Supervised Robotic Learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Nair","year":"2019"},{"key":"ref35","article-title":"Goal-Conditioned Reinforcement Learning with Imagined Subgoals","volume-title":"International Conference on Machine Learning (ICML)","author":"Chane-Sane","year":"2021"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(71)90010-5"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/70.508439"},{"key":"ref38","article-title":"D* lite","volume-title":"AAAI Conference on Artificial Intelligence","author":"Koenig","year":"2002"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1177\/0278364911406761"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913488805"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980280"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6906922"},{"key":"ref43","article-title":"Planning with Goal-Conditioned Policies","author":"Nasiriany","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref44","article-title":"Search on the Replay Buffer: Bridging Planning and Reinforcement Learning","author":"Eysenbach","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref45","article-title":"Dynamics Learning with Cascaded Variational Inference for Multi-Step Manipulation","volume-title":"Conference on Robot Learning (CoRL)","author":"Fang","year":"2019"},{"key":"ref46","article-title":"PlanGAN: Model-based Planning With Sparse Rewards and Multiple Goals","volume":"abs\/2006.00900","author":"Charlesworth","year":"2020","journal-title":"ArXiv"},{"key":"ref47","article-title":"Long-Horizon Visual Planning with Goal-Conditioned Hierarchical Predictors","volume":"abs\/2006.13205","author":"Pertsch","year":"2020","journal-title":"ArXiv"},{"key":"ref48","author":"Sharma","year":"2021","journal-title":"Autonomous Reinforcement Learning via Subgoal Curricula"},{"key":"ref49","article-title":"C-Planning: An Automatic Curriculum for Learning Goal-Reaching Tasks","volume":"abs\/2110.12080","author":"Zhang","year":"2021","journal-title":"ArXiv"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460730"},{"key":"ref51","article-title":"Hierarchical Foresight: Self-Supervised Learning of Long-Horizon Tasks via Visual Subgoal Generation","author":"Nair","year":"2019","journal-title":"arXiv preprint"},{"key":"ref52","article-title":"Auto-Encoding Variational Bayes","volume-title":"International Conference on Learning Representations (ICLR)","author":"Kingma","year":"2014"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3057563"},{"key":"ref54","author":"Coumans","year":"2016","journal-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning"},{"key":"ref55","article-title":"Neural Discrete Representation Learning","author":"van den Oord","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref57","article-title":"Offline Reinforcement Learning With Implicit Q-Learning","author":"Kostrikov","year":"2021","journal-title":"arXiv preprint"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Kyoto, Japan","start":{"date-parts":[[2022,10,23]]},"end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09981999.pdf?arnumber=9981999","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T04:13:27Z","timestamp":1710389607000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9981999\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9981999","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}