{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T13:19:02Z","timestamp":1756991942971,"version":"3.28.0"},"reference-count":67,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610606","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"9168-9175","source":"Crossref","is-referenced-by-count":6,"title":["SPRINT: Scalable Policy Pre-Training via Language Instruction Relabeling"],"prefix":"10.1109","author":[{"given":"Jesse","family":"Zhang","sequence":"first","affiliation":[{"name":"University of Southern California"}]},{"given":"Karl","family":"Pertsch","sequence":"additional","affiliation":[{"name":"UC Berkeley"}]},{"given":"Jiahui","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Southern California"}]},{"given":"Joseph J.","family":"Lim","sequence":"additional","affiliation":[{"name":"Korea Advanced Institute of Science and Technology"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/4-431-31381-8_23"},{"key":"ref3","article-title":"Learning an embedding space for transferable robot skills","author":"Hausman","year":"2018","journal-title":"ICLR"},{"key":"ref4","article-title":"Learning latent plans from 
play","author":"Lynch","year":"2020","journal-title":"CoRL"},{"key":"ref5","article-title":"Accelerating reinforcement learning with learned skill priors","author":"Pertsch","year":"2020","journal-title":"CoRL"},{"journal-title":"arXiv: 2304.13653.","article-title":"Learning agile soccer skills for a bipedal robot with deep reinforcement learning, 2023","author":"Haarnoja","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3180108"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2021.XVII.047","article-title":"Language conditioned imitation learning over unstructured data","volume-title":"Robotics: Science and Systems","author":"Lynch","year":"2021"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2023.3295255"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.025"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR42600.2020.01075","article-title":"ALFRED: A Benchmark for Interpreting Grounded Instructions for Everyday Tasks","volume-title":"The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Shridhar"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1197"},{"key":"ref13","article-title":"R3m: A universal visual representation for robot manipulation","author":"Nair","year":"2022","journal-title":"CoRL"},{"key":"ref14","article-title":"Minedojo: Building open-ended embodied agents with internet-scale knowledge","author":"Fan","year":"2022","journal-title":"arXiv preprint arXiv: Arxiv-2206.08853"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.3115\/1687878.1687892"},{"article-title":"Modular multitask reinforcement learning with policy sketches","volume-title":"International Conference on Machine Learning","author":"Andreas","key":"ref16"},{"key":"ref17","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied 
agents","author":"Huang","year":"2022","journal-title":"arXiv preprint arXiv:2201.07207"},{"key":"ref18","article-title":"Do as i can and not as i say: Grounding language in robotic affordances","author":"Ahn","year":"2022","journal-title":"arXiv preprint arXiv:2204.01691"},{"key":"ref19","article-title":"Inner monologue: Embodied reasoning through planning with language models","author":"Huang","year":"2022","journal-title":"arXiv preprint arXiv:2207.05608"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"ref21","article-title":"Language as a cognitive tool to imagine goals in curiosity driven exploration","author":"Colas","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308603"},{"key":"ref23","article-title":"Pre-trained language models for interactive decision-making","volume-title":"Advances in Neural Information Processing Systems","author":"Li","year":"2022"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2002.1014739"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509336"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref27","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv preprint arXiv:2005.01643"},{"volume-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","year":"2019","author":"Peng","key":"ref28"},{"article-title":"Cog: Connecting new skills to past experience with offline reinforcement learning","volume-title":"Conference on Robot Learning","author":"Singh","key":"ref29"},{"key":"ref30","article-title":"Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020","journal-title":"arXiv preprint 
arXiv:2006.09359"},{"article-title":"Offline reinforcement learning with implicit q-learning","volume-title":"International Conference on Learning Representations","author":"Kostrikov","key":"ref31"},{"key":"ref32","article-title":"RL 2: Fast reinforcement learning via slow reinforcement learning","author":"Duan","year":"2016","journal-title":"arXiv preprint arXiv:1611.02779"},{"key":"ref33","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"Finn","year":"2017","journal-title":"ICML"},{"key":"ref34","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","author":"Rakelly","year":"2019","journal-title":"ICML"},{"article-title":"Skill-based meta-reinforcement learning","volume-title":"International Conference on Learning Representations (ICLR)","author":"Nam","key":"ref35"},{"key":"ref36","article-title":"Variational option discovery algorithms","author":"Achiam","year":"2018","journal-title":"arXiv"},{"key":"ref37","article-title":"Diversity is all you need: Learning skills without a reward function","author":"Eysenbach","year":"2019","journal-title":"ICLR"},{"key":"ref38","article-title":"Dynamics-aware unsupervised discovery of skills","author":"Sharma","year":"2019","journal-title":"arXiv"},{"key":"ref39","article-title":"Opal: Offline primitive discovery for accelerating offline reinforcement learning","author":"Ajay","year":"2020","journal-title":"arXiv preprint arXiv:2010.13611"},{"key":"ref40","article-title":"Parrot: Data-driven behavioral priors for reinforcement learning","author":"Singh","year":"2021","journal-title":"ICLR"},{"journal-title":"Discovering and achieving goals via world models","year":"2021","author":"Mendonca","key":"ref41"},{"key":"ref42","first-page":"1518","article-title":"Actionable models: Unsupervised offline reinforcement learning of robotic skills","volume-title":"Proceedings of the 38th International Conference on Machine 
Learning","volume":"139","author":"Chebotar"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.027"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.010"},{"key":"ref45","article-title":"Cacti: A framework for scalable multi-task multi-scene visual imitation learning","author":"Mandi","year":"2023","journal-title":"arXiv: 2212.05711"},{"article-title":"Robotic skill acquisition via instruction augmentation with vision-language models","volume-title":"Proceedings of Robotics: Science and Systems","author":"Xiao","key":"ref46"},{"key":"ref47","article-title":"Relay policy learning: Solving long-horizon tasks via imitation and reinforcement learning","author":"Gupta","year":"2019","journal-title":"CoRL"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.063"},{"key":"ref49","article-title":"Demonstration-guided reinforcement learning with learned skills","author":"Pertsch","year":"2021","journal-title":"CoRL"},{"key":"ref50","first-page":"1094","article-title":"Learning to achieve goals","volume-title":"PROC. 
OF IJCAI-93","author":"Kaelbling"},{"key":"ref51","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","volume":"37","author":"Schaul"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref52"},{"volume-title":"Language models are few-shot learners","year":"2020","author":"Brown","key":"ref53"},{"volume-title":"GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model","year":"2021","author":"Wang","key":"ref54"},{"article-title":"Scaling language models: Methods, analysis & insights from training gopher","year":"2021","author":"Rae","key":"ref55"},{"volume-title":"Training compute-optimal large language models","year":"2022","author":"Hoffmann","key":"ref56"},{"article-title":"Opt: Open pre-trained transformer language models","year":"2022","author":"Zhang","key":"ref57"},{"volume-title":"Palm: Scaling language modeling with pathways","year":"2022","author":"Chowdhery","key":"ref58"},{"key":"ref59","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv: 2302.13971"},{"volume-title":"Contrastive learning as goal-conditioned reinforcement learning","year":"2022","author":"Eysenbach","key":"ref60"},{"journal-title":"arXiv: 1910.10897","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning, 2019","author":"Yu","key":"ref61"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01564"},{"volume-title":"Clvr jaco play dataset, version 1.0.0","year":"2023","author":"Dass","key":"ref63"},{"article-title":"BC-z: Zero-shot task generalization with robotic imitation learning","volume-title":"5th Annual Conference on Robot Learning","author":"Jang","key":"ref64"},{"key":"ref65","article-title":"Offline rl for natural language generation 
with implicit language q learning","author":"Snell","year":"2023","journal-title":"arXiv: 2206.11871"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.016"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.013"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610606.pdf?arnumber=10610606","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:15:27Z","timestamp":1723266927000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610606\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":67,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610606","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}