{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:35:52Z","timestamp":1776886552218,"version":"3.51.2"},"reference-count":50,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10888998","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:19Z","timestamp":1741799719000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Sample Efficient Reinforcement Learning via Large Vision Language Model Distillation"],"prefix":"10.1109","author":[{"given":"Donghoon","family":"Lee","sequence":"first","affiliation":[{"name":"KAIST,Robotics Program,Daejeon,South Korea"}]},{"given":"Tung M.","family":"Luu","sequence":"additional","affiliation":[{"name":"KAIST,Electrical Engineering,Daejeon,South Korea"}]},{"given":"Younghwan","family":"Lee","sequence":"additional","affiliation":[{"name":"KAIST,Electrical Engineering,Daejeon,South Korea"}]},{"given":"Chang D.","family":"Yoo","sequence":"additional","affiliation":[{"name":"KAIST,Electrical Engineering,Daejeon,South Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2019.01.003"},{"key":"ref3","article-title":"Qt-opt: Scalable deep reinforcement learning for vision-based robotic manipulation","author":"Kalashnikov","year":"2018","journal-title":"CoRL"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3069975"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3182107"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802437"},{"key":"ref8","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019"},{"key":"ref9","article-title":"A comprehensive survey on safe reinforcement learning","volume-title":"JMLR","author":"Garc\u0131a"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2015.7363436"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref12","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref13","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"ref14","article-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021"},{"key":"ref15","article-title":"Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context","author":"Reid","year":"2024"},{"key":"ref16","article-title":"Openai. gpt-4v","year":"2023"},{"key":"ref17","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","author":"Ahn","year":"2022"},{"key":"ref18","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"ICML","author":"Huang"},{"key":"ref19","article-title":"Large language models as generalizable policies for embodied tasks","volume-title":"ICLR","author":"Szot"},{"key":"ref20","article-title":"Fine-tuning large vision-language models as decision-making agents via reinforcement learning","author":"Zhai","year":"2024"},{"key":"ref21","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref22","article-title":"Palm: Scaling language modeling with pathways","author":"Chowdhery","year":"2023","journal-title":"JMLR"},{"key":"ref23","article-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"key":"ref24","article-title":"Proximal policy optimization algorithms","author":"at al","year":"2017"},{"key":"ref25","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"ICML","author":"M"},{"key":"ref26","article-title":"Guiding pretraining in reinforcement learning with large language models","volume-title":"ICML","author":"Du"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"ref28","article-title":"Inner monologue: Embodied reasoning through planning with language models","author":"Huang","year":"2022"},{"key":"ref29","article-title":"Enabling intelligent interactions between an agent and an llm: A reinforcement learning approach","volume-title":"RLC","author":"Hu"},{"key":"ref30","article-title":"Grounding large language models in interactive environments with online reinforcement learning","volume-title":"ICML","author":"Carta"},{"key":"ref31","article-title":"Introducing gemini: our largest and most capable ai model","year":"2023"},{"key":"ref32","article-title":"Qwen2. 5 technical report","author":"Yang","year":"2024"},{"key":"ref33","volume-title":"The claude 3 model family: Opus, sonnet, haiku"},{"key":"ref34","article-title":"Plan-seq-learn: Language model guided rl for solving long horizon robotics tasks","author":"Dalal","year":"2024"},{"key":"ref35","article-title":"Reward design with language models","author":"Kwon","year":"2023"},{"key":"ref36","article-title":"Vision-language models are zero-shot reward models for reinforcement learning","author":"Rocamonde","year":"2023"},{"key":"ref37","article-title":"Rl-vlm-f: Reinforcement learning from vision language foundation model feedback","author":"Wang","year":"2024"},{"key":"ref38","article-title":"Voyager: An open-ended embodied agent with large language models","author":"Wang","year":"2023"},{"key":"ref39","article-title":"Bootstrap your own skills: Learning to solve new tasks with large language model guidance","author":"Zhang","year":"2023"},{"key":"ref40","article-title":"Embodiedgpt: Vision-language pre-training via embodied chain of thought","author":"Mu","year":"2024","journal-title":"NeurIPS"},{"key":"ref41","article-title":"Vision-language models provide promptable representations for reinforcement learning","author":"Chen","year":"2024"},{"key":"ref42","article-title":"Policy distillation","author":"Rusu","year":"2015","journal-title":"CoRR"},{"key":"ref43","article-title":"Actor-mimic: Deep multitask and transfer reinforcement learning","author":"Parisotto","year":"2015"},{"key":"ref44","article-title":"Guided policy search","volume-title":"ICML","author":"Levine"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09430-0"},{"key":"ref47","article-title":"Reincarnating reinforcement learning: Reusing prior computation to accelerate progress","author":"Agarwal","year":"2022","journal-title":"NeurIPS"},{"key":"ref48","article-title":"Kickstarting deep reinforcement learning","author":"Schmitt","year":"2018"},{"key":"ref49","article-title":"When does label smoothing help?","author":"Muller","year":"2019","journal-title":"NeurIPS"},{"key":"ref50","article-title":"Minigrid & miniworld: Modular & customizable reinforcement learning environments for goal-oriented tasks","author":"Chevalier-Boisvert","year":"2024","journal-title":"NeurIPS"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10888998.pdf?arnumber=10888998","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:26:08Z","timestamp":1774416368000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10888998\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10888998","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}