{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:48:36Z","timestamp":1776887316235,"version":"3.51.2"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10889042","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:02Z","timestamp":1741799702000},"page":"1-5","source":"Crossref","is-referenced-by-count":2,"title":["Reward Generation via Large Vision-Language Model in Offline Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Younghwan","family":"Lee","sequence":"first","affiliation":[{"name":"KAIST,Electrical Engineering,Daejeon,South Korea"}]},{"given":"Tung M.","family":"Luu","sequence":"additional","affiliation":[{"name":"KAIST,Electrical Engineering,Daejeon,South Korea"}]},{"given":"Donghoon","family":"Lee","sequence":"additional","affiliation":[{"name":"KAIST,Robotics Program,Daejeon,South Korea"}]},{"given":"Chang D.","family":"Yoo","sequence":"additional","affiliation":[{"name":"KAIST,Electrical Engineering,Daejeon,South Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3250269"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09997-9"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2196\/18477"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref5","article-title":"Offline reinforcement learning: Tutorial, review","volume":"5","author":"Levine","year":"2020","journal-title":"and Perspectives on Open Problems"},{"key":"ref6","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref7","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","article-title":"Provably good batch off-policy reinforcement learning without great exploration","author":"Liu","year":"2020","journal-title":"NeurIPS"},{"key":"ref9","article-title":"Hindsight experience replay","volume":"30","author":"Andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3069975"},{"key":"ref11","first-page":"310","article-title":"Offline goal-conditioned reinforcement learning via f-advantage regression","volume":"35","author":"Ma","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802437"},{"key":"ref13","article-title":"Scaling pareto-efficient decision making via offline multi-objective rl","volume-title":"International Conference on Learning Representations","author":"Zhu"},{"key":"ref14","article-title":"Inverse reward design","volume":"30","author":"Hadfield-Menell","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref15","article-title":"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training","author":"Lee","year":"2021"},{"key":"ref16","article-title":"Uni-rlhf: Universal platform and benchmark suite for reinforcement learning with diverse human feedback","volume-title":"International Conference on Learning Representations","author":"Yuan"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341912"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.064"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/586"},{"key":"ref20","first-page":"23 716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref22","article-title":"Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context","author":"Reid","year":"2024"},{"key":"ref23","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023"},{"key":"ref24","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"Conference on Robot Learning","author":"Ahn"},{"key":"ref25","first-page":"9118","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","volume-title":"International conference on machine learning","author":"Huang"},{"key":"ref26","first-page":"8657","article-title":"Guiding pretraining in reinforcement learning with large language models","volume-title":"International Conference on Machine Learning","author":"Du"},{"key":"ref27","article-title":"Bootstrap your own skills: Learning to solve new tasks with large language model guidance","author":"Zhang","year":"2023"},{"key":"ref28","article-title":"Large language models as generalizable policies for embodied tasks","volume-title":"International Conference on Learning Representations","author":"Szot"},{"key":"ref29","article-title":"Fine-tuning large vision-language models as decision-making agents via reinforcement learning","author":"Zhai","year":"2024"},{"key":"ref30","article-title":"Accelerating online reinforcement learning with offline datasets","volume":"abs\/2006.09359","author":"Ashvin","year":"2020","journal-title":"CoRR"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610606"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"ref33","article-title":"Vision-language models are zero-shot reward models for reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Rocamonde"},{"key":"ref34","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref35","first-page":"12 888","article-title":"Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"International conference on machine learning","author":"Li"},{"key":"ref36","article-title":"Roboclip: One demonstration is enough to learn robot policies","volume":"36","author":"Sontakke","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3180108"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10889042.pdf?arnumber=10889042","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:23:27Z","timestamp":1774416207000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10889042\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10889042","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}