{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T02:16:17Z","timestamp":1773972977686,"version":"3.50.1"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10611197","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"18362-18369","source":"Crossref","is-referenced-by-count":10,"title":["Boosting Offline Reinforcement Learning for Autonomous Driving with Hierarchical Latent Skills"],"prefix":"10.1109","author":[{"given":"Zenan","family":"Li","sequence":"first","affiliation":[{"name":"Tsinghua University"}]},{"given":"Fan","family":"Nie","sequence":"additional","affiliation":[{"name":"Shanghai Qi Zhi Institute"}]},{"given":"Qiao","family":"Sun","sequence":"additional","affiliation":[{"name":"Shanghai Qi Zhi Institute"}]},{"given":"Fang","family":"Da","sequence":"additional","affiliation":[{"name":"QCraft Inc"}]},{"given":"Hang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1","article-title":"Carla: An open urban driving simulator","volume-title":"Conference on robot learning","author":"Dosovitskiy"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9982008"},{"key":"ref3","article-title":"nuplan: A closed-loop ml-based planning benchmark for autonomous vehicles","author":"Caesar","year":"2021"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00957"},{"key":"ref5","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to noregret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics","author":"Ross"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01178"},{"key":"ref7","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref8","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref9","first-page":"21810","article-title":"Morel: Model-based offline reinforcement learning","volume":"33","author":"Kidambi","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref10","article-title":"Offline reinforcement learning for autonomous driving with safety and exploration enhancement","author":"Shi","year":"2021"},{"key":"ref11","article-title":"Umbrella: Uncertainty-aware model-based offline reinforcement learning leveraging planning","author":"Diehl","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3190100"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/iros55552.2023.10341449"},{"key":"ref14","article-title":"Hindsight experience replay","volume":"30","author":"Andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref15","first-page":"1430","article-title":"Goal-conditioned reinforcement learning with imagined sub-goals","volume-title":"International Conference on Machine Learning","author":"Chane-Sane"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981695"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1523\/JNEUROSCI.0965-04.2004"},{"key":"ref18","article-title":"Transferable and adaptable driving behavior prediction","author":"Wang","year":"2022"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_16"},{"key":"ref21","first-page":"188","article-title":"Accelerating reinforcement learning with learned skill priors","volume-title":"Conference on robot learning","author":"Pertsch"},{"key":"ref22","first-page":"1113","article-title":"Learning latent plans from play","volume-title":"Conference on robot learning","author":"Lynch"},{"key":"ref23","article-title":"Opal: Offline primitive discovery for accelerating offline reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Ajay"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889774"},{"key":"ref26","article-title":"On the quantitative analysis of decoder-based generative models","volume-title":"International Conference on Learning Representations","author":"Wu"},{"key":"ref27","author":"Burgess","year":"2018","journal-title":"Understanding disentangling in beta-vae"},{"key":"ref28","first-page":"19667","article-title":"Nvae: A deep hierarchical variational autoencoder","volume":"33","author":"Vahdat","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref29","article-title":"Categorical reparameterization with gumbel-softmax","author":"Jang","year":"2016"},{"key":"ref30","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref31","article-title":"Offline reinforcement learning with implicit q-learning","author":"Kostrikov","year":"2021"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2019.xv.031"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3355089.3356505"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01494"},{"key":"ref35","article-title":"Model-based imitation learning for urban driving","author":"Hu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2014.6856581"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2019.2904385"},{"key":"ref38","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume":"34","author":"Fujimoto","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref39","first-page":"11319","article-title":"Uncertainty weighted actor-critic for offline reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Wu"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196935"},{"key":"ref41","article-title":"Skill-based meta-reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Nam"},{"key":"ref42","article-title":"Latent plans for task-agnostic offline reinforcement learning","volume-title":"6th Annual Conference on Robot Learning","author":"Rosete-Beas"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2018.2804159"},{"key":"ref44","volume-title":"Markov decision processes: discrete stochastic dynamic programming.","author":"Puterman","year":"2014"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9081295"},{"key":"ref46","article-title":"beta-vae: Learning basic visual concepts with a constrained variational framework","volume-title":"International conference on learning representations","author":"Higgins"},{"key":"ref47","article-title":"Carla autonomous driving leaderboard","author":"team","year":"2020"},{"key":"ref48","article-title":"Exponentially weighted imitation learning for batched historical data","volume":"31","author":"Wang","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"11","key":"ref49","article-title":"Visualizing data using t-sne","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"Journal of machine learning research"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10611197.pdf?arnumber=10611197","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:11:09Z","timestamp":1723349469000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10611197\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10611197","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}