{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:27:27Z","timestamp":1766068047446,"version":"3.28.0"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10611123","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"7447-7454","source":"Crossref","is-referenced-by-count":3,"title":["Guided Online Distillation: Promoting Safe Reinforcement Learning by Offline Demonstration"],"prefix":"10.1109","author":[{"given":"Jinning","family":"Li","sequence":"first","affiliation":[{"name":"University of California,Berkeley,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyi","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Michigan,Ann Arbor,MI,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Banghua","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of California,Berkeley,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiantao","family":"Jiao","sequence":"additional","affiliation":[{"name":"University of California,Berkeley,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masayoshi","family":"Tomizuka","sequence":"additional","affiliation":[{"name":"University of California,Berkeley,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Tang","sequence":"additional","affiliation":[{"name":"University of California,Berkeley,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Zhan","sequence":"additional","affiliation":[{"name":"University of California,Berkeley,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00957"},{"key":"ref2","first-page":"22","article-title":"Constrained policy optimization","volume-title":"International conference on machine learning","author":"Achiam"},{"key":"ref3","first-page":"13644","article-title":"Constrained variational policy optimization for safe reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Liu"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561195"},{"key":"ref5","first-page":"2821","article-title":"Relaxed exploration constrained reinforcement learning","volume-title":"Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems","author":"Shperberg"},{"key":"ref6","first-page":"34556","article-title":"Jump-start reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Uchendu"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2976272"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3070252"},{"key":"ref9","first-page":"12151","article-title":"Safe reinforcement learning via curriculum induction","volume":"33","author":"Turchetta","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10610651"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00895"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3094821"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2023.10.1748"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"article-title":"Offline reinforcement learning with implicit q-learning","year":"2021","author":"Kostrikov","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3190100"},{"key":"ref19","first-page":"1455","article-title":"Dealing with the unknown: Pessimistic offline reinforcement learning","volume-title":"Conference on Robot Learning","author":"Li"},{"article-title":"What matters in learning from offline human demonstrations for robot manipulation","year":"2021","author":"Mandlekar","key":"ref20"},{"article-title":"When should we prefer offline reinforcement learning over behavioral cloning?","year":"2022","author":"Kumar","key":"ref21"},{"key":"ref22","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume":"34","author":"Chen","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref24","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref25","first-page":"8821","article-title":"Zero-shot text-to-image generation","volume-title":"International Conference on Machine Learning","author":"Ramesh"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197145"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1115\/DSCC2020-3328"},{"article-title":"Awac: Accelerating online reinforcement learning with offline datasets","year":"2020","author":"Nair","key":"ref28"},{"article-title":"Cal-ql: Calibrated offline rl pre-training for efficient online fine-tuning","year":"2023","author":"Nakamoto","key":"ref29"},{"article-title":"Efficient sim-to-real transfer of contact-rich manipulation skills with online admittance residual learning","volume-title":"7th Annual Conference on Robot Learning","author":"Zhang","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3158376"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3108510"},{"article-title":"Policy distillation","year":"2015","author":"Rusu","key":"ref33"},{"key":"ref34","first-page":"1331","article-title":"Distilling policy distillation","volume-title":"The 22nd international conference on artificial intelligence and statistics","author":"Czarnecki"},{"article-title":"Discorl: Continual reinforcement learning via policy distillation","year":"2019","author":"Traor\u00e9","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s001860050035"},{"article-title":"On the robustness of safe reinforcement learning under observational perturbations","year":"2022","author":"Liu","key":"ref37"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref38"},{"article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","year":"2018","author":"Haarnoja","key":"ref39"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.003"},{"article-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref41"},{"article-title":"Benchmarking Safe Exploration in Deep Reinforcement Learning","year":"2019","author":"Ray","key":"ref42"},{"article-title":"Bullet-safety-gym: A framework for constrained reinforcement learning","year":"2022","author":"Gronauer","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3190471"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10611123.pdf?arnumber=10611123","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:11:48Z","timestamp":1723349508000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10611123\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10611123","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}