{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T10:36:27Z","timestamp":1763202987050,"version":"3.44.0"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB2502904"],"award-info":[{"award-number":["2022YFB2502904"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62333015,62273017"],"award-info":[{"award-number":["62333015,62273017"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L231014"],"award-info":[{"award-number":["L231014"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004147","name":"Tsinghua University","doi-asserted-by":"publisher","award":["20242001120"],"award-info":[{"award-number":["20242001120"]}],"id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11127503","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"1421-1428","source":"Crossref","is-referenced-by-count":1,"title":["H2O+: An Improved Framework for Hybrid Offline-and-Online RL with Dynamics Gaps"],"prefix":"10.1109","author":[{"given":"Haoyi","family":"Niu","sequence":"first","affiliation":[{"name":"Tsinghua University"}]},{"given":"Tianying","family":"Ji","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Bingqi","family":"Liu","sequence":"additional","affiliation":[{"name":"Beihang University"}]},{"given":"Haocheng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Xiangyu","family":"Zhu","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Jianying","family":"Zheng","sequence":"additional","affiliation":[{"name":"Beihang University"}]},{"given":"Pengfei","family":"Huang","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Guyue","family":"Zhou","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Jianming","family":"Hu","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]},{"given":"Xianyuan","family":"Zhan","sequence":"additional","affiliation":[{"name":"Tsinghua University"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i7.20730"},{"key":"ref4","article-title":"Off-dynamics reinforcement learning: Training for transfer with domain classifiers","volume-title":"in International Conference on Learning Representations","author":"Eysenbach","year":"2020"},{"key":"ref5","article-title":"When to trust your simulator: Dynamics-aware hybrid offline-and-online reinforcement learning","volume-title":"in Advances in Neural Information Processing Systems","author":"Niu","year":"2022"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abc5986"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abm6597"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref11","first-page":"1066","article-title":"Sim2real transfer for deep reinforcement learning with stochastic state transition delays","volume-title":"in Conference on Robot Learning","author":"Sandha","year":"2021"},{"journal-title":"Solving rubik\u2019s cube with a robot hand","year":"2019","author":"Akkaya","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/906"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.048"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793789"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3052391"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9562091"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.029"},{"journal-title":"Epopt: Learning robust neural network policies using model ensembles","year":"2016","author":"Rajeswaran","key":"ref19"},{"key":"ref20","first-page":"1162","article-title":"Active domain randomization","volume-title":"in Conference on Robot Learning","author":"Mehta","year":"2020"},{"journal-title":"How to pick the domain randomization parameters for sim-to-real transfer of reinforcement learning policies?","year":"2019","author":"Vuong","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3226027"},{"key":"ref23","article-title":"Dara: Dynamics-aware reward augmentation in offline reinforcement learning","volume-title":"in International Conference on Learning Representations","author":"Liu","year":"2022"},{"journal-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","year":"2020","author":"Levine","key":"ref24"},{"key":"ref25","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"in International Conference on Machine Learning","author":"Fujimoto","year":"2019"},{"key":"ref26","first-page":"11761","article-title":"Stabilizing offpolicy q-learning via bootstrapping error reduction","author":"Kumar","year":"2019","journal-title":"in Advances in Neural Information Processing Systems"},{"key":"ref27","article-title":"A minimalist approach to offline reinforcement learning","volume":"34","author":"Fujimoto","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref28","article-title":"Offline reinforcement learning with implicit q-learning","volume-title":"in International Conference on Learning Representations","author":"Kostrikov","year":"2022"},{"key":"ref29","article-title":"A policy-guided imitation approach for offline reinforcement learning","author":"Xu","year":"2022","journal-title":"in Advances in Neural Information Processing Systems"},{"key":"ref30","article-title":"Offline rl with no ood actions: In-sample learning via implicit value regularization","volume-title":"in The Eleventh International Conference on Learning Representations","author":"Xu","year":"2023"},{"key":"ref31","article-title":"Extreme q-learning: Maxent RL without entropy","volume-title":"in The Eleventh International Conference on Learning Representations","author":"Garg","year":"2023"},{"key":"ref32","article-title":"When data geometry meets deep function: Generalizing offline reinforcement learning","volume-title":"in The Eleventh International Conference on Learning Representations","author":"Li","year":"2023"},{"key":"ref33","article-title":"Conservative q-learning for offline reinforcement learning","author":"Kumar","year":"2020","journal-title":"in Neural Information Processing Systems (NeurIPS)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-1768-8_11"},{"journal-title":"Awac: Accelerating online reinforcement learning with offline datasets","year":"2020","author":"Nair","key":"ref35"},{"key":"ref36","first-page":"1702","article-title":"Offline-to-online reinforcement learning via balanced replay and pessimistic q-ensemble","volume-title":"in Conference on Robot Learning","author":"Lee","year":"2022"},{"key":"ref37","article-title":"Policy expansion for bridging offline-to-online reinforcement learning","volume-title":"in The Eleventh International Conference on Learning Representations","author":"Zhang","year":"2023"},{"key":"ref38","article-title":"Hybrid RL: Using both offline and online data can make RL efficient","volume-title":"in The Eleventh International Conference on Learning Representations","author":"Song","year":"2023"},{"journal-title":"Efficient online reinforcement learning with offline data","year":"2023","author":"Ball","key":"ref39"},{"journal-title":"Leveraging offline data in online reinforcement learning","year":"2022","author":"Wagenmaker","key":"ref40"},{"key":"ref41","article-title":"Seizing serendipity: Exploiting the value of past success in off-policy actor-critic","volume-title":"in Forty-first International Conference on Machine Learning","author":"Ji","year":"2024"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref43","article-title":"Cal-QL: Calibrated offline RL pre-training for efficient online fine-tuning","volume-title":"in Workshop on Reincarnating Reinforcement Learning at ICLR 2023","author":"Nakamoto","year":"2023"},{"journal-title":"Idql: Implicit q-learning as an actor-critic method with diffusion policies","year":"2023","author":"Hansen-Estruch","key":"ref44"},{"key":"ref45","first-page":"1861","article-title":"Soft actor-critic: Offpolicy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"in International Conference on Machine Learning","author":"Haarnoja","year":"2018"},{"journal-title":"D4rl: Datasets for deep data-driven reinforcement learning","year":"2020","author":"Fu","key":"ref46"},{"journal-title":"Isaac gym: High performance gpu-based physics simulation for robot learning","year":"2021","author":"Makoviychuk","key":"ref47"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11127503.pdf?arnumber=11127503","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:15:23Z","timestamp":1756880123000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11127503\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11127503","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}