{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:41:04Z","timestamp":1766061664181,"version":"3.48.0"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246982","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"7584-7591","source":"Crossref","is-referenced-by-count":0,"title":["DB-MPO: Demonstration Boosted Reactive Grasping For Two-Finger Gripper"],"prefix":"10.1109","author":[{"given":"Boya","family":"Zhang","sequence":"first","affiliation":[{"name":"University of T&#x00FC;bingen,Department of Computer Science,Germany"}]},{"given":"Andreas","family":"Zell","sequence":"additional","affiliation":[{"name":"University of T&#x00FC;bingen,Department of Computer Science,Germany"}]},{"given":"Georg","family":"Martius","sequence":"additional","affiliation":[{"name":"University of T&#x00FC;bingen,Department of Computer Science,Germany"}]}],"member":"263","reference":[{"key":"ref1","first-page":"2376","article-title":"Conditioned reinforcement learning for few-shot imitation","volume-title":"International Conference on Machine Learning","author":"Dance"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s41315-019-00103-5"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3165531"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2024.102625"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1017\/S0263574722001710"},{"article-title":"Isaac gym: High performance gpu-based physics simulation for robot learning","year":"2021","author":"Makoviychuk","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3270034"},{"article-title":"Skillmimicgen: Automated demonstration generation for efficient skill learning and deployment","year":"2024","author":"Garrett","key":"ref9"},{"article-title":"Robogen: Towards unleashing infinite data for automated robot learning via generative simulation","year":"2023","author":"Wang","key":"ref10"},{"article-title":"Automated creation of digital cousins for robust policy learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Dai","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s00170-022-08652-z"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2018.12.007"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00084"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636856"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560942"},{"key":"ref17","first-page":"62 244","article-title":"Cal-ql: Calibrated offline rl pre-training for efficient online fine-tuning","volume":"36","author":"Nakamoto","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.056"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2023.1038658"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/Humanoids58906.2024.10769950"},{"article-title":"Dextrah-rgb: Visuomotor policies to grasp anything with dexterous hands","year":"2024","author":"Singh","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau4984"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01146"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160842"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3492118"},{"article-title":"Dexgraspnet 2.0: Learning generative dexterous grasping in large-scale synthetic cluttered scenes","volume-title":"8th Annual Conference on Robot Learning","author":"Zhang","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.19139\/soic-2310-5070-1797"},{"key":"ref28","first-page":"1634","article-title":"Smooth exploration for robotic reinforcement learning","volume-title":"Proceedings of the 5th Conference on Robot Learning","author":"Raffin"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11244192"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561379"},{"article-title":"Maximum a posteriori policy optimisation","year":"2018","author":"Abdolmaleki","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref33","first-page":"255","article-title":"Issues in using function approximation for reinforcement learning","volume-title":"Proceedings of the 1993 connectionist models summer school","author":"Thrun"},{"article-title":"Relative entropy regularized policy iteration","year":"2018","author":"Abdolmaleki","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2006.479"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1002\/aisy.202300042"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11127409"},{"article-title":"Efficient fine-tuning of behavior cloned policies with reinforcement learning from limited demonstrations","volume-title":"NeurIPS 2024 Workshop on Fine-Tuning in Modern Machine Learning: Principles and Scalability","author":"Noh","key":"ref38"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2024.3468770"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610286"},{"article-title":"Exploration by random network distillation","year":"2018","author":"Burda","key":"ref41"},{"article-title":"Pink noise is all you need: Colored noise exploration in deep reinforcement learning","volume-title":"The Eleventh International Conference on Learning Representations","author":"Eberhard","key":"ref42"},{"article-title":"Edgesam: Prompt-in-the-loop distillation for on-device deployment of sam","year":"2023","author":"Zhou","key":"ref43"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246982.pdf?arnumber=11246982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:37:33Z","timestamp":1766061453000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246982\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246982","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}