{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T04:40:05Z","timestamp":1746765605945,"version":"3.40.5"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/lra.2025.3564780","type":"journal-article","created":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T17:34:55Z","timestamp":1745861695000},"page":"6143-6150","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Exploration With Diffusion Policies in Hybrid Off-Policy RL: Application to Non-Prehensile Manipulation"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0162-2084","authenticated-orcid":false,"given":"Huy","family":"Le","sequence":"first","affiliation":[{"name":"Bosch Center for Artificial Intelligence, Renningen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9080-8105","authenticated-orcid":false,"given":"Tai","family":"Hoang","sequence":"additional","affiliation":[{"name":"Institute for Anthropomatics and Robotics, Karlsruhe Institute of Technology, Karlsruhe, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6454-777X","authenticated-orcid":false,"given":"Miroslav","family":"Gabriel","sequence":"additional","affiliation":[{"name":"Bosch Center for Artificial Intelligence, Renningen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5483-4225","authenticated-orcid":false,"given":"Gerhard","family":"Neumann","sequence":"additional","affiliation":[{"name":"Institute for Anthropomatics and Robotics, Karlsruhe Institute of Technology, Karlsruhe, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9646-267X","authenticated-orcid":false,"given":"Ngo Anh","family":"Vien","sequence":"additional","affiliation":[{"name":"Bosch Center for Artificial Intelligence, Renningen, Germany"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/IROS.2016.7758091"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/LRA.2022.3142397"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/ICRA46639.2022.9811872"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ICRA.2019.8794366"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/ICCV48922.2021.00674"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICRA46639.2022.9811735"},{"key":"ref7","first-page":"241","article-title":"HACMan: Learning hybrid actor-critic maps for 6D non-prehensile manipulation","volume-title":"Proc. Conf. Robot Learn.","volume":"229","author":"Zhou","year":"2023"},{"key":"ref8","article-title":"Neural probabilistic motor primitives for humanoid control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Merel","year":"2018"},{"key":"ref9","first-page":"8198","article-title":"One solution is not all you need: Few-shot extrapolation via structured","volume":"33","author":"Kumar","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref10","article-title":"Towards diverse behaviors: A benchmark for imitation learning with human demonstrations","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Jia","year":"2024"},{"key":"ref11","first-page":"11918","article-title":"Generative modeling by estimating gradients of the data distribution","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Song","year":"2019"},{"key":"ref12","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Ho","year":"2020"},{"key":"ref13","article-title":"Consistency models as a rich and efficient policy class for reinforcement learning","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Ding","year":"2023"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref15","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref16","article-title":"Diffusion policies as an expressive policy class for offline reinforcement learning","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Wang","year":"2023"},{"year":"2023","author":"Hansen-Estruch","article-title":"IDQL: Implicit Q-learning as an actor-critic method with diffusion policies","key":"ref17"},{"key":"ref18","first-page":"22825","article-title":"Contrastive energy prediction for exact energy-guided diffusion sampling in offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lu","year":"2023"},{"key":"ref19","article-title":"Reasoning with latent diffusion in offline reinforcement learning","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Venkatraman","year":"2024"},{"key":"ref20","first-page":"38456","article-title":"Learning multimodal behaviors from scratch with diffusion policy gradient","volume-title":"Proc. 38th Annu. Conf. Neural Inf. Process. Syst.","author":"Li","year":"2024"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.15607\/RSS.2023.XIX.028"},{"year":"2023","author":"Pearce","article-title":"Imitating human behaviour with diffusion models","key":"ref22"},{"key":"ref23","article-title":"Offline reinforcement learning via high-fidelity generative behavior modeling","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Chen","year":"2022"},{"year":"2023","author":"He","article-title":"Diffcps: Diffusion model based constrained policy search for offline reinforcement learning","key":"ref24"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1145\/1273496.1273590"},{"year":"2023","author":"Lee","article-title":"Aligning text-to-image models using human feedback","key":"ref26"},{"key":"ref27","article-title":"Training diffusion models with reinforcement learning","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Black","year":"2024"},{"key":"ref28","first-page":"2024","article-title":"Feedback efficient online fine-tuning of diffusion models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Uehara"},{"year":"2024","author":"Uehara","article-title":"Fine-tuning of continuous-time diffusion models as entropy-regularized control","key":"ref29"},{"year":"2023","author":"Psenka","article-title":"Learning a diffusion model policy from rewards via Q-score matching","key":"ref30"},{"key":"ref31","first-page":"150","article-title":"Learning to grasp the ungraspable with emergent extrinsic dexterity","volume-title":"Proc. Conf. Robot Learn.","author":"Zhou","year":"2023"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.15607\/RSS.2024.XX.129"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1109\/lra.2024.3382529"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1109\/LRA.2023.3248443"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref35"},{"key":"ref36","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2018"},{"key":"ref37","first-page":"32211","article-title":"Consistency models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Song","year":"2023"},{"key":"ref38","first-page":"67195","article-title":"Efficient diffusion policies for offline reinforcement learning","volume-title":"Proc. 37th Int. Conf. Neural Inf. Process. Syst.","author":"Kang","year":"2024"},{"key":"ref39","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Fujimoto","year":"2021"},{"year":"2018","author":"Levine","article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","key":"ref40"},{"year":"2020","author":"Zhu","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","key":"ref41"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref43","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume-title":"Proc. 35th Int. Conf. Neural Inf. Process. Syst.","volume":"2930429320","author":"Agarwal","year":"2021"},{"key":"ref44","first-page":"2024","article-title":"CORN: Contact-based object representation for nonprehensile manipulation of general unseen objects","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Cho"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/10969146\/10978025.pdf?arnumber=10978025","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,9]],"date-time":"2025-05-09T04:22:52Z","timestamp":1746764572000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10978025\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":44,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3564780","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"type":"electronic","value":"2377-3766"},{"type":"electronic","value":"2377-3774"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}