{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T15:02:08Z","timestamp":1782313328304,"version":"3.54.5"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s13042-025-02780-0","type":"journal-article","created":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T07:15:40Z","timestamp":1756624540000},"page":"9717-9729","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Diffusion policy with Q-ensemble for offline reinforcement learning"],"prefix":"10.1007","volume":"16","author":[{"given":"Dong","family":"Wang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qiang","family":"Hua","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chunru","family":"Dong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Feng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,8,31]]},"reference":[{"key":"2780_CR1","unstructured":"Levine S, Kumar A, Tucker G, Fu J (2020) Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv:Learning"},{"key":"2780_CR2","unstructured":"Fu J, Kumar A, Soh M, Levine S (2019) Diagnosing bottlenecks in deep q-learning algorithms. In: International conference on machine learning. PMLR, pp 2021\u20132030"},{"key":"2780_CR3","first-page":"22955","volume":"35","author":"NM Shafiullah","year":"2022","unstructured":"Shafiullah NM, Cui Z, Altanzaya AA, Pinto L (2022) Behavior transformers: cloning $$k$$ modes with one stone. Adv Neural Inf Process Syst 35:22955\u201322968","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR4","unstructured":"Kumar A, Fu J, Soh M, Tucker G, Levine S (2019) Stabilizing off-policy q-learning via bootstrapping error reduction. Neural Inf Process Syst Vol. 32"},{"key":"2780_CR5","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar A, Zhou A, Tucker G, Levine S (2020) Conservative q-learning for offline reinforcement learning. Adv Neural Inf Process Syst 33:1179\u20131191","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR6","unstructured":"Fujimoto S, Gu S (2021) A minimalist approach to offline reinforcement learning. Neural Inf Process Syst 34:20132-20145"},{"key":"2780_CR7","unstructured":"Wang Z, Hunt JJ, Zhou M (2023) Diffusion policies as an expressive policy class for offline reinforcement learning. In Proc. ICLR 2023, Kigali, Rwanda, May 1-5, 2023"},{"key":"2780_CR8","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International conference on machine learning. PMLR, pp 2052\u20132062"},{"key":"2780_CR9","unstructured":"Chen H, Lu C, Ying C, Su H, Zhu J (2022) Offline reinforcement learning via high-fidelity generative behavior modeling. In Proc. ICLR 2023, Kigali, Rwanda, May 1-5, 2023"},{"key":"2780_CR10","unstructured":"Ho J, Jain A, Abbeel P (2020) Denoising diffusion probabilistic models. Neural Inf Process Syst 33:6840-6851"},{"key":"2780_CR11","unstructured":"Song Y, Durkan C, Murray I, Ermon S (2021) Maximum likelihood training of score-based diffusion models. Adv Neural Inf Process Syst 34:1415-1428"},{"issue":"9","key":"2780_CR12","doi-asserted-by":"publisher","first-page":"10850","DOI":"10.1109\/TPAMI.2023.3261988","volume":"45","author":"F-A Croitoru","year":"2023","unstructured":"Croitoru F-A, Hondru V, Ionescu RT, Shah M (2023) Diffusion models in vision: a survey. IEEE Trans Pattern Anal Mach Intell 45(9):10850\u201310869","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2780_CR13","unstructured":"Dhariwal P, Nichol A (2021) Diffusion models beat gans on image synthesis. Neural Inf Process Syst"},{"key":"2780_CR14","doi-asserted-by":"crossref","unstructured":"Peebles W, Xie S (2023) Scalable diffusion models with transformers. In Proc. ICCV 2023, Paris, France, October 1-6 2023. pp 4195\u20134205","DOI":"10.1109\/ICCV51070.2023.00387"},{"issue":"10","key":"2780_CR15","doi-asserted-by":"publisher","first-page":"1469","DOI":"10.3390\/e25101469","volume":"25","author":"R Yang","year":"2023","unstructured":"Yang R, Srivastava P, Mandt S (2023) Diffusion probabilistic modeling for video generation. Entropy 25(10):1469","journal-title":"Entropy"},{"key":"2780_CR16","unstructured":"Hansen-Estruch P, Kostrikov I, Janner M, Kuba,JG, Levine S (2023) Idql: Implicit q-learning as an actor-critic method with diffusion policies. arXiv preprint arXiv:2304.10573"},{"key":"2780_CR17","doi-asserted-by":"crossref","unstructured":"Alonso E, Jelley A, Micheli V, Kanervisto A, Storkey A, Pearce T, Fleuret F (2024) Diffusion for world modeling: visual details matter in atari. Adv Neural Inf Process Syst 37:58757-58791","DOI":"10.52202\/079017-1873"},{"key":"2780_CR18","unstructured":"Janner M, Du Y, Tenenbaum J, Levine S (2022) Planning with diffusion for flexible behavior synthesis. In: International conference on machine learning. PMLR, pp 9902\u20139915"},{"key":"2780_CR19","doi-asserted-by":"crossref","unstructured":"Chi C, Xu Z, Feng S, Cousineau E, Du Y, Burchfiel B, Tedrake R, Song S (2024) Diffusion policy: Visuomotor policy learning via action diffusion. Int J Robot Res pp 1-21","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"2780_CR20","first-page":"46323","volume":"36","author":"C Lu","year":"2024","unstructured":"Lu C, Ball P, Teh YW, Parker-Holder J (2024) Synthetic experience replay. Adv Neural Inf Process Syst 36:46323\u201346344","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR21","doi-asserted-by":"publisher","first-page":"1199","DOI":"10.1109\/TPAMI.2023.3328397","volume":"46","author":"Z Deng","year":"2023","unstructured":"Deng Z, Fu Z, Wang L, Yang Z, Bai C, Zhou T, Wang Z, Jiang J (2023) False correlation reduction for offline reinforcement learning. IEEE Trans Pattern Anal Mach Intell 46:1199\u20131211","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2780_CR22","unstructured":"Liang Z, Mu Y, Ding M, Ni F, Tomizuka M, Luo P (2023) Adaptdiffuser: diffusion models as adaptive self-evolving planners. In: International conference on machine learning. PMLR, pp 20725\u201320745"},{"key":"2780_CR23","first-page":"64896","volume":"36","author":"H He","year":"2023","unstructured":"He H, Bai C, Xu K, Yang Z, Zhang W, Wang D, Zhao B, Li X (2023) Diffusion model is an effective planner and data synthesizer for multi-task reinforcement learning. Adv Neural Inf Process Syst 36:64896\u201364917","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR24","unstructured":"Hu J, Sun Y, Huang S, Guo S, Chen H, Shen L, Sun L, Chang Y, Tao D (2023) Instructed diffuser with temporal condition guidance for offline reinforcement learning. arXiv preprint arXiv:2306.04875"},{"key":"2780_CR25","doi-asserted-by":"crossref","unstructured":"Sridhar A, Shah D, Glossop C, Levine S (2024) Nomad: goal masked diffusion policies for navigation and exploration. In: 2024 IEEE international conference on robotics and automation (ICRA). IEEE, pp 63\u201370","DOI":"10.1109\/ICRA57147.2024.10610665"},{"key":"2780_CR26","unstructured":"Zhu Z, Liu M, Mao L, Kang B, Xu M, Yu Y, Ermon S, Zhang W (2023) Madiff: offline multi-agent learning with diffusion models. arXiv preprint arXiv:2305.17330"},{"key":"2780_CR27","unstructured":"Xu M, Yu L, Song Y, Shi C, Ermon S, Tang J (2022) Geodiff: a geometric diffusion model for molecular conformation generation. In Proc. ICLR 2022, Virtual Event, April 25-29, 2022"},{"key":"2780_CR28","doi-asserted-by":"crossref","unstructured":"Chen Z, Kiami S, Gupta A, Kumar V (2023) Genaug: retargeting behaviors to unseen situations via generative augmentation. arXiv preprint arXiv:2302.06671","DOI":"10.15607\/RSS.2023.XIX.010"},{"key":"2780_CR29","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International conference on machine learning. PMLR, pp 1587\u20131596"},{"issue":"7540","key":"2780_CR30","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"2780_CR31","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International conference on machine learning. PMLR, pp 1861\u20131870"},{"key":"2780_CR32","unstructured":"Luo C (2022) Understanding diffusion models: a unified perspective. arXiv preprint arXiv:2208.11970"},{"key":"2780_CR33","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1080\/01621459.2017.1285773","volume":"112","author":"DM Blei","year":"2017","unstructured":"Blei DM, Kucukelbir A, McAuliffe JD (2017) Variational inference: a review for statisticians. J Am Stat Assoc 112:859\u2013877","journal-title":"J Am Stat Assoc"},{"key":"2780_CR34","unstructured":"Osband I, Blundell C, Pritzel A, Van\u00a0Roy B (2016) Deep exploration via bootstrapped dqn. Adv Neural Inf Process Syst 29:4033-4041"},{"key":"2780_CR35","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2016) Deep reinforcement learning with double q-learning. In Proc.AAAl 2016, Phoneix Arizona, February 12-17, 30:2094-2100","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"2780_CR36","first-page":"14129","volume":"33","author":"T Yu","year":"2020","unstructured":"Yu T, Thomas G, Yu L, Ermon S, Zou JY, Levine S, Finn C, Ma T (2020) Mopo: model-based offline policy optimization. Adv Neural Inf Process Syst 33:14129\u201314142","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR37","first-page":"7436","volume":"34","author":"G An","year":"2021","unstructured":"An G, Moon S, Kim J-H, Song HO (2021) Uncertainty-based offline reinforcement learning with diversified q-ensemble. Adv Neural Inf Process Syst 34:7436\u20137447","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR38","unstructured":"Xiao Z, Kreis K, Vahdat A (2022) Tackling the generative learning trilemma with denoising diffusion GANs. In Proc. ICLR 2022, Virtual Event, April 25-29, 2022"},{"key":"2780_CR39","unstructured":"Song Y, Sohl-Dickstein J, Kingma DP, Kumar A, Ermon S, Poole B (2021) Score-based generative modeling through stochastic differential equations. In Proc. ICLR 2021, Virtual Event, Austria, May 3-7, 2021"},{"key":"2780_CR40","unstructured":"Kostrikov I, Nair A, Levine S (2021) Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169"},{"key":"2780_CR41","first-page":"15084","volume":"34","author":"L Chen","year":"2021","unstructured":"Chen L, Lu K, Rajeswaran A, Lee K, Grover A, Laskin M, Abbeel P, Srinivas A, Mordatch I (2021) Decision transformer: reinforcement learning via sequence modeling. Adv Neural Inf Process Syst 34:15084\u201315097","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR42","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi R, Rajeswaran A, Netrapalli P, Joachims T (2020) Morel: model-based offline reinforcement learning. Adv Neural Inf Process Syst 33:21810\u201321823","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR43","first-page":"5775","volume":"35","author":"C Lu","year":"2022","unstructured":"Lu C, Zhou Y, Bao F, Chen J, Li C, Zhu J (2022) Dpm-solver: a fast ode solver for diffusion probabilistic model sampling in around 10 steps. Adv Neural Inf Process Syst 35:5775\u20135787","journal-title":"Adv Neural Inf Process Syst"},{"key":"2780_CR44","unstructured":"Song J, Meng C, Ermon S (2021) Denoising diffusion implicit models. In Proc. ICLR 2021, Virtual Event, Austria, May 3-7, 2021"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02780-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-025-02780-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02780-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T11:24:43Z","timestamp":1762514683000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-025-02780-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,31]]},"references-count":44,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["2780"],"URL":"https:\/\/doi.org\/10.1007\/s13042-025-02780-0","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,31]]},"assertion":[{"value":"20 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}