{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:04:06Z","timestamp":1777651446217,"version":"3.51.4"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62136008"],"award-info":[{"award-number":["62136008"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62103409"],"award-info":[{"award-number":["62103409"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Natural Science Foundation","award":["4232056"],"award-info":[{"award-number":["4232056"]}]},{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association CAS","doi-asserted-by":"publisher","award":["2021132"],"award-info":[{"award-number":["2021132"]}],"id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/tai.2024.3387401","type":"journal-article","created":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T14:33:17Z","timestamp":1712759597000},"page":"4585-4594","source":"Crossref","is-referenced-by-count":11,"title":["Stabilizing Diffusion Model for Robotic Control With Dynamic Programming and Transition Feasibility"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2559-9585","authenticated-orcid":false,"given":"Haoran","family":"Li","sequence":"first","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0973-0467","authenticated-orcid":false,"given":"Yaocheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1901-7876","authenticated-orcid":false,"given":"Haowei","family":"Wen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5384-423X","authenticated-orcid":false,"given":"Yuanheng","family":"Zhu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8218-9633","authenticated-orcid":false,"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lillicrap","year":"2016"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2018.2823329"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2022.3218940"},{"key":"ref4","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9922100"},{"key":"ref6","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2019"},{"key":"ref7","first-page":"11761","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Kumar","year":"2019"},{"key":"ref8","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2020"},{"key":"ref9","article-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","author":"Peng","year":"2019"},{"key":"ref10","article-title":"AlgaeDICE: Policy gradient from arbitrary experience","author":"Nachum","year":"2019"},{"key":"ref11","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kumar","year":"2020"},{"key":"ref12","first-page":"1","article-title":"Offline reinforcement learning with implicit Q-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kostrikov","year":"2022"},{"key":"ref13","first-page":"1","article-title":"Offline RL with no OOD actions: In-sample learning via implicit value regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Xu","year":"2023"},{"key":"ref14","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Ho","year":"2020"},{"key":"ref15","first-page":"11895","article-title":"Generative modeling by estimating gradients of the data distribution","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Song","year":"2019"},{"key":"ref16","first-page":"1","article-title":"Offline reinforcement learning via high-fidelity generative behavior modeling","volume-title":"Proc. Int. Conf. Learn. Representations","volume":"10","author":"Chen","year":"2022"},{"key":"ref17","article-title":"Know your boundaries: The necessity of explicit behavioral cloning in offline RL","author":"Goo","year":"2022"},{"key":"ref18","article-title":"Imitating human behaviour with diffusion models","author":"Pearce","year":"2023"},{"key":"ref19","first-page":"1","article-title":"Diffusion policies as an expressive policy class for offline reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2022"},{"key":"ref20","article-title":"IDQL: Implicit Q-learning as an actor-critic method with diffusion policies","author":"Hansen-Estruch","year":"2023"},{"key":"ref21","first-page":"9902","article-title":"Planning with diffusion for flexible behavior synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Janner","year":"2022"},{"key":"ref22","first-page":"1","article-title":"Is conditional generative modeling all you need for decision making?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ajay","year":"2023"},{"key":"ref23","article-title":"Synthetic experience replay","author":"Lu","year":"2023"},{"key":"ref24","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen","year":"2021"},{"key":"ref25","first-page":"38989","article-title":"Q-learning decision transformer: Leveraging dynamic programming for conditional sequence modelling in offline RL","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yamagata","year":"2023"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2927869"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSS53909.2021.9722012"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2022.3222143"},{"key":"ref29","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020"},{"key":"ref30","first-page":"18353","article-title":"BAIL: Best-action imitation learning for batch deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen","year":"2020"},{"key":"ref31","first-page":"7768","article-title":"Critic regularized regression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang","year":"2020"},{"key":"ref32","first-page":"1273","article-title":"Offline reinforcement learning as one big sequence modeling problem","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Janner","year":"2021"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3250269"},{"key":"ref34","first-page":"158","article-title":"Implicit behavioral cloning","volume-title":"Proc. Conf. Robot Learn.","author":"Florence","year":"2022"},{"key":"ref35","article-title":"Boosting continuous control with consistency policy","author":"Chen","year":"2024"},{"key":"ref36","first-page":"1","article-title":"Classifier-free diffusion guidance","volume-title":"Proc. NeurIPS Workshop Deep Generative Models Downstream Appl.","author":"Ho","year":"2021"},{"key":"ref37","first-page":"4085","article-title":"A policy-guided imitation approach for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Xu","year":"2022"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"ref39","first-page":"21810","article-title":"MORel: Model-based offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Kidambi","year":"2020"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP.2017.8305148"},{"key":"ref41","first-page":"5775","article-title":"DPM-solver: A fast ODE solver for diffusion probabilistic model sampling in around 10 steps","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lu","year":"2022"},{"key":"ref42","first-page":"1","article-title":"Denoising diffusion implicit models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Song","year":"2021"},{"key":"ref43","article-title":"DPM-solver++: Fast solver for guided sampling of diffusion probabilistic models","author":"Lu","year":"2023"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/960126.806879"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9078688\/10673734\/10496464.pdf?arnumber=10496464","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:09:34Z","timestamp":1755911374000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10496464\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":44,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tai.2024.3387401","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}