{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T13:08:01Z","timestamp":1774530481927,"version":"3.50.1"},"reference-count":78,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neucom.2026.133343","type":"journal-article","created":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T14:56:59Z","timestamp":1773759419000},"page":"133343","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["HAVEN: Hierarchical diffusion and value-based trajectory selection for offline safe reinforcement learning"],"prefix":"10.1016","volume":"681","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3288-6338","authenticated-orcid":false,"given":"Erlie","family":"Wang","sequence":"first","affiliation":[]},{"given":"He","family":"Diao","sequence":"additional","affiliation":[]},{"given":"Xianglin","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1199-4106","authenticated-orcid":false,"given":"Jingkui","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xiaofeng","family":"Chai","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Qi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8330-2164","authenticated-orcid":false,"given":"Ping","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2026.133343_bib0005","author":"Levine"},{"key":"10.1016\/j.neucom.2026.133343_bib0010","author":"Fu"},{"key":"10.1016\/j.neucom.2026.133343_bib0015","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.129079","article-title":"Using offline data to speed up reinforcement learning in procedurally generated environments","volume":"618","author":"Andres","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133343_bib0020","series-title":"2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC)","first-page":"3417","article-title":"Offline reinforcement learning for autonomous driving with real world driving data","author":"Fang","year":"2022"},{"key":"10.1016\/j.neucom.2026.133343_bib0025","doi-asserted-by":"crossref","first-page":"4639","DOI":"10.1109\/LRA.2024.3379805","article-title":"Safety-aware causal representation for trustworthy offline reinforcement learning in autonomous driving","volume":"9","author":"Lin","year":"2024","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.neucom.2026.133343_bib0030","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128482","article-title":"An improved hierarchical deep reinforcement learning algorithm for multi-intelligent vehicle lane change","volume":"609","author":"Gao","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133343_bib0035","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.130586","article-title":"Collision-free motion-constrained path planning for multiple unmanned delivery vehicles based on heuristic deep reinforcement learning","author":"Han","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133343_bib0040","first-page":"66328","article-title":"Seeing is not believing: robust reinforcement learning against spurious correlation","volume":"36","author":"Ding","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0045","series-title":"Conference on Robot Learning","first-page":"651","article-title":"Scalable deep reinforcement learning for vision-based robotic manipulation","author":"Kalashnikov","year":"2018"},{"key":"10.1016\/j.neucom.2026.133343_bib0050","author":"Chebotar"},{"key":"10.1016\/j.neucom.2026.133343_bib0055","series-title":"Proceedings of the 5th Conference on Robot Learning","first-page":"907","article-title":"S4RL: surprisingly simple self-supervision for offline reinforcement learning in Robotics","volume":"vol. 164","author":"Sinha","year":"2022"},{"key":"10.1016\/j.neucom.2026.133343_bib0060","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.neucom.2020.09.055","article-title":"Safety robustness of reinforcement learning policies: a view from robust control","volume":"422","author":"Xiong","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133343_bib0065","doi-asserted-by":"crossref","DOI":"10.1016\/j.jbi.2023.104376","article-title":"Offline reinforcement learning for safer blood glucose control in people with type 1 diabetes","volume":"142","author":"Emerson","year":"2023","journal-title":"J. Biomed. Inform."},{"key":"10.1016\/j.neucom.2026.133343_bib0070","author":"Verma"},{"key":"10.1016\/j.neucom.2026.133343_bib0075","series-title":"2010 IEEE International Conference on Robotics and Automation","first-page":"5582","article-title":"Increasing robotic wheelchair safety with collaborative control: evidence from secondary task experiments","author":"Carlson","year":"2010"},{"key":"10.1016\/j.neucom.2026.133343_bib0080","doi-asserted-by":"crossref","DOI":"10.1016\/j.rcim.2020.102022","article-title":"Safety assurance mechanisms of collaborative robotic systems in manufacturing","volume":"67","author":"Bi","year":"2021","journal-title":"Robot. Comput.-Integr. Manuf."},{"key":"10.1016\/j.neucom.2026.133343_bib0085","series-title":"Constrained Markov Decision Processes","author":"Altman","year":"2021"},{"key":"10.1016\/j.neucom.2026.133343_bib0090","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"15313","article-title":"Evaluating model-free reinforcement learning toward safety-critical tasks","volume":"vol. 37","author":"Zhang","year":"2023"},{"issue":"285","key":"10.1016\/j.neucom.2026.133343_bib0095","first-page":"1","article-title":"OmniSafe: an infrastructure for accelerating safe reinforcement learning research","volume":"25","author":"Ji","year":"2024","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.133343_bib0100","first-page":"78451","article-title":"OASIS: conditional distribution shaping for offline safe reinforcement learning","volume":"37","author":"Yao","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0105","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"8753","article-title":"Constraints penalized q-learning for safe offline reinforcement learning","volume":"vol. 36","author":"Xu","year":"2022"},{"key":"10.1016\/j.neucom.2026.133343_bib0110","series-title":"International Conference on Machine Learning","first-page":"6120","article-title":"OptiDICE: offline policy optimization via stationary distribution correction estimation","author":"Lee","year":"2021"},{"key":"10.1016\/j.neucom.2026.133343_bib0115","author":"Nachum"},{"key":"10.1016\/j.neucom.2026.133343_bib0120","first-page":"26091","article-title":"Deep hierarchical planning from pixels","volume":"35","author":"Hafner","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"10","key":"10.1016\/j.neucom.2026.133343_bib0125","doi-asserted-by":"crossref","first-page":"14959","DOI":"10.1109\/TNNLS.2023.3282380","article-title":"Heuristic heterogeneous graph reasoning networks for fact verification","volume":"35","author":"Wu","year":"2024","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0130","series-title":"Proceedings of the Tenth National Conference on Artificial Intelligence","first-page":"202","article-title":"Reinforcement learning with a hierarchy of abstract models","author":"Singh","year":"1992"},{"key":"10.1016\/j.neucom.2026.133343_bib0135","first-page":"17321","article-title":"Long-horizon visual planning with goal-conditioned hierarchical predictors","volume":"33","author":"Pertsch","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0140","series-title":"International Conference on Machine Learning","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","author":"Fujimoto","year":"2019"},{"key":"10.1016\/j.neucom.2026.133343_bib0145","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0150","article-title":"Stabilizing off-policy q-learning via bootstrapping error reduction","volume":"32","author":"Kumar","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0155","doi-asserted-by":"crossref","first-page":"11534","DOI":"10.52202\/068431-0838","article-title":"S2P: state-conditioned image synthesis for data augmentation in offline reinforcement learning","volume":"35","author":"Cho","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0160","author":"Corrado"},{"key":"10.1016\/j.neucom.2026.133343_bib0165","author":"Huang"},{"key":"10.1016\/j.neucom.2026.133343_bib0170","series-title":"International Conference on Learning Representations","article-title":"DARA: dynamics-aware reward augmentation in offline reinforcement learning","author":"Liu","year":"2022"},{"key":"10.1016\/j.neucom.2026.133343_bib0175","series-title":"International Conference on Machine Learning","first-page":"25611","article-title":"How to leverage unlabeled data in offline reinforcement learning","author":"Yu","year":"2022"},{"issue":"12","key":"10.1016\/j.neucom.2026.133343_bib0180","doi-asserted-by":"crossref","first-page":"7732","DOI":"10.1109\/TSMC.2023.3297711","article-title":"Self-supervised imitation for offline reinforcement learning with hindsight relabeling","volume":"53","author":"Yu","year":"2023","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"1","key":"10.1016\/j.neucom.2026.133343_bib0185","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u0131a","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.133343_bib0190","series-title":"International Conference on Machine Learning","first-page":"22","article-title":"Constrained policy optimization","author":"Achiam","year":"2017"},{"key":"10.1016\/j.neucom.2026.133343_bib0195","author":"Gu"},{"key":"10.1016\/j.neucom.2026.133343_bib0200","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"10639","article-title":"WCSAC: worst-case soft actor critic for safety-constrained reinforcement learning","volume":"vol. 35","author":"Yang","year":"2021"},{"issue":"167","key":"10.1016\/j.neucom.2026.133343_bib0205","first-page":"1","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2018","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2026.133343_bib0210","author":"Tessler"},{"key":"10.1016\/j.neucom.2026.133343_bib0215","first-page":"8378","article-title":"Natural policy gradient primal-dual method for constrained markov decision processes","volume":"33","author":"Ding","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0220","author":"Fujimoto"},{"key":"10.1016\/j.neucom.2026.133343_bib0225","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.129912","article-title":"Hybrid safe reinforcement learning: tackling distribution shift and outliers with the student-t\u2019s process","volume":"634","author":"Hickman","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133343_bib0230","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2025.130041","article-title":"NLBAC: a neural ode-based algorithm for state-wise stable and safe reinforcement learning","volume":"638","author":"Zhao","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2026.133343_bib0235","series-title":"International Conference on Machine Learning","first-page":"3703","article-title":"Batch policy learning under constraints","author":"Le","year":"2019"},{"key":"10.1016\/j.neucom.2026.133343_bib0240","series-title":"International Conference on Machine Learning","first-page":"17801","article-title":"Constrained offline policy optimization","author":"Polosky","year":"2022"},{"key":"10.1016\/j.neucom.2026.133343_bib0245","author":"Lee"},{"key":"10.1016\/j.neucom.2026.133343_bib0250","first-page":"15084","article-title":"Decision transformer: reinforcement learning via sequence modeling","volume":"34","author":"Chen","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0255","author":"Janner"},{"key":"10.1016\/j.neucom.2026.133343_bib0260","series-title":"International Conference on Machine Learning","first-page":"21611","article-title":"Constrained decision transformer for offline safe reinforcement learning","author":"Liu","year":"2023"},{"key":"10.1016\/j.neucom.2026.133343_bib0265","series-title":"International Conference on Machine Learning","first-page":"21127","article-title":"Safe offline reinforcement learning with real-time budget constraints","author":"Lin","year":"2023"},{"key":"10.1016\/j.neucom.2026.133343_bib0270","author":"Zheng"},{"key":"10.1016\/j.neucom.2026.133343_bib0275","first-page":"55006","article-title":"LIMA: less is more for alignment","volume":"36","author":"Zhou","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0280","first-page":"54463","article-title":"APIGen: automated pipeline for generating verifiable and diverse function-calling datasets","volume":"37","author":"Liu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0285","author":"Chen"},{"key":"10.1016\/j.neucom.2026.133343_bib0290","series-title":"International Conference on Machine Learning","first-page":"20035","article-title":"Hierarchical diffusion for offline decision making","author":"Li","year":"2023"},{"key":"10.1016\/j.neucom.2026.133343_bib0295","author":"Ajay"},{"key":"10.1016\/j.neucom.2026.133343_bib0300","series-title":"The Thirty-Eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track","article-title":"CleanDiffuser: an easy-to-use modularized library for diffusion models in decision making","author":"Dong","year":"2024"},{"key":"10.1016\/j.neucom.2026.133343_bib0305","first-page":"67195","article-title":"Efficient diffusion policies for offline reinforcement learning","volume":"36","author":"Kang","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0310","author":"Dong"},{"key":"10.1016\/j.neucom.2026.133343_bib0315","series-title":"International Conference on Machine Learning","first-page":"5774","article-title":"Offline reinforcement learning with Fisher divergence critic regularization","author":"Kostrikov","year":"2021"},{"key":"10.1016\/j.neucom.2026.133343_bib0320","series-title":"International Conference on Machine Learning","first-page":"2256","article-title":"Deep unsupervised learning using nonequilibrium Thermodynamics","author":"Sohl-Dickstein","year":"2015"},{"key":"10.1016\/j.neucom.2026.133343_bib0325","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0330","series-title":"International Conference on Machine Learning","first-page":"8162","article-title":"Improved denoising diffusion probabilistic models","author":"Nichol","year":"2021"},{"issue":"4","key":"10.1016\/j.neucom.2026.133343_bib0335","doi-asserted-by":"crossref","first-page":"10216","DOI":"10.1109\/LRA.2022.3190100","article-title":"Hierarchical planning through goal-conditioned offline reinforcement learning","volume":"7","author":"Li","year":"2022","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.neucom.2026.133343_bib0340","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0345","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"21099","article-title":"Balance reward and safety optimization for safe reinforcement learning: a perspective of gradient manipulation","volume":"vol. 38","author":"Gu","year":"2024"},{"key":"10.1016\/j.neucom.2026.133343_bib0350","author":"Lee"},{"key":"10.1016\/j.neucom.2026.133343_bib0355","author":"Liu"},{"key":"10.1016\/j.neucom.2026.133343_bib0360","unstructured":"S. Gronauer, Bullet-safety-gym: a framework for constrained reinforcement learning, MediaTUM, Tech. Rep., 2022, https:\/\/doi.org\/10.14459\/2022md1639974"},{"key":"10.1016\/j.neucom.2026.133343_bib0365","series-title":"International Conference on Machine Learning","first-page":"9133","article-title":"Responsive safety in reinforcement learning by PID lagrangian methods","author":"Stooke","year":"2020"},{"key":"10.1016\/j.neucom.2026.133343_bib0370","article-title":"Semi-supervised learning with deep generative models","volume":"27","author":"Kingma","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.neucom.2026.133343_bib0375","author":"Song"},{"key":"10.1016\/j.neucom.2026.133343_bib0380","series-title":"International Conference on Machine Learning","first-page":"32211","article-title":"Consistency models","author":"Song","year":"2023"},{"key":"10.1016\/j.neucom.2026.133343_bib0385","series-title":"European Conference on Computer Vision","first-page":"1","article-title":"Diffusion models as optimizers for efficient planning in offline RL","author":"Huang","year":"2024"},{"key":"10.1016\/j.neucom.2026.133343_bib0390","author":"Chen"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S092523122600740X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S092523122600740X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T12:32:12Z","timestamp":1774528332000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S092523122600740X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":78,"alternative-id":["S092523122600740X"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133343","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"HAVEN: Hierarchical diffusion and value-based trajectory selection for offline safe reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2026.133343","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133343"}}