{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:25:18Z","timestamp":1775744718216,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T00:00:00Z","timestamp":1722470400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&#x0026;D Program of China","award":["2022ZD0119901"],"award-info":[{"award-number":["2022ZD0119901"]}]},{"name":"Shanghai Science and Technology program","award":["22015810300"],"award-info":[{"award-number":["22015810300"]}]},{"name":"Hainan Province Science and Technology Special Fund","award":["ZDYF2021GXJS041"],"award-info":[{"award-number":["ZDYF2021GXJS041"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U2141234"],"award-info":[{"award-number":["U2141234"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1109\/tpami.2024.3364844","type":"journal-article","created":{"date-parts":[[2024,2,12]],"date-time":"2024-02-12T19:05:15Z","timestamp":1707764715000},"page":"5260-5272","source":"Crossref","is-referenced-by-count":12,"title":["Efficient Offline Reinforcement Learning With Relaxed Conservatism"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5123-5043","authenticated-orcid":false,"given":"Longyang","family":"Huang","sequence":"first","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2026-6856","authenticated-orcid":false,"given":"Botao","family":"Dong","sequence":"additional","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4700-1276","authenticated-orcid":false,"given":"Weidong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2599820"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-020211"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10827"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2998695"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3155483"},{"key":"ref7","first-page":"27580","article-title":"Online and offline reinforcement learning by planning with a learned model","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Schrittwieser"},{"key":"ref8","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref9","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref10","first-page":"11784","article-title":"Stabilizing off policy Q learning via bootstrapping error reduction","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref11","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Agarwal"},{"key":"ref12","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019"},{"key":"ref13","article-title":"Offline reinforcement learning with soft behavior regularization","author":"Xu","year":"2021"},{"key":"ref14","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fujimoto"},{"key":"ref15","first-page":"5774","article-title":"Offline reinforcement learning with fisher divergence critic regularization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kostrikov"},{"key":"ref16","article-title":"Offline reinforcement learning with implicit q learning","author":"Kostrikov","year":"2021"},{"key":"ref17","first-page":"1179","article-title":"Conservative Q learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref18","first-page":"11319","article-title":"Uncertainty weighted actor-critic for offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wu"},{"key":"ref19","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020"},{"key":"ref20","article-title":"Density estimation for conservative Q-Learning","volume-title":"Proc. Workshop Generalizable Policy Learn. Phys. World","author":"Daoudi"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-017-5650-8"},{"key":"ref22","article-title":"Actionable models: Unsupervised offline reinforcement learning of robotic skills","author":"Chebotar","year":"2021"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981126"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/tpwrs.2022.3159825"},{"key":"ref25","article-title":"Offline reinforcement learning with uncertainty for treatment strategies in sepsis","author":"Liu","year":"2021"},{"key":"ref26","first-page":"2","article-title":"Model selection for offline reinforcement learning: Practical considerations for healthcare settings","volume-title":"Proc. Mach. Learn. Healthcare Conf.","author":"Tang"},{"key":"ref27","article-title":"When should we prefer offline reinforcement learning over behavioral cloning?","author":"Kumar","year":"2022"},{"key":"ref28","article-title":"When data geometry meets deep function: Generalizing offline reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Li"},{"key":"ref29","article-title":"Batch reinforcement learning through continuation method","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Guo"},{"key":"ref30","article-title":"Latent-variable advantage-weighted policy optimization for offline RL","author":"Chen","year":"2022"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ijcnn55064.2022.9892633"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00328"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2016.2603151"},{"key":"ref34","article-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","author":"Peng","year":"2019"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3390\/foundations1020018"},{"key":"ref36","first-page":"4933","article-title":"Offline RL without off-policy evaluation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Brandfonbrener"},{"key":"ref37","first-page":"21810","article-title":"Morel: Model-based offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kidambi"},{"key":"ref38","first-page":"14129","article-title":"MOPO: Model-based offline policy optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref40","first-page":"15737","article-title":"Error bounds of imitating policies and environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00942"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20855"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3096966"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10582780\/10432784.pdf?arnumber=10432784","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,5]],"date-time":"2024-07-05T04:01:30Z","timestamp":1720152090000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10432784\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8]]},"references-count":44,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3364844","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8]]}}}