{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T05:31:41Z","timestamp":1775971901646,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"5-6","license":[{"start":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T00:00:00Z","timestamp":1730505600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T00:00:00Z","timestamp":1730505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100013076","name":"National Major Science and Technology Projects of China","doi-asserted-by":"publisher","award":["2019YFB1705702"],"award-info":[{"award-number":["2019YFB1705702"]}],"id":[{"id":"10.13039\/501100013076","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s13042-024-02439-2","type":"journal-article","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T12:02:40Z","timestamp":1730548960000},"page":"3113-3130","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Uncertainty-based bootstrapped optimization for offline reinforcement learning"],"prefix":"10.1007","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9423-6369","authenticated-orcid":false,"given":"Tianyi","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3492-0211","authenticated-orcid":false,"given":"Genke","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Chu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,2]]},"reference":[{"issue":"11","key":"2439_CR1","doi-asserted-by":"publisher","first-page":"13344","DOI":"10.1109\/TPAMI.2023.3292075","volume":"45","author":"Z Zhu","year":"2023","unstructured":"Zhu Z, Lin K, Jain AK, Zhou J (2023) Transfer learning in deep reinforcement learning: A survey. IEEE Trans Pattern Anal Mach Intell 45(11):13344\u201313362. https:\/\/doi.org\/10.1109\/TPAMI.2023.3292075","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"17","key":"2439_CR2","doi-asserted-by":"publisher","first-page":"12271","DOI":"10.1007\/s00521-020-05663-3","volume":"35","author":"Z Zha","year":"2023","unstructured":"Zha Z, Wang B, Tang X (2023) Evaluate, explain, and explore the state more exactly: an improved Actor-Critic algorithm for complex environment. Neural Comput Appl 35(17):12271\u201312282. https:\/\/doi.org\/10.1007\/s00521-020-05663-3","journal-title":"Neural Comput Appl"},{"issue":"7540","key":"2439_CR3","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"issue":"7587","key":"2439_CR4","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D et al (2016) Mastering the game of Go with deep neural networks and tree search. Nature 529(7587):484\u2013489. https:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"key":"2439_CR5","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","volume":"588","author":"J Schrittwieser","year":"2020","unstructured":"Schrittwieser J et al (2020) Mastering Atari, Go, chess and shogi by planning with a learned model. Nature 588:604\u2013609. https:\/\/doi.org\/10.1038\/s41586-020-03051-4","journal-title":"Nature"},{"key":"2439_CR6","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: Proceedings of international conference learning representation"},{"issue":"6","key":"2439_CR7","doi-asserted-by":"publisher","first-page":"2239","DOI":"10.1007\/s13042-022-01759-5","volume":"14","author":"D Fan","year":"2023","unstructured":"Fan D, Shen H, Dong L (2023) Twin attentive deep reinforcement learning for multi-agent defensive convoy. Int J Mach Learn Cybern 14(6):2239\u20132250. https:\/\/doi.org\/10.1007\/s13042-022-01759-5","journal-title":"Int J Mach Learn Cybern"},{"key":"2439_CR8","unstructured":"Shulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. Available: arXiv:1707.06434"},{"issue":"23","key":"2439_CR9","doi-asserted-by":"publisher","first-page":"28207","DOI":"10.1007\/s10489-023-05007-3","volume":"53","author":"Y Gong","year":"2023","unstructured":"Gong Y, Xiong H, Li M, Wang H, Nian X (2023) Reinforcement learning for multi-agent formation navigation with scalability. Appl Intel l. 53(23):28207\u201328225. https:\/\/doi.org\/10.1007\/s10489-023-05007-3","journal-title":"Appl Intel l."},{"issue":"5","key":"2439_CR10","doi-asserted-by":"publisher","first-page":"1663","DOI":"10.1007\/s13042-023-01989-1","volume":"15","author":"P Wu","year":"2024","unstructured":"Wu P, Luo S, Tian L, Mao B, Chen W (2024) Consistent epistemic planning for multiagent deep reinforcement learning. Int J Mach Learn Cybern 15(5):1663\u20131675. https:\/\/doi.org\/10.1007\/s13042-023-01989-1","journal-title":"Int J Mach Learn Cybern"},{"issue":"8","key":"2439_CR11","doi-asserted-by":"publisher","first-page":"7577","DOI":"10.1109\/TCYB.2020.3041265","volume":"52","author":"X Qu","year":"2022","unstructured":"Qu X, Ong Y, Gupta A (2022) Frame-correlation transfers trigger economical attacks on deep reinforcement learning policies. IEEE Trans. Cybern. 52(8):7577\u20137590. https:\/\/doi.org\/10.1109\/TCYB.2020.3041265","journal-title":"IEEE Trans. Cybern."},{"key":"2439_CR12","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum deep reinforcement learning with a stochastic actor. In: Proceedings of International Conference Machine Learning, pp. 2976\u20132989"},{"issue":"8","key":"2439_CR13","doi-asserted-by":"publisher","first-page":"3796","DOI":"10.1109\/TNNLS.2021.3124466","volume":"34","author":"LC Garaffa","year":"2023","unstructured":"Garaffa LC, Basso M, Konzen AA, de Freitas EP (2023) Reinforcement learning for mobile robotics exploration: a survey. IEEE Trans Neural Netw Learn Syst 34(8):3796\u20133810. https:\/\/doi.org\/10.1109\/TNNLS.2021.3124466","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"5","key":"2439_CR14","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1007\/s10994-023-06318-9","volume":"112","author":"C Yu","year":"2023","unstructured":"Yu C, Chen M, Lin H (2023) Learning key steps to attack deep reinforcement learning agents. Mach Learn 112(5):1499\u20131522. https:\/\/doi.org\/10.1007\/s10994-023-06318-9","journal-title":"Mach Learn"},{"key":"2439_CR15","unstructured":"Fakoor R, Mueller J, Asadi K, Chaudhari P, Smola AJ (2021) Continuous doubly constrained batch reinforcement learning. In: Advanced neural information process system, pp 11260\u201311273"},{"key":"2439_CR16","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1016\/j.neucom.2021.07.014","volume":"459","author":"X Zhang","year":"2021","unstructured":"Zhang X, Liu Y, Xu X, Huang Q, Mao H, Carie A (2021) Structural relational inference actor-critic for multi-agent reinforcement learning. Neurocomputing 459:383\u2013394. https:\/\/doi.org\/10.1016\/j.neucom.2021.07.014","journal-title":"Neurocomputing"},{"key":"2439_CR17","unstructured":"Li Z, Li Y, Zhang Y, Zhang T, Luo ZQ (2022) HyperDQN: a randomized exploration method for deep reinforcement learning. In: Proceedings of International Conference on Learning Representation"},{"issue":"12","key":"2439_CR18","doi-asserted-by":"publisher","first-page":"8156","DOI":"10.1109\/TIT.2022.3185139","volume":"68","author":"P Rashidinejad","year":"2022","unstructured":"Rashidinejad P, Zhu B, Ma C, Jiao J, Russell S (2022) Bridging offline reinforcement learning and imitation learning: A tale of pessimism. IEEE Trans Inf Theory 68(12):8156\u20138196. https:\/\/doi.org\/10.1109\/TIT.2022.3185139","journal-title":"IEEE Trans Inf Theory"},{"key":"2439_CR19","first-page":"14129","volume":"33","author":"T Yu","year":"2020","unstructured":"Yu T, Thomas G, Yu L, Ermon S, Zou JY, Levine S, Finn C, Ma T (2020) Mopo: Model-based offline policy optimization. Adv Neural Inf Proces Syst 33:14129\u201314142","journal-title":"Adv Neural Inf Proces Syst"},{"key":"2439_CR20","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi R, Rajeswaran A, Netrapalli P, Joachims T (2020) Morel: Model-based offline reinforcement learning. Adv Neural Inf Proces Syst 33:21810\u201321823","journal-title":"Adv Neural Inf Proces Syst"},{"key":"2439_CR21","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto S, Gu SS (2021) A minimalist approach to offline reinforcement learning. Adv Neural Inf Process Syst 34:20132\u201320145","journal-title":"Adv Neural Inf Process Syst"},{"key":"2439_CR22","unstructured":"Fujimoto S, Meger D, Precup D (2019) \u201cOff-policy deep reinforcement learning without exploration,\u201d in Proc. Int. Conf. Mach. Learn., pp. 3599-3609, Jun"},{"key":"2439_CR23","unstructured":"Laroche R, Trichelair P, Des Combes RT (2019) \u201cSafe policy improvement with baseline bootstrapping,\u201d in Proc. Int. Conf. Mach. Learn., pp. 3652-3661, Jun"},{"key":"2439_CR24","unstructured":"Kumar A, Fu J, Soh M, Tucker G, Levine S (2019) \u201cStabilizing off-policy q-learning via bootstrapping error reduction,\u201d in Adv. neural inf. proces. syst., pp. 11784-11794, Dec"},{"key":"2439_CR25","unstructured":"Peng X, Kumar A, Zhang G, Levine S (2019) \u201cAdvantage-weighted regression: simple and scalable off-policy reinforcement learning,\u201d [Online]. Available: arxiv:1910.00177"},{"key":"2439_CR26","unstructured":"Wu Y, Zhai S, Srivastava N, Susskind J, Zhang J, Salakhutdinov R, Goh H (2021) \u201cUncertainty weighted actor-critic for offline reinforcement learning,\u201d in Proc. Int. Conf. Mach. Learn., pp. 11319-11328"},{"key":"2439_CR27","unstructured":"T. Hiraoka, T. Imagawa, T. Hashimoto, T. Onishi, and Y. Tsuruoka, Y, \u201cDropout Q-functions for doubly efficient reinforcement learning,\u201d in Proc. Int. Conf. Learn. Represent., Apr, (2022)"},{"issue":"10","key":"2439_CR28","doi-asserted-by":"publisher","first-page":"11773","DOI":"10.1007\/s10462-023-10443-1","volume":"56","author":"M Magris","year":"2023","unstructured":"Magris M, Iosifidis A (2023) Bayesian learning for neural networks: an algorithmic survey. Artif Intell Rev 56(10):11773\u201311823. https:\/\/doi.org\/10.1007\/s10462-023-10443-1","journal-title":"Artif Intell Rev"},{"key":"2439_CR29","unstructured":"Agarwal R, Schuurmans D, Norouzi M (2020) An optimistic perspective on offline reinforcement learning. In: Proceedings of International Conference on Machine Learning, pp 104\u2013114"},{"key":"2439_CR30","unstructured":"An G, Moon S, Kim J, Song H (2021) \u201cUncertainty-based offline reinforcement learning with diversified q-ensemble,\u201d in Adv. neural inf. proces. syst., pp. 7436-7447, Dec"},{"key":"2439_CR31","unstructured":"Bai C, Wang L, Yang Z, Deng Z, Garg A, Liu P, Wang Z (2022) \u201cPessimistic bootstrapping for uncertainty-driven offline reinforcement learning,\u201d [Online]. Available: arxiv:2202.11566"},{"issue":"12","key":"2439_CR32","doi-asserted-by":"publisher","first-page":"1497","DOI":"10.1038\/s42256-023-00767-6","volume":"5","author":"R Pacelli","year":"2023","unstructured":"Pacelli R, Ariosto S, Pastore M, Ginelli F, Gherardi M, Rotondo P (2023) ,\u201cA statistical mechanics framework for Bayesian deep neural networks beyond the infinite-width limit,\u2019\u2019. Nat. Mach. Intel l. 5(12):1497\u20131507. https:\/\/doi.org\/10.1038\/s42256-023-00767-6","journal-title":"Nat. Mach. Intel l."},{"key":"2439_CR33","doi-asserted-by":"publisher","unstructured":"Mourgias-Alexandris G et al (2022) \u201cNoise-resilient and high-speed deep learning with coherent silicon photonics,\u201d Nat Commun, vol. 13, Sep. https:\/\/doi.org\/10.1038\/s41467-022-33259-z","DOI":"10.1038\/s41467-022-33259-z"},{"issue":"4","key":"2439_CR34","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1109\/TG.2022.3185330","volume":"15","author":"L Meng","year":"2023","unstructured":"Meng L, Goodwin M, Yazidi A, Engelstad PE (2023) Improving the Diversity of Bootstrapped DQN by Replacing Priors With Noise. IEEE Trans. Games 15(4):580\u2013589. https:\/\/doi.org\/10.1109\/TG.2022.3185330","journal-title":"IEEE Trans. Games"},{"issue":"11","key":"2439_CR35","doi-asserted-by":"publisher","first-page":"4440","DOI":"10.1109\/TPS.2022.3180233","volume":"50","author":"E Miralles-Dolz","year":"2022","unstructured":"Miralles-Dolz E, Pearce A, Morris J, Patelli E (2022) Toward DEMO power plant concept selection under epistemic uncertainty. IEEE Trans Plasma Sci 50(11):4440\u20134445. https:\/\/doi.org\/10.1109\/TPS.2022.3180233","journal-title":"IEEE Trans Plasma Sci"},{"issue":"3","key":"2439_CR36","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1109\/TSUSC.2023.3251302","volume":"8","author":"L Chen","year":"2023","unstructured":"Chen L, Meng F, Zhang Y (2023) Fast human-in-the-loop control for HVAC systems via meta-learning and model-based offline reinforcement Learning. IEEE Trans. Sust. Comp. 8(3):504\u2013521. https:\/\/doi.org\/10.1109\/TSUSC.2023.3251302","journal-title":"IEEE Trans. Sust. Comp."},{"key":"2439_CR37","unstructured":"Mai V, Mani K, Paull L (2022) \u201cSample efficient deep reinforcement learning via uncertainty estimation,\u201d in Proc. Int. Conf. Learn. Represent., Apr,"},{"key":"2439_CR38","unstructured":"Sokar G, Agarwal R, Castro PS, Evci U (2023) \u201cThe dormant neuron phenomenon in deep reinforcement learning,\u201d in Proc. Int. Conf. Mach. Learn., pp. 32145-32168, Jul,"},{"issue":"1","key":"2439_CR39","first-page":"723","volume":"13","author":"A Gretton","year":"2012","unstructured":"Gretton A, Borgwardt KM, Rasch MJ, Sch\u00f6lkopf B, Smola A (2012) A kernel two-sample test. J Mach Learn Res 13(1):723\u2013773","journal-title":"J Mach Learn Res"},{"key":"2439_CR40","unstructured":"Kumar A, Zhou A, Tucker G, Levine S (2020) \u201cConservative q-learning for offline reinforcement learning,\u201d in Adv. neural inf. proces. syst., pp.1179-1191,"},{"key":"2439_CR41","doi-asserted-by":"crossref","unstructured":"Zeng X, Peng H, Li A, Liu C, He L, Yu PS (2023) \u201cHierarchical state abstraction based on structural information principles,\u201d in IJCAI Int. Joint Conf. Artif. Intell., pp. 4549-4557, Aug,","DOI":"10.24963\/ijcai.2023\/506"},{"issue":"10","key":"2439_CR42","first-page":"11772","volume":"37","author":"X Zeng","year":"2023","unstructured":"Zeng X, Peng H, Li A (2023) Effective and stable role-based multi-agent collaboration by structural information principles. Proc. AAAI Conf. Artif. Intell. 37(10):11772\u201311780","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2439_CR43","unstructured":"Fu J, Kumar A, Nachum O, Tucker G, Levine S (2020) \u201cD4rl: datasets for deep data-driven reinforcement learning,\u201d [Online]. Available: arxiv:2004.07219"},{"key":"2439_CR44","unstructured":"Brockman G et al (2016) \u201cOpenAI gym,\u201d [Online]. Available: arxiv:1606.01540"},{"key":"2439_CR45","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) \u201cMujoco: A physics engine for model-based control,\u201d in Proc. IEEE\/RSJ Int. Conf. Intell. Robots Syst., pp. 5026-5033, Oct,","DOI":"10.1109\/IROS.2012.6386109"},{"key":"2439_CR46","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) \u201cSoft actor critic: off-policy maximum deep reinforcement learning with a stochastic actor,\u201d in Proc. Int. Conf. Mach. Learn., pp. 2976-2989, Jul,"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02439-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02439-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02439-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T04:29:13Z","timestamp":1749270553000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02439-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,2]]},"references-count":46,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["2439"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02439-2","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,2]]},"assertion":[{"value":"1 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The research not involving human participants or animals.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}]}}