{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T19:13:54Z","timestamp":1773774834329,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"25","license":[{"start":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T00:00:00Z","timestamp":1715644800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T00:00:00Z","timestamp":1715644800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key Research and Development Program of China","award":["No. 2021YFB2801900, 2021YFB2801901, 2021YFB2801902, 2021YFB2801903, 2021YFB2801904, 2018YFE0201200"],"award-info":[{"award-number":["No. 2021YFB2801900, 2021YFB2801901, 2021YFB2801902, 2021YFB2801903, 2021YFB2801904, 2018YFE0201200"]}]},{"DOI":"10.13039\/100014717","name":"National Outstanding Youth Science Fund Project of National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 62022062"],"award-info":[{"award-number":["No. 62022062"]}],"id":[{"id":"10.13039\/100014717","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 61974177"],"award-info":[{"award-number":["No. 61974177"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["QTZX23041"],"award-info":[{"award-number":["QTZX23041"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012239","name":"Hubei Technological Innovation Special Fund","doi-asserted-by":"publisher","award":["No. 5001-20109215456"],"award-info":[{"award-number":["No. 5001-20109215456"]}],"id":[{"id":"10.13039\/501100012239","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s00521-024-09839-z","type":"journal-article","created":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T13:02:16Z","timestamp":1715691736000},"page":"15255-15277","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Judgmentally adjusted Q-values based on Q-ensemble for offline reinforcement learning"],"prefix":"10.1007","volume":"36","author":[{"given":"Wenzhuo","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuying","family":"Xiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanan","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xingxing","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yahui","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yue","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,5,14]]},"reference":[{"key":"9839_CR1","unstructured":"Badia AP, Piot B, Kapturowski S, Sprechmann P, Vitvitskyi A, Guo ZD, Blundell C (2020) Agent57: Outperforming the atari human benchmark. In: International Conference on Machine Learning, pp. 507\u2013517 . PMLR"},{"key":"9839_CR2","unstructured":"Berner C, Brockman G, Chan B, Cheung V, D\u0119biak P, Dennison C, Farhi D, Fischer Q, Hashme S, Hesse C et al. (2019) Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680"},{"key":"9839_CR3","first-page":"24639","volume":"35","author":"B Baker","year":"2022","unstructured":"Baker B, Akkaya I, Zhokov P, Huizinga J, Tang J, Ecoffet A, Houghton B, Sampedro R, Clune J (2022) Video pretraining (vpt): learning to act by watching unlabeled online videos. Adv Neural Inf Process Syst 35:24639\u201324654","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR4","unstructured":"Levine S, Kumar A, Tucker G, Fu J (2020) Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643"},{"key":"9839_CR5","unstructured":"Agarwal R, Schuurmans D, Norouzi M (2020) An optimistic perspective on offline reinforcement learning. In: International conference on machine learning, pp. 104\u2013114. PMLR"},{"key":"9839_CR6","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International conference on machine learning, pp. 2052\u20132062. PMLR"},{"key":"9839_CR7","unstructured":"Kumar A, Fu J, Soh M, Tucker G, Levine S (2019) Stabilizing off-policy q-learning via bootstrapping error reduction. Adv Neural Inf Process Syst 32"},{"key":"9839_CR8","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar A, Zhou A, Tucker G, Levine S (2020) Conservative q-learning for offline reinforcement learning. Adv Neural Inf Process Syst 33:1179\u20131191","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR9","unstructured":"Kostrikov I, Nair A, Levine S (2021) Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169"},{"key":"9839_CR10","first-page":"7436","volume":"34","author":"G An","year":"2021","unstructured":"An G, Moon S, Kim J-H, Song HO (2021) Uncertainty-based offline reinforcement learning with diversified q-ensemble. Adv Neural Inf Process Syst 34:7436\u20137447","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR11","unstructured":"Nikulin A, Kurenkov V, Tarasov D, Akimov D, Kolesnikov S (2022) Q-ensemble for offline rl: Don\u2019t scale the ensemble, scale the batch size. In: 3rd Offline RL Workshop: Offline RL as a\u201dLaunchpad\u201d"},{"key":"9839_CR12","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR"},{"key":"9839_CR13","unstructured":"Fu J, Kumar A, Nachum O, Tucker G, Levine S (2020) D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219"},{"key":"9839_CR14","unstructured":"Wu Y, Tucker G, Nachum O (2019) Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361"},{"key":"9839_CR15","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto S, Gu SS (2021) A minimalist approach to offline reinforcement learning. Adv Neural Inf Process Syst 34:20132\u201320145","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR16","unstructured":"Nair A, Gupta A, Dalal M, Levine S (2020) Awac: Accelerating online reinforcement learning with offline datasets. arXiv preprint arXiv:2006.09359"},{"key":"9839_CR17","first-page":"18267","volume":"35","author":"K Ghasemipour","year":"2022","unstructured":"Ghasemipour K, Gu SS, Nachum O (2022) Why so pessimistic? estimating uncertainties for offline rl through ensembles, and why their independence matters. Adv Neural Inf Process Syst 35:18267\u201318281","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR18","doi-asserted-by":"crossref","unstructured":"Rezaeifar S, Dadashi R, Vieillard N, Hussenot L, Bachem O, Pietquin O, Geist M (2022) Offline reinforcement learning as anti-exploration. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 8106\u20138114","DOI":"10.1609\/aaai.v36i7.20783"},{"key":"9839_CR19","unstructured":"Chen X, Ghadirzadeh A, Yu T, Gao Y, Wang J, Li W, Liang B, Finn C, Zhang C (2022) Latent-variable advantage-weighted policy optimization for offline rl. arXiv preprint arXiv:2203.08949"},{"key":"9839_CR20","unstructured":"Zhou W, Bajracharya S, Held D (2021) Plas: Latent action space for offline reinforcement learning. In: Conference on Robot Learning, pp. 1719\u20131735. PMLR"},{"key":"9839_CR21","unstructured":"Akimov D, Kurenkov V, Nikulin A, Tarasov D, Kolesnikov S (2022) Let offline rl flow: Training conservative agents in the latent space of normalizing flows. arXiv preprint arXiv:2211.11096"},{"key":"9839_CR22","unstructured":"Sheikh H, Frisbee K, Phielipp M (2022) Dns: Determinantal point process based neural network sampler for ensemble reinforcement learning. In: International Conference on Machine Learning, pp. 19731\u201319746. PMLR"},{"key":"9839_CR23","unstructured":"Lee K, Laskin M, Srinivas A, Abbeel P (2021) Sunrise: A simple unified framework for ensemble learning in deep reinforcement learning. In: International Conference on Machine Learning, pp. 6131\u20136141. PMLR"},{"key":"9839_CR24","unstructured":"Osband I, Aslanides J, Cassirer A (2018) Randomized prior functions for deep reinforcement learning. Adv Neural Inf Process Syst 31"},{"key":"9839_CR25","unstructured":"Kurutach T, Clavera I, Duan Y, Tamar A, Abbeel P (2018) Model-ensemble trust-region policy optimization. arXiv preprint arXiv:1802.10592"},{"key":"9839_CR26","unstructured":"Lai H, Shen J, Zhang W, Yu Y (2020) Bidirectional model-based policy optimization. In: International Conference on Machine Learning, pp. 5618\u20135627. PMLR"},{"key":"9839_CR27","unstructured":"Osband I, Blundell C, Pritzel A, Van\u00a0Roy B (2016) Deep exploration via bootstrapped dqn. Adv Neural Inf Process Syst 29"},{"key":"9839_CR28","unstructured":"Clements WR, Van\u00a0Delft B, Robaglia B-M, Slaoui RB, Toth S (2019) Estimating risk and uncertainty in deep reinforcement learning. arXiv preprint arXiv:1905.09638"},{"key":"9839_CR29","first-page":"14129","volume":"33","author":"T Yu","year":"2020","unstructured":"Yu T, Thomas G, Yu L, Ermon S, Zou JY, Levine S, Finn C, Ma T (2020) Mopo: Model-based offline policy optimization. Adv Neural Inf Process Syst 33:14129\u201314142","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR30","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi R, Rajeswaran A, Netrapalli P, Joachims T (2020) Morel: Model-based offline reinforcement learning. Adv Neural Inf Process Syst 33:21810\u201321823","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR31","unstructured":"Hong J, Kumar A, Levine S (2022) Confidence-conditioned value functions for offline reinforcement learning. arXiv preprint arXiv:2212.04607"},{"key":"9839_CR32","unstructured":"Ghosh D, Ajay A, Agrawal P, Levine S (2022) Offline rl policies should be trained to be adaptive. In: International Conference on Machine Learning, pp. 7513\u20137530. PMLR"},{"key":"9839_CR33","doi-asserted-by":"crossref","unstructured":"Pinto L, Gupta A (2016) Supersizing self-supervision: Learning to grasp from 50k tries and 700 robot hours. In: 2016 IEEE International Conference on Robotics and Automation (ICRA), pp. 3406\u20133413. IEEE","DOI":"10.1109\/ICRA.2016.7487517"},{"issue":"4\u20135","key":"9839_CR34","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1177\/0278364917710318","volume":"37","author":"S Levine","year":"2018","unstructured":"Levine S, Pastor P, Krizhevsky A, Ibarz J, Quillen D (2018) Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection. Int J Robot Res 37(4\u20135):421\u2013436","journal-title":"Int J Robot Res"},{"issue":"11","key":"9839_CR35","doi-asserted-by":"publisher","first-page":"1289","DOI":"10.1177\/0278364915619772","volume":"35","author":"H Kretzschmar","year":"2016","unstructured":"Kretzschmar H, Spies M, Sprunk C, Burgard W (2016) Socially compliant mobile robot navigation via inverse reinforcement learning. Int J Robot Res 35(11):1289\u20131307","journal-title":"Int J Robot Res"},{"key":"9839_CR36","doi-asserted-by":"publisher","first-page":"2015","DOI":"10.1007\/s00521-020-05097-x","volume":"33","author":"VJ Hodge","year":"2021","unstructured":"Hodge VJ, Hawkins R, Alexander R (2021) Deep reinforcement learning for drone navigation using sensor data. Neural Comput Appl 33:2015\u20132033","journal-title":"Neural Comput Appl"},{"key":"9839_CR37","unstructured":"Nilsson J (1998) Real-time control systems with delays"},{"key":"9839_CR38","unstructured":"Ramstedt S, Pal C (2019) Real-time reinforcement learning. Adv Neural Inf Process Syst 32"},{"key":"9839_CR39","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR"},{"issue":"2","key":"9839_CR40","doi-asserted-by":"publisher","first-page":"161","DOI":"10.2307\/2347982","volume":"31","author":"J Royston","year":"1982","unstructured":"Royston J (1982) Expected normal order statistics(exact and approximate): algorithm as 177. Appl Stat 31(2):161\u20135","journal-title":"Appl Stat"},{"key":"9839_CR41","unstructured":"Hoffer E, Hubara I, Soudry D (2017) Train longer, generalize better: closing the generalization gap in large batch training of neural networks. Adv Neural Inf Process Syst 30"},{"key":"9839_CR42","unstructured":"You Y, Gitman I, Ginsburg B (2017) Large batch training of convolutional networks. arXiv preprint arXiv:1708.03888"},{"key":"9839_CR43","unstructured":"Krizhevsky A (2014) One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997"},{"key":"9839_CR44","first-page":"1711","volume":"35","author":"J Lyu","year":"2022","unstructured":"Lyu J, Ma X, Li X, Lu Z (2022) Mildly conservative q-learning for offline reinforcement learning. Adv Neural Inf Process Syst 35:1711\u20131724","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR45","unstructured":"Reid M, Yamada Y, Gu SS (2022) Can wikipedia help offline reinforcement learning? arXiv preprint arXiv:2201.12122"},{"issue":"1","key":"9839_CR46","first-page":"14205","volume":"23","author":"T Seno","year":"2022","unstructured":"Seno T, Imai M (2022) d3rlpy: An offline deep reinforcement learning library. J Mach Learn Res 23(1):14205\u201314224","journal-title":"J Mach Learn Res"},{"key":"9839_CR47","unstructured":"Kumar A, Agarwal R, Geng X, Tucker G, Levine S (2022) Offline q-learning on diverse multi-task data both scales and generalizes. arXiv preprint arXiv:2211.15144"},{"key":"9839_CR48","doi-asserted-by":"crossref","unstructured":"Smith L, Kostrikov I, Levine S (2022) A walk in the park: Learning to walk in 20 minutes with model-free reinforcement learning. arXiv preprint arXiv:2208.07860","DOI":"10.15607\/RSS.2023.XIX.056"},{"key":"9839_CR49","unstructured":"Ba JL, Kiros JR, Hinton GE (2016) Layer normalization. arXiv preprint arXiv:1607.06450"},{"key":"9839_CR50","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"9839_CR51","first-page":"28954","volume":"34","author":"T Yu","year":"2021","unstructured":"Yu T, Kumar A, Rafailov R, Rajeswaran A, Levine S, Finn C (2021) Combo: Conservative offline model-based policy optimization. Adv Neural Inf Process Syst 34:28954\u201328967","journal-title":"Adv Neural Inf Process Syst"},{"key":"9839_CR52","first-page":"16082","volume":"35","author":"M Rigter","year":"2022","unstructured":"Rigter M, Lacerda B, Hawes N (2022) Rambo-rl: Robust adversarial model-based offline reinforcement learning. Adv Neural Inf Process Syst 35:16082\u201316097","journal-title":"Adv Neural Inf Process Syst"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09839-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09839-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09839-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T14:21:28Z","timestamp":1724422888000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09839-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,14]]},"references-count":52,"journal-issue":{"issue":"25","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["9839"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09839-z","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,14]]},"assertion":[{"value":"17 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 April 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 May 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}