{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,26]],"date-time":"2025-04-26T19:40:01Z","timestamp":1745696401376,"version":"3.40.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sci Comput"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10915-025-02873-6","type":"journal-article","created":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T18:56:50Z","timestamp":1743706610000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Analysis of Offline Model-Based Learning with Action Noise"],"prefix":"10.1007","volume":"103","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7076-7600","authenticated-orcid":false,"given":"Haoya","family":"Li","sequence":"first","affiliation":[]},{"given":"Tanmay","family":"Gangwani","sequence":"additional","affiliation":[]},{"given":"Lexing","family":"Ying","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,1]]},"reference":[{"key":"2873_CR1","unstructured":"Agarwal, A., Jiang, N., Kakade, S.M., Sun, S.: Reinforcement Learning: Theory and Algorithms (2019)"},{"key":"2873_CR2","unstructured":"Agarwal, R., Schuurmans, D., Norouzi, M.: An optimistic perspective on offline reinforcement learning. In: International Conference on Machine Learning, pp. 104\u2013114 (2020)"},{"key":"2873_CR3","first-page":"4933","volume":"34","author":"D Brandfonbrener","year":"2021","unstructured":"Brandfonbrener, D., Whitney, W., Ranganath, R., Bruna, J.: Offline RL without off-policy evaluation. Adv. Neural Inf. Process. Syst. 34, 4933\u20134946 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR4","first-page":"965","volume":"34","author":"J Chang","year":"2021","unstructured":"Chang, J., Uehara, M., Sreenivas, D., Kidambi, R., Sun, W.: Mitigating covariate shift in imitation learning via offline data with partial coverage. Adv. Neural Inf. Process. Syst. 34, 965\u2013979 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR5","unstructured":"Chen, X., Ghadirzadeh, A, Tianhe, Y., Gao, Y., Wang, J., Li, W., Liang, B., Finn, C., Zhang, C.: Latent-variable advantage-weighted policy optimization for offline RL. arXiv preprint arXiv:2203.08949 (2022)"},{"key":"2873_CR6","unstructured":"Diehl, C., Sievernich, T., Kr\u00fcger, M., Hoffmann, F., Bertran, T.: Umbrella: uncertainty-aware model-based offline reinforcement learning leveraging planning. arXiv preprint arXiv:2111.11097 (2021)"},{"key":"2873_CR7","unstructured":"Duan, Y., Jia, Z., Wang, M.: Minimax-optimal off-policy evaluation with linear function approximation. In: International Conference on Machine Learning, pp. 2701\u20132709 (2020)"},{"key":"2873_CR8","unstructured":"Eysenbach, B., Khazatsky, A., Levine, S., Salakhutdinov, R.: Mismatched no more: joint model-policy optimization for model-based RL. arXiv preprint arXiv:2110.02758 (2021)"},{"key":"2873_CR9","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto, S., Shixiang, S.G.: A minimalist approach to offline reinforcement learning. Adv. Neural Inf. Process. Syst. 34, 20132\u201320145 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR10","unstructured":"Fujimoto, S., Meger, D., Precup, D.: Off-policy deep reinforcement learning without exploration. In: International Conference on Machine Learning, pp. 2052\u20132062 (2019)"},{"key":"2873_CR11","unstructured":"Ghasemipour, K., Gu, S.S., Nachum, O.: Why so pessimistic? Estimating uncertainties for offline RL through ensembles, and why their independence matters. arXiv preprint arXiv:2205.13703 (2022)"},{"key":"2873_CR12","unstructured":"Jiang, N.: Notes on tabular methods (2018)"},{"key":"2873_CR13","unstructured":"Jin, Y., Yang, Z., Wang, Z.: Is pessimism provably efficient for offline RL? In: International Conference on Machine Learning, pp. 5084\u20135096 (2021)"},{"key":"2873_CR14","first-page":"21810","volume":"33","author":"R Kidambi","year":"2020","unstructured":"Kidambi, R., Rajeswaran, A., Netrapalli, P., Joachims, T.: MOReL: model-based offline reinforcement learning. Adv. Neural Inf. Process. Syst. 33, 21810\u201321823 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR15","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"2873_CR16","unstructured":"Kostrikov, I., Nair, A., Levine, S.: Offline reinforcement learning with implicit Q-learning. arXiv preprint arXiv:2110.06169 (2021)"},{"key":"2873_CR17","unstructured":"Kumar, A., Justin, F., Soh, M., Tucker, G., Levine, S.: Stabilizing off-policy Q-learning via bootstrapping error reduction. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"2873_CR18","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative Q-learning for offline reinforcement learning. Adv. Neural Inf. Process. Syst. 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR19","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)"},{"issue":"1","key":"2873_CR20","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1214\/23-AOS2342","volume":"52","author":"G Li","year":"2024","unstructured":"Li, G., Shi, L., Chen, Y., Chi, Y., Wei, Y.: Settling the sample complexity of model-based offline reinforcement learning. Ann. Stat. 52(1), 233\u2013260 (2024)","journal-title":"Ann. Stat."},{"issue":"1","key":"2873_CR21","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1287\/opre.2023.2451","volume":"72","author":"G Li","year":"2024","unstructured":"Li, G., Wei, Y., Chi, Y., Chen, Y.: Breaking the sample size barrier in model-based reinforcement learning with a generative model. Oper. Res. 72(1), 203\u2013221 (2024)","journal-title":"Oper. Res."},{"key":"2873_CR22","unstructured":"Li, H., Yu, H.F., Ying L., Dhillon, I.: Accelerating primal-dual methods for regularized Markov decision processes (2022)"},{"key":"2873_CR23","unstructured":"Lyu, J., Li, X., Lu, Z.: Double check your state before trusting it: confidence-aware bidirectional offline model-based imagination. arXiv preprint arXiv:2206.07989 (2022)"},{"key":"2873_CR24","unstructured":"Lyu, J., Ma, X., Li, X., Lu, Z.: Mildly conservative q-learning for offline reinforcement learning. arXiv preprint arXiv:2206.04745 (2022)"},{"issue":"1","key":"2873_CR25","doi-asserted-by":"publisher","first-page":"276","DOI":"10.3390\/make4010013","volume":"4","author":"J Moos","year":"2022","unstructured":"Moos, J., Hansel, K., Abdulsamad, H., Stark, S., Clever, D., Peters, J.: Robust reinforcement learning: a review of foundations and recent advances. Mach. Learn. Knowl. Extr. 4(1), 276\u2013315 (2022)","journal-title":"Mach. Learn. Knowl. Extr."},{"key":"2873_CR26","unstructured":"Peng, X.B., Kumar, A., Zhang, G., Levine, S.: Advantage-weighted regression: simple and scalable off-policy reinforcement learning. arXiv preprint arXiv:1910.00177 (2019)"},{"key":"2873_CR27","unstructured":"Prudencio, R.F., Maximo, M.R., Colombini, E.L.: A survey on offline reinforcement learning: taxonomy, review, and open problems. arXiv preprint arXiv:2203.01387 (2022)"},{"key":"2873_CR28","series-title":"Wiley Series in Probability and Statistics","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley Series in Probability and Statistics, John Wiley & Sons, Hoboken (1994)"},{"key":"2873_CR29","unstructured":"Rigter, M., Lacerda, B., Hawes, N.: RAMBO-RL: robust adversarial model-based offline reinforcement learning. arXiv preprint arXiv:2204.12581 (2022)"},{"key":"2873_CR30","unstructured":"Singh, A., Yu, A., Yang, J., Zhang, J., Kumar, A., Levine, S.: COG: connecting new skills to past experience with offline reinforcement learning. arXiv preprint arXiv:2010.14500 (2020)"},{"key":"2873_CR31","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"2873_CR32","doi-asserted-by":"publisher","first-page":"104366","DOI":"10.1016\/j.engappai.2021.104366","volume":"104","author":"P Swazinna","year":"2021","unstructured":"Swazinna, P., Udluft, S., Runkler, T.: Overcoming model bias for robust offline deep reinforcement learning. Eng. Appl. Artif. Intell. 104, 104366 (2021)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"2873_CR33","unstructured":"Tessler, C., Efroni, Y., Mannor, S.: Action robust reinforcement learning and applications in continuous control. In: International Conference on Machine Learning, pp. 6215\u20136224 (2019)"},{"key":"2873_CR34","unstructured":"Uehara, M., Sun, W.: Pessimistic model-based offline reinforcement learning under partial coverage. arXiv preprint arXiv:2107.06226 (2021)"},{"key":"2873_CR35","first-page":"29420","volume":"34","author":"J Wang","year":"2021","unstructured":"Wang, J., Li, W., Jiang, H., Zhu, G., Li, S., Zhang, C.: Offline reinforcement learning with reverse model-based imagination. Adv. Neural Inf. Process. Syst. 34, 29420\u201329432 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"1","key":"2873_CR36","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1287\/moor.1120.0566","volume":"38","author":"W Wiesemann","year":"2013","unstructured":"Wiesemann, W., Kuhn, D., Rustem, B.: Robust Markov decision processes. Math. Oper. Res. 38(1), 153\u2013183 (2013)","journal-title":"Math. Oper. Res."},{"key":"2873_CR37","unstructured":"Xie, T., Jiang, N.: Batch value-function approximation with only realizability. In: International Conference on Machine Learning, pp. 11404\u201311413 (2021)"},{"issue":"6","key":"2873_CR38","doi-asserted-by":"publisher","first-page":"3223","DOI":"10.1214\/22-AOS2225","volume":"50","author":"W Yang","year":"2022","unstructured":"Yang, W., Zhang, L., Zhang, Z.: Toward theoretical understandings of robust Markov decision processes: sample complexity and asymptotics. Ann. Stat. 50(6), 3223\u20133248 (2022)","journal-title":"Ann. Stat."},{"key":"2873_CR39","unstructured":"Ying, L., Zhu, Y.: A note on optimization formulations of Markov decision processes. arXiv preprint arXiv:2012.09417 (2020)"},{"key":"2873_CR40","first-page":"28954","volume":"34","author":"T Yu","year":"2021","unstructured":"Yu, T., Kumar, A., Rafailov, R., Rajeswaran, A., Levine, S., Finn, C.: COMBO: conservative offline model-based policy optimization. Adv. Neural Inf. Process. Syst. 34, 28954\u201328967 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR41","first-page":"14129","volume":"33","author":"T Yu","year":"2020","unstructured":"Yu, T., Thomas, G., Yu, L., Ermon, S., Zou, J.Y., Levine, S., Finn, C., Ma, T.: MOPO: model-based offline policy optimization. Adv. Neural Inf. Process. Syst. 33, 14129\u201314142 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2873_CR42","unstructured":"Zhan, W., Cen, S., Huang, B., Chen, Y., Lee, J.D., Chi, Y.: Policy mirror descent for regularized reinforcement learning: a generalized framework with linear convergence (2021)"},{"key":"2873_CR43","unstructured":"Zhou, W., Bajracharya, S., Held, D.: Plas: Latent action space for offline reinforcement learning. In: Conference on Robot Learning, pp. 1719\u20131735 (2021)"}],"container-title":["Journal of Scientific Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10915-025-02873-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10915-025-02873-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10915-025-02873-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,26]],"date-time":"2025-04-26T19:10:49Z","timestamp":1745694649000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10915-025-02873-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,1]]},"references-count":43,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["2873"],"URL":"https:\/\/doi.org\/10.1007\/s10915-025-02873-6","relation":{},"ISSN":["0885-7474","1573-7691"],"issn-type":[{"type":"print","value":"0885-7474"},{"type":"electronic","value":"1573-7691"}],"subject":[],"published":{"date-parts":[[2025,4,1]]},"assertion":[{"value":"26 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 January 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have not disclosed any competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"51"}}