{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T22:09:35Z","timestamp":1740175775652,"version":"3.37.3"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T00:00:00Z","timestamp":1731283200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T00:00:00Z","timestamp":1731283200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s40747-024-01621-x","type":"journal-article","created":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T05:24:46Z","timestamp":1731302686000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Moor: Model-based offline policy optimization with a risk dynamics model"],"prefix":"10.1007","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6876-458X","authenticated-orcid":false,"given":"Xiaolong","family":"Su","sequence":"first","affiliation":[]},{"given":"Peng","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6823-555X","authenticated-orcid":false,"given":"Shaofei","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,11]]},"reference":[{"key":"1621_CR1","unstructured":"PL A, Fu M (2022) Risk-sensitive reinforcement learning via policy gradient search. https:\/\/arxiv.org\/abs\/1810.09126"},{"issue":"3","key":"1621_CR2","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/s11009-008-9089-z","volume":"11","author":"A Balb\u00e1s","year":"2009","unstructured":"Balb\u00e1s A, Garrido J, Mayoral S (2009) Properties of distortion risk measures. Methodol Comput Appl Probab 11(3):385\u2013399","journal-title":"Methodol Comput Appl Probab"},{"key":"1621_CR3","unstructured":"Bellemare MG, Dabney W, Munos R (2017) A distributional perspective on reinforcement learning. In: International conference on machine learning, PMLR, pp 449\u2013458"},{"key":"1621_CR4","doi-asserted-by":"publisher","unstructured":"Bellemare MG, Danihelka I, Dabney W, et\u00a0al (2017) The cramer distance as a solution to biased wasserstein gradients. arXiv e-prints arXiv:1705.10743. https:\/\/doi.org\/10.48550\/arXiv.1705.10743","DOI":"10.48550\/arXiv.1705.10743"},{"key":"1621_CR5","doi-asserted-by":"crossref","unstructured":"Bellemare MG, Dabney W, Rowland M (2023) Distributional reinforcement learning. MIT Press, http:\/\/www.distributional-rl.org","DOI":"10.7551\/mitpress\/14207.001.0001"},{"issue":"5","key":"1621_CR6","doi-asserted-by":"publisher","first-page":"1159","DOI":"10.1007\/s11425-021-2109-3","volume":"67","author":"L Bo","year":"2024","unstructured":"Bo L, Wang S, Yu X (2024) Mean field game of optimal relative investment with jump risk. 
Sci China Math 67(5):1159\u20131188","journal-title":"Sci China Math"},{"key":"1621_CR7","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","volume":"5","author":"L Brunke","year":"2022","unstructured":"Brunke L, Greeff M, Hall AW et al (2022) Safe learning in robotics: from learning-based control to safe reinforcement learning. Ann Rev Control Robot Auton Syst 5:411\u2013444. https:\/\/doi.org\/10.1146\/annurev-control-042920-020211","journal-title":"Ann Rev Control Robot Auton Syst"},{"key":"1621_CR8","unstructured":"Chang Y, Firoozi D, Benatia D (2023) Large banks and systemic risk: insights from a mean-field game model. https:\/\/arxiv.org\/abs\/2305.17830"},{"key":"1621_CR9","unstructured":"Chen L, Lu K, Rajeswaran A et al (2021) Decision transformer: reinforcement learning via sequence modeling. Adv Neural Inf Process Syst 34:15084\u201315097"},{"key":"1621_CR10","unstructured":"Chen X, Wang S, Yu T, et\u00a0al (2024) Uncertainty-aware distributional offline reinforcement learning. https:\/\/arxiv.org\/abs\/2403.17646,"},{"key":"1621_CR11","unstructured":"Chow Y, Tamar A, Mannor S, et\u00a0al (2015) Risk-sensitive and robust decision-making: a cvar optimization approach. In: Proceedings of the 28th international conference on neural information processing systems-volume 1, pp 1522\u20131530"},{"key":"1621_CR12","doi-asserted-by":"publisher","unstructured":"Coronato A, Naeem M, De Pietro G et al (2020) Reinforcement learning for intelligent healthcare applications: a survey. Artif Intell Med 109:101964. https:\/\/doi.org\/10.1016\/j.artmed.2020.101964","DOI":"10.1016\/j.artmed.2020.101964"},{"key":"1621_CR13","doi-asserted-by":"crossref","unstructured":"Dabney W, Ostrovski G, Silver D, et\u00a0al (2018a) Implicit quantile networks for distributional reinforcement learning. In: International conference on machine learning, PMLR, pp 1096\u20131105","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"1621_CR14","doi-asserted-by":"crossref","unstructured":"Dabney W, Rowland M, Bellemare M, et\u00a0al (2018b) Distributional reinforcement learning with quantile regression. In: Proceedings of the AAAI conference on artificial intelligence, pp 2892\u20132901","DOI":"10.1609\/aaai.v32i1.11791"},{"issue":"7792","key":"1621_CR15","doi-asserted-by":"publisher","first-page":"671","DOI":"10.1038\/s41586-019-1924-6","volume":"577","author":"W Dabney","year":"2020","unstructured":"Dabney W, Kurth-Nelson Z, Uchida N et al (2020) A distributional code for value in dopamine-based reinforcement learning. Nature 577(7792):671\u2013675","journal-title":"Nature"},{"issue":"2","key":"1621_CR16","doi-asserted-by":"publisher","first-page":"1167","DOI":"10.1109\/LRA.2023.3236579","volume":"8","author":"C Diehl","year":"2023","unstructured":"Diehl C, Sievernich TS, Kr\u00fcger M et al (2023) Uncertainty-aware model-based offline reinforcement learning for automated driving. IEEE Robot Autom Lett 8(2):1167\u20131174","journal-title":"IEEE Robot Autom Lett"},{"key":"1621_CR17","doi-asserted-by":"publisher","unstructured":"Du Y, Wang S, Huang L (2022) Provably efficient risk-sensitive reinforcement learning: Iterated cvar and worst path. arXiv preprint arXiv:2206.02678https:\/\/doi.org\/10.48550\/arXiv.2206.02678","DOI":"10.48550\/arXiv.2206.02678"},{"key":"1621_CR18","first-page":"4856","volume":"34","author":"M Fatemi","year":"2021","unstructured":"Fatemi M, Killian TW, Subramanian J et al (2021) Medical dead-ends and learning to identify high-risk states and treatments. 
Adv Neural Inf Process Sys 34:4856\u20134870","journal-title":"Adv Neural Inf Process Sys"},{"key":"1621_CR19","doi-asserted-by":"publisher","unstructured":"Fu J, Kumar A, Nachum O, et\u00a0al (2020) D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219https:\/\/doi.org\/10.48550\/arXiv.2004.07219","DOI":"10.48550\/arXiv.2004.07219"},{"key":"1621_CR20","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International conference on machine learning, PMLR, pp 2052\u20132062"},{"key":"1621_CR21","first-page":"64896","volume":"36","author":"H He","year":"2024","unstructured":"He H, Bai C, Xu K et al (2024) Diffusion model is an effective planner and data synthesizer for multi-task reinforcement learning. Adv Neural Inf Process Syst 36:64896\u201364917","journal-title":"Adv Neural Inf Process Syst"},{"key":"1621_CR22","doi-asserted-by":"crossref","unstructured":"Huber PJ (1992) Robust estimation of a location parameter. In: Breakthroughs in statistics: methodology and distribution. Springer, p 492\u2013518","DOI":"10.1007\/978-1-4612-4380-9_35"},{"key":"1621_CR23","unstructured":"Kidambi R, Rajeswaran A, Netrapalli P, et\u00a0al (2020) Morel: model-based offline reinforcement learning. In: Proceedings of the 34th international conference on neural information processing systems, pp 21810\u201321823"},{"key":"1621_CR24","doi-asserted-by":"publisher","unstructured":"Killian TW, Parbhoo S, Ghassemi M (2023) Risk sensitive dead-end identification in safety-critical offline reinforcement learning. arXiv preprint arXiv:2301.05664https:\/\/doi.org\/10.48550\/arXiv.2301.05664","DOI":"10.48550\/arXiv.2301.05664"},{"key":"1621_CR25","unstructured":"Kuang Q, Zhu Z, Zhang L, et\u00a0al (2023) Variance control for distributional reinforcement learning. In: Proceedings of the 40th international conference on machine learning, pp 17874\u201317895"},{"key":"1621_CR26","unstructured":"Kumar A, Zhou A, Tucker G, et\u00a0al (2020) Conservative q-learning for offline reinforcement learning. In: Proceedings of the 34th international conference on neural information processing systems, pp 1179\u20131191"},{"key":"1621_CR27","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1016\/j.isatra.2017.01.022","volume":"67","author":"R Kumar","year":"2017","unstructured":"Kumar R, Srivastava S, Gupta J (2017) Diagonal recurrent neural network based adaptive control of nonlinear dynamical systems using lyapunov stability criterion. ISA Trans 67:407\u2013427. https:\/\/doi.org\/10.1016\/j.isatra.2017.01.022","journal-title":"ISA Trans"},{"key":"1621_CR28","doi-asserted-by":"publisher","unstructured":"Levine S, Kumar A, Tucker G, et\u00a0al (2020) Offline reinforcement learning: tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643https:\/\/doi.org\/10.48550\/arXiv.2005.01643","DOI":"10.48550\/arXiv.2005.01643"},{"issue":"1","key":"1621_CR29","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TSMC.2020.3042876","volume":"51","author":"D Liu","year":"2020","unstructured":"Liu D, Xue S, Zhao B et al (2020) Adaptive dynamic programming for control: a survey and recent advances. IEEE Trans Syst Man Cybern Syst 51(1):142\u2013160","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"1621_CR30","doi-asserted-by":"crossref","unstructured":"Lyle C, Bellemare MG, Castro PS (2019) A comparative analysis of expected and distributional reinforcement learning. 
In: Proceedings of the AAAI conference on artificial intelligence, pp 4504\u20134511","DOI":"10.1609\/aaai.v33i01.33014504"},{"key":"1621_CR31","doi-asserted-by":"publisher","unstructured":"Ma X, Xia L, Zhou Z, et\u00a0al (2020) Dsac: Distributional soft actor critic for risk-sensitive reinforcement learning. arXiv preprint arXiv:2004.14547https:\/\/doi.org\/10.48550\/arXiv.2004.14547","DOI":"10.48550\/arXiv.2004.14547"},{"key":"1621_CR32","unstructured":"Ma Y, Jayaraman D, Bastani O (2021) Conservative offline distributional reinforcement learning. In: Ranzato M, Beygelzimer A, Dauphin Y, et\u00a0al (Eds) Advances in Neural Information Processing Systems, vol\u00a034. Curran Associates, Inc., pp 19235\u201319247, https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/a05d886123a54de3ca4b0985b718fb9b-Paper.pdf"},{"key":"1621_CR33","unstructured":"Mao Y, Zhang H, Chen C, et\u00a0al (2023) Supported value regularization for offline reinforcement learning. In: Proceedings of the 37th international conference on neural information processing systems, pp 40587\u201340609"},{"key":"1621_CR34","doi-asserted-by":"crossref","unstructured":"Markowitz J, Gardner RW, Llorens A, et\u00a0al (2023) A risk-sensitive approach to policy optimization. In: Proceedings of the AAAI conference on artificial intelligence, pp 15019\u201315027","DOI":"10.1609\/aaai.v37i12.26753"},{"key":"1621_CR35","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3250269","author":"RF Prudencio","year":"2023","unstructured":"Prudencio RF, Maximo MROA, Colombini EL (2023) A survey on offline reinforcement learning: taxonomy, review, and open problems. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2023.3250269","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1621_CR36","unstructured":"Ren XY, Firoozi D (2024) Risk-sensitive mean field games with common noise: a theoretical study with applications to interbank markets. https:\/\/arxiv.org\/abs\/2403.03915,"},{"key":"1621_CR37","unstructured":"Rigter M, Lacerda B, Hawes N (2023) One risk to rule them all: a risk-sensitive perspective on model-based offline reinforcement learning. In: Advances in neural information processing systems, pp 77520\u201377545, https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/f49287371916715b9209fa41a275851e-Paper-Conference.pdf"},{"issue":"7","key":"1621_CR38","doi-asserted-by":"publisher","first-page":"1298","DOI":"10.1162\/NECO_a_00600","volume":"26","author":"Y Shen","year":"2014","unstructured":"Shen Y, Tobia MJ, Sommer T et al (2014) Risk-sensitive reinforcement learning. Neural Comput 26(7):1298\u20131328","journal-title":"Neural Comput"},{"key":"1621_CR39","doi-asserted-by":"crossref","unstructured":"Shi J, Bai C, He H, et\u00a0al (2024) Robust quadrupedal locomotion via risk-averse policy learning. In: 2024 IEEE international conference on robotics and automation (ICRA), IEEE, pp 11459\u201311466","DOI":"10.1109\/ICRA57147.2024.10610086"},{"issue":"2","key":"1621_CR40","doi-asserted-by":"publisher","first-page":"945","DOI":"10.1007\/s10462-021-09997-9","volume":"55","author":"B Singh","year":"2022","unstructured":"Singh B, Kumar R, Singh VP (2022) Reinforcement learning in robotic applications: a comprehensive survey. 
Artif Intell Rev 55(2):945\u2013990","journal-title":"Artif Intell Rev"},{"issue":"13","key":"1621_CR41","doi-asserted-by":"publisher","first-page":"12181","DOI":"10.1007\/s11071-023-08456-0","volume":"111","author":"X Song","year":"2023","unstructured":"Song X, Sun P, Song S et al (2023) Finite-time adaptive neural resilient DSC for fractional-order nonlinear large-scale systems against sensor-actuator faults. Nonlinear Dyn 111(13):12181\u201312196","journal-title":"Nonlinear Dyn"},{"key":"1621_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.cnsns.2024.107945","volume":"132","author":"X Song","year":"2024","unstructured":"Song X, Peng Z, Song S et al (2024) Anti-disturbance state estimation for PDT-switched RDNNS utilizing time-sampling and space-splitting measurements. Commun Nonlinear Sci Numer Simul 132:107945. https:\/\/doi.org\/10.1016\/j.cnsns.2024.107945","journal-title":"Commun Nonlinear Sci Numer Simul"},{"key":"1621_CR43","unstructured":"Sun H, Wu F (2023) Less is more: refining datasets for offline reinforcement learning with reward machines. In: Proceedings of the 2023 international conference on autonomous agents and multiagent systems (AAMAS), pp 1239\u20131247"},{"key":"1621_CR44","unstructured":"Tamar A, Di\u00a0Castro D, Mannor S (2012) Policy gradients with variance related risk criteria. In: Proceedings of the twenty-ninth international conference on machine learning, pp 387\u2013396"},{"key":"1621_CR45","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/j.neucom.2023.02.049","volume":"534","author":"T Th\u00e9ate","year":"2023","unstructured":"Th\u00e9ate T, Wehenkel A, Bolland A et al (2023) Distributional reinforcement learning with unconstrained monotonic neural networks. Neurocomputing 534:199\u2013219. https:\/\/doi.org\/10.1016\/j.neucom.2023.02.049","journal-title":"Neurocomputing"},{"issue":"12","key":"1621_CR46","doi-asserted-by":"publisher","first-page":"9477","DOI":"10.1109\/TPAMI.2021.3127674","volume":"44","author":"O Tutsoy","year":"2022","unstructured":"Tutsoy O (2022) Pharmacological, non-pharmacological policies and mutation: an artificial intelligence based multi-dimensional policy making algorithm for controlling the casualties of the pandemic diseases. IEEE Trans Pattern Anal Mach Intell 44(12):9477\u20139488. https:\/\/doi.org\/10.1109\/TPAMI.2021.3127674","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"1621_CR47","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/BF00122574","volume":"5","author":"A Tversky","year":"1992","unstructured":"Tversky A, Kahneman D (1992) Advances in prospect theory: cumulative representation of uncertainty. J Risk Uncertain 5(4):297\u2013323","journal-title":"J Risk Uncertain"},{"key":"1621_CR48","unstructured":"Urp\u00ed NA, Curi S, Krause A (2021) Risk-averse offline reinforcement learning. In: International conference on learning representations, https:\/\/openreview.net\/forum?id=TBIzh9b5eaz"},{"key":"1621_CR49","doi-asserted-by":"publisher","first-page":"104643","DOI":"10.1109\/ACCESS.2023.3318324","volume":"11","author":"P Viroonluecha","year":"2023","unstructured":"Viroonluecha P, Egea-Lopez E, Santa J (2023) Evaluation of offline reinforcement learning for blood glucose level control in type 1 diabetes. IEEE Access 11:104643\u2013104655. 
https:\/\/doi.org\/10.1109\/ACCESS.2023.3318324","journal-title":"IEEE Access"},{"key":"1621_CR50","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1016\/j.isatra.2023.07.043","volume":"142","author":"R Wang","year":"2023","unstructured":"Wang R, Zhuang Z, Tao H et al (2023) Q-learning based fault estimation and fault tolerant iterative learning control for mimo systems. ISA Trans 142:123\u2013135. https:\/\/doi.org\/10.1016\/j.isatra.2023.07.043","journal-title":"ISA Trans"},{"key":"1621_CR51","doi-asserted-by":"crossref","unstructured":"Wang SS (2000) A class of distortion operators for pricing financial and insurance risks. J Risk Insur 67(1):15\u201336","DOI":"10.2307\/253675"},{"key":"1621_CR52","doi-asserted-by":"publisher","unstructured":"Wu Y, Tucker G, Nachum O (2019) Behavior regularized offline reinforcement learning. arXiv preprint arXiv:1911.11361https:\/\/doi.org\/10.48550\/arXiv.1911.11361","DOI":"10.48550\/arXiv.1911.11361"},{"key":"1621_CR53","unstructured":"Yamagata T, Khalil A, Santos-Rodriguez R (2023) Q-learning decision transformer: Leveraging dynamic programming for conditional sequence modelling in offline rl. In: International conference on machine learning, PMLR, pp 38989\u201339007"},{"key":"1621_CR54","unstructured":"Yang D, Zhao L, Lin Z, et\u00a0al (2019) Fully parameterized quantile function for distributional reinforcement learning. In: Proceedings of the 33rd international conference on neural information processing systems, pp 6193\u20136202"},{"key":"1621_CR55","first-page":"23851","volume":"35","author":"R Yang","year":"2022","unstructured":"Yang R, Bai C, Ma X et al (2022) Rorl: robust offline reinforcement learning via conservative smoothing. Adv Neural Inf Process Syst 35:23851\u201323866","journal-title":"Adv Neural Inf Process Syst"},{"key":"1621_CR56","unstructured":"Yu T, Thomas G, Yu L, et\u00a0al (2020) Mopo: model-based offline policy optimization. In: Proceedings of the 34th international conference on neural information processing systems, pp 14129\u201314142"},{"key":"1621_CR57","unstructured":"Zhang D, Lyu B, Qiu S, et\u00a0al (2024) Pessimism meets risk: risk-sensitive offline reinforcement learning. 
https:\/\/arxiv.org\/abs\/2407.07631,"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01621-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01621-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01621-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,30]],"date-time":"2025-01-30T20:16:30Z","timestamp":1738268190000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01621-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,11]]},"references-count":57,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["1621"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01621-x","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"type":"print","value":"2199-4536"},{"type":"electronic","value":"2198-6053"}],"subject":[],"published":{"date-parts":[[2024,11,11]]},"assertion":[{"value":"23 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 September 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no Conflict of interest in any material discussed in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"20"}}