{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T21:29:47Z","timestamp":1773091787523,"version":"3.50.1"},"reference-count":70,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T00:00:00Z","timestamp":1772409600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.eswa.2026.131838","type":"journal-article","created":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T17:03:00Z","timestamp":1772557380000},"page":"131838","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Risk-sensitive actor-critic with static spectral risk measures for online and offline reinforcement learning"],"prefix":"10.1016","volume":"317","author":[{"given":"Mehrdad","family":"Moghimi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0107-6974","authenticated-orcid":false,"given":"Hyejin","family":"Ku","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"7","key":"10.1016\/j.eswa.2026.131838_bib0001","doi-asserted-by":"crossref","first-page":"1505","DOI":"10.1016\/S0378-4266(02)00281-9","article-title":"Spectral measures of risk: A coherent representation of subjective risk aversion","volume":"26","author":"Acerbi","year":"2002","journal-title":"Journal of Banking and Finance"},{"issue":"98","key":"10.1016\/j.eswa.2026.131838_bib0002","first-page":"1","article-title":"On the theory of policy gradient methods: Optimality, approximation, and distribution shift","volume":"22","author":"Agarwal","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.eswa.2026.131838_bib0003","series-title":"Proceedings of the 37th international conference on Machine Learning","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","author":"Agarwal","year":"2020"},{"key":"10.1016\/j.eswa.2026.131838_bib0004","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1016\/j.eswa.2017.06.023","article-title":"An adaptive portfolio trading system: A risk-return portfolio optimization using recurrent reinforcement learning with expected maximum drawdown","volume":"87","author":"Almahdi","year":"2017","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131838_bib0005","first-page":"1","article-title":"Monotonic quantile network for worst-case offline reinforcement learning","author":"Bai","year":"2022","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"10.1016\/j.eswa.2026.131838_bib0006","series-title":"International conference on learning representations","article-title":"Distributed distributional deterministic policy gradients","author":"Barth-Maron","year":"2018"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0007","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1007\/s00186-021-00746-w","article-title":"Minimizing spectral risk measures applied to Markov decision processes","volume":"94","author":"B\u00e4uerle","year":"2021","journal-title":"Mathematical Methods of Operations Research"},{"issue":"3","key":"10.1016\/j.eswa.2026.131838_bib0008","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1007\/s00186-011-0367-0","article-title":"Markov decision processes with average-value-at-risk criteria","volume":"74","author":"B\u00e4uerle","year":"2011","journal-title":"Mathematical Methods of Operations Research"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0009","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1287\/moor.2013.0601","article-title":"More risk-sensitive Markov decision processes","volume":"39","author":"B\u00e4uerle","year":"2014","journal-title":"Mathematics of Operations Research"},{"key":"10.1016\/j.eswa.2026.131838_bib0010","series-title":"Proceedings of the 34th international conference on machine learning","first-page":"449","article-title":"A Distributional Perspective on Reinforcement Learning","author":"Bellemare","year":"2017"},{"key":"10.1016\/j.eswa.2026.131838_bib0011","series-title":"Distributional reinforcement learning","author":"Bellemare","year":"2023"},{"issue":"Complete","key":"10.1016\/j.eswa.2026.131838_bib0012","article-title":"Risk-averse policy optimization via risk-neutral policy optimization","volume":"311","author":"Bisi","year":"2022","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.eswa.2026.131838_bib0013","unstructured":"Bradbury, J., Frostig, R., Hawkins, P., Johnson, M. J., Leary, C., Maclaurin, D., Necula, G., Paszke, A., VanderPlas, J., Wanderman-Milne, S., & Zhang, Q. (2018). JAX: Composable transformations of Python+NumPy programs. https:\/\/github.com\/jax-ml\/jax."},{"issue":"167","key":"10.1016\/j.eswa.2026.131838_bib0014","first-page":"1","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2018","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.eswa.2026.131838_bib0015","series-title":"2013 American control conference","first-page":"390","article-title":"Stochastic optimal control with dynamic, time-consistent risk constraints","author":"Chow","year":"2013"},{"key":"10.1016\/j.eswa.2026.131838_bib0016","series-title":"Advances in neural information processing systems","article-title":"Risk-sensitive and robust decision-making: A CVaR optimization approach","volume":"vol. 28","author":"Chow","year":"2015"},{"key":"10.1016\/j.eswa.2026.131838_bib0017","doi-asserted-by":"crossref","unstructured":"Coache, A., & Jaimungal, S. (2023). Reinforcement learning with dynamic convex risk measures. Mathematical Finance. 10.1111\/mafi.12388.","DOI":"10.1111\/mafi.12388"},{"key":"10.1016\/j.eswa.2026.131838_bib0018","series-title":"Proceedings of the 35th international conference on machine learning","first-page":"1096","article-title":"Implicit quantile networks for distributional reinforcement learning","author":"Dabney","year":"2018"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0019","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11791","article-title":"Distributional Reinforcement learning with quantile regression","volume":"32","author":"Dabney","year":"2018","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"10.1016\/j.eswa.2026.131838_bib0020","series-title":"Proceedings of the 45th IEEE conference on Decision and Control","first-page":"667","article-title":"Clinical data based optimal STI strategies for HIV: A reinforcement learning approach","author":"Ernst","year":"2006"},{"key":"10.1016\/j.eswa.2026.131838_bib0021","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., & Levine, S., et al. (2021). D4RL: Datasets for deep data-driven reinforcement learning. Preprint. 10.48550\/arXiv.2004.07219."},{"key":"10.1016\/j.eswa.2026.131838_bib0022","series-title":"Advances in neural information processing systems","article-title":"A minimalist approach to offline reinforcement learning","author":"Fujimoto","year":"2021"},{"key":"10.1016\/j.eswa.2026.131838_bib0023","series-title":"Proceedings of the 35th international conference on machine learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.eswa.2026.131838_bib0024","series-title":"Proceedings of the 36th international conference on machine learning","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","author":"Fujimoto","year":"2019"},{"issue":"46","key":"10.1016\/j.eswa.2026.131838_bib0025","first-page":"1573","article-title":"RLPy: A value-function-based reinforcement learning framework for education and research","volume":"16","author":"Geramifard","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.eswa.2026.131838_bib0026","series-title":"Advances in neural information processing systems","article-title":"Efficient risk-averse reinforcement learning","author":"Greenberg","year":"2022"},{"key":"10.1016\/j.eswa.2026.131838_bib0027","series-title":"Proceedings of the 35th international conference on machine learning","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.eswa.2026.131838_bib0028","series-title":"Advances in neural information processing systems","article-title":"Double Q-learning","volume":"vol. 23","author":"Hasselt","year":"2010"},{"key":"10.1016\/j.eswa.2026.131838_bib0029","series-title":"On a relationship between distorted and spectral risk measures","author":"Henryk","year":"2006"},{"issue":"274","key":"10.1016\/j.eswa.2026.131838_bib0030","first-page":"1","article-title":"CleanRL: High-quality single-file implementations of deep reinforcement learning algorithms","volume":"23","author":"Huang","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"10.1016\/j.eswa.2026.131838_bib0031","series-title":"Breakthroughs in statistics: Methodology and distribution","first-page":"492","article-title":"Robust estimation of a location parameter","author":"Huber","year":"1992"},{"key":"10.1016\/j.eswa.2026.131838_bib0032","series-title":"Proceedings of the nineteenth international conference on machine learning","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","author":"Kakade","year":"2002"},{"key":"10.1016\/j.eswa.2026.131838_bib0033","series-title":"Advances in neural information processing systems","article-title":"A natural policy gradient","volume":"vol. 14","author":"Kakade","year":"2001"},{"issue":"04","key":"10.1016\/j.eswa.2026.131838_bib0034","doi-asserted-by":"crossref","first-page":"4436","DOI":"10.1609\/aaai.v34i04.5870","article-title":"Being optimistic to be conservative: Quickly learning a CVaR policy","volume":"34","author":"Keramati","year":"2020","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.eswa.2026.131838_bib0035","series-title":"The Thirty-eighth annual conference on neural information processing systems","article-title":"Spectral-risk safe reinforcement learning with convergence guarantees","author":"Kim","year":"2024"},{"key":"10.1016\/j.eswa.2026.131838_bib0036","series-title":"International conference on learning representations","article-title":"Offline reinforcement learning with implicit Q-learning","author":"Kostrikov","year":"2021"},{"key":"10.1016\/j.eswa.2026.131838_bib0037","series-title":"Advances in neural information processing systems","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume":"vol. 33","author":"Kumar","year":"2020"},{"key":"10.1016\/j.eswa.2026.131838_bib0038","unstructured":"Levine, S., Kumar, A., Tucker, G., & Fu, J., et al. (2020). Offline reinforcement learning: Tutorial, review, and perspectives on open problems. Preprint. 10.48550\/arXiv.2005.01643."},{"key":"10.1016\/j.eswa.2026.131838_bib0039","series-title":"Advances in neural information processing systems","article-title":"Distributional reinforcement learning for risk-sensitive policies","author":"Lim","year":"2022"},{"key":"10.1016\/j.eswa.2026.131838_bib0040","unstructured":"Ma, X., Xia, L., Zhou, Z., Yang, J., & Zhao, Q., et al. (2020). DSAC: Distributional soft actor critic for risk-sensitive reinforcement learning. Preprint. 10.48550\/arXiv.2004.14547."},{"key":"10.1016\/j.eswa.2026.131838_bib0041","series-title":"Advances in neural information processing systems","first-page":"19235","article-title":"Conservative offline distributional reinforcement learning","volume":"vol. 34","author":"Ma","year":"2021"},{"key":"10.1016\/j.eswa.2026.131838_bib0042","series-title":"Proceedings of the 37th international conference on Machine Learning","first-page":"6820","article-title":"On the global convergence rates of softmax policy gradient methods","author":"Mei","year":"2020"},{"issue":"7540","key":"10.1016\/j.eswa.2026.131838_bib0043","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.eswa.2026.131838_bib0044","series-title":"Forty-second international conference on machine learning","article-title":"Beyond CVaR: Leveraging static spectral risk measures for enhanced decision-making in distributional reinforcement learning","author":"Moghimi","year":"2025"},{"key":"10.1016\/j.eswa.2026.131838_bib0045","series-title":"Proceedings of the 27th international conference on international conference on machine learning","first-page":"799","article-title":"Nonparametric return distribution approximation for reinforcement learning","author":"Morimura","year":"2010"},{"key":"10.1016\/j.eswa.2026.131838_bib0046","unstructured":"Nair, A., Gupta, A., Dalal, M., & Levine, S., et al. (2021). AWAC: Accelerating online reinforcement learning with offline datasets. Preprint. 10.48550\/arXiv.2006.09359."},{"key":"10.1016\/j.eswa.2026.131838_bib0047","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113573","article-title":"An intelligent financial portfolio trading strategy using deep Q-learning","volume":"158","author":"Park","year":"2020","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131838_bib0048","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.eswa.2018.02.032","article-title":"Trading financial indices with reinforcement learning agents","volume":"103","author":"Pendharkar","year":"2018","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.131838_bib0049","unstructured":"Peng, X. B., Kumar, A., Zhang, G., & Levine, S., et al. (2019). Advantage-weighted regression: Simple and scalable off-policy reinforcement learning. Preprint. 10.48550\/arXiv.1910.00177."},{"issue":"2","key":"10.1016\/j.eswa.2026.131838_bib0050","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1287\/moor.2015.0747","article-title":"Time-consistent decisions and temporal decomposition of coherent risk functional","volume":"41","author":"Pflug","year":"2016","journal-title":"Mathematics of Operations Research"},{"issue":"4","key":"10.1016\/j.eswa.2026.131838_bib0051","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1080\/03461238.2013.830228","article-title":"Premiums and reserves, adjusted by distortions","volume":"2015","author":"Pichler","year":"2015","journal-title":"Scandinavian Actuarial Journal"},{"key":"10.1016\/j.eswa.2026.131838_bib0052","unstructured":"Pires, B. \u00c1., Rowland, M., Borsa, D., Guo, Z. D., Khetarpal, K., Barreto, A., Abel, D., Munos, R., & Dabney, W., et al. (2025). Optimizing return distributions with distributional dynamic programming. Preprint. 10.48550\/arXiv.2501.13028."},{"key":"10.1016\/j.eswa.2026.131838_bib0053","first-page":"1","article-title":"A survey on offline reinforcement learning: taxonomy, review, and open problems","author":"Prudencio","year":"2023","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"10.1016\/j.eswa.2026.131838_bib0054","first-page":"77520","article-title":"One risk to rule them all: A risk-sensitive perspective on model-based offline reinforcement learning","volume":"36","author":"Rigter","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.eswa.2026.131838_bib0055","series-title":"Proceedings of the 32nd international conference on machine learning","first-page":"1889","article-title":"Trust region policy optimization","author":"Schulman","year":"2015"},{"key":"10.1016\/j.eswa.2026.131838_bib0056","article-title":"Lectures on stochastic programming: Modeling and theory","author":"Shapiro","year":"2014"},{"key":"10.1016\/j.eswa.2026.131838_bib0057","series-title":"Proceedings of the 31st international conference on machine learning","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"Silver","year":"2014"},{"key":"10.1016\/j.eswa.2026.131838_bib0058","article-title":"Reinforcement learning: An introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.eswa.2026.131838_bib0059","series-title":"Advances in neural information processing systems","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"vol. 12","author":"Sutton","year":"1999"},{"issue":"7","key":"10.1016\/j.eswa.2026.131838_bib0060","doi-asserted-by":"crossref","first-page":"3323","DOI":"10.1109\/TAC.2016.2644871","article-title":"Sequential decision making with coherent risk","volume":"62","author":"Tamar","year":"2017","journal-title":"IEEE Transactions on Automatic Control"},{"key":"10.1016\/j.eswa.2026.131838_bib0061","series-title":"Proceedings of the 29th international coference on international conference on machine learning","first-page":"1651","article-title":"Policy gradients with variance related risk criteria","author":"Tamar","year":"2012"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0062","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v29i1.9561","article-title":"Optimizing the CVaR via Sampling","volume":"29","author":"Tamar","year":"2015","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.eswa.2026.131838_bib0063","series-title":"2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"5026","article-title":"MuJoCo: A physics engine for model-based control","author":"Todorov","year":"2012"},{"issue":"3","key":"10.1016\/j.eswa.2026.131838_bib0064","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1023\/A:1017501703105","article-title":"Convergence of a block coordinate descent method for nondifferentiable minimization","volume":"109","author":"Tseng","year":"2001","journal-title":"Journal of Optimization Theory and Applications"},{"key":"10.1016\/j.eswa.2026.131838_bib0065","series-title":"International conference on learning representations","article-title":"Risk-averse offline reinforcement learning","author":"Urp\u00ed","year":"2021"},{"key":"10.1016\/j.eswa.2026.131838_bib0066","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.116807","article-title":"Risk-sensitive policies for portfolio management","volume":"198","author":"Wang","year":"2022","journal-title":"Expert Systems with Applications"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0067","first-page":"43","article-title":"Insurance pricing and increased limits ratemaking by proportional hazards transforms","volume":"17","author":"Wang","year":"1995","journal-title":"Insurance: Mathematics and economics"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0068","doi-asserted-by":"crossref","first-page":"15","DOI":"10.2307\/253675","article-title":"A class of distortion operators for pricing financial and insurance risks","volume":"67","author":"Wang","year":"2000","journal-title":"The Journal of Risk and Insurance"},{"issue":"1","key":"10.1016\/j.eswa.2026.131838_bib0069","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s10107-015-0892-3","article-title":"Coordinate descent algorithms","volume":"151","author":"Wright","year":"2015","journal-title":"Mathematical Programming"},{"issue":"12","key":"10.1016\/j.eswa.2026.131838_bib0070","doi-asserted-by":"crossref","first-page":"10905","DOI":"10.1609\/aaai.v35i12.17302","article-title":"Mean-variance policy iteration for risk-averse reinforcement learning","volume":"35","author":"Zhang","year":"2021","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426007517?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0957417426007517?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T11:36:58Z","timestamp":1773056218000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417426007517"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":70,"alternative-id":["S0957417426007517"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131838","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Risk-sensitive actor-critic with static spectral risk measures for online and offline reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.131838","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"131838"}}