{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:02:44Z","timestamp":1772906564870,"version":"3.50.1"},"reference-count":99,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T00:00:00Z","timestamp":1748476800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T00:00:00Z","timestamp":1748476800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s40747-025-01884-y","type":"journal-article","created":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T08:25:12Z","timestamp":1748507112000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["A novel multi-agent dynamic portfolio optimization learning system based on hierarchical deep reinforcement learning"],"prefix":"10.1007","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6052-0051","authenticated-orcid":false,"given":"Ruoyu","family":"Sun","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5373-6126","authenticated-orcid":false,"given":"Yue","family":"Xi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4703-8765","authenticated-orcid":false,"given":"Angelos","family":"Stefanidis","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8873-4073","authenticated-orcid":false,"given":"Zhengyong","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5360-6493","authenticated-orcid":false,"given":"Jionglong","family":"Su","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,29]]},"reference":[{"key":"1884_CR1","doi-asserted-by":"publisher","DOI":"10.37394\/23208.2023.20.12","author":"AK Aggarwal","year":"2023","unstructured":"Aggarwal AK (2023) A review on genomics data analysis using machine learning. WSEAS Trans Biol Biomed. https:\/\/doi.org\/10.37394\/23208.2023.20.12","journal-title":"WSEAS Trans Biol Biomed"},{"issue":"1","key":"1884_CR2","doi-asserted-by":"publisher","first-page":"5643","DOI":"10.24294\/irr.v6i2.5643","volume":"6","author":"A Kumar","year":"2024","unstructured":"Kumar A (2024) SURF feature descriptor for image analysis. Imaging Radiat Res 6(1):5643. https:\/\/doi.org\/10.24294\/irr.v6i2.5643","journal-title":"Imaging Radiat Res"},{"issue":"1","key":"1884_CR3","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1111\/j.1540-6261.1952.tb01525.x","volume":"7","author":"H Markowitz","year":"1952","unstructured":"Markowitz H (1952) Portfolio selection. J Financ 7(1):77\u201391. https:\/\/doi.org\/10.1111\/j.1540-6261.1952.tb01525.x","journal-title":"J Financ"},{"key":"1884_CR4","volume-title":"Portfolio selection: efficient diversification of investments","author":"H Markowitz","year":"1959","unstructured":"Markowitz H (1959) Portfolio selection: efficient diversification of investments. Wiley, New York"},{"key":"1884_CR5","volume-title":"Mean-variance analysis in portfolio choice and capital markets","author":"HM Markowitz","year":"2000","unstructured":"Markowitz HM, Todd GP (2000) Mean-variance analysis in portfolio choice and capital markets, vol 66. Wiley, New York"},{"issue":"4","key":"1884_CR6","doi-asserted-by":"publisher","first-page":"917","DOI":"10.1002\/j.1538-7305.1956.tb03809.x","volume":"35","author":"JL Kelly","year":"1956","unstructured":"Kelly JL (1956) A new interpretation of information rate. Bell Syst Tech J 35(4):917\u2013926. https:\/\/doi.org\/10.1002\/j.1538-7305.1956.tb03809.x","journal-title":"Bell Syst Tech J"},{"key":"1884_CR7","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/S0927-0507(05)80047-7","volume":"9","author":"NH Hakansson","year":"1995","unstructured":"Hakansson NH, Ziemba WT (1995) Capital growth theory. Handb Oper Res Manag Sci 9:65\u201386. https:\/\/doi.org\/10.1016\/S0927-0507(05)80047-7","journal-title":"Handb Oper Res Manag Sci"},{"issue":"3","key":"1884_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2512962","volume":"46","author":"B Li","year":"2014","unstructured":"Li B, Hoi SC (2014) Online portfolio selection: a survey. ACM Comput Surv 46(3):1\u201336. https:\/\/doi.org\/10.1145\/2512962","journal-title":"ACM Comput Surv"},{"issue":"4","key":"1884_CR9","doi-asserted-by":"crossref","first-page":"44","DOI":"10.3905\/jfds.2020.1.045","volume":"2","author":"J Du","year":"2020","unstructured":"Du J, Jin M, Kolm PN, Ritter G, Wang Y, Zhang B (2020) Deep reinforcement learning for option replication and hedging. J Financ Data Sci 2(4):44\u201357","journal-title":"J Financ Data Sci"},{"key":"1884_CR10","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113988","volume":"164","author":"N Seong","year":"2021","unstructured":"Seong N, Nam K (2021) Predicting stock movements based on financial news with segmentation. Expert Syst Appl 164:113988","journal-title":"Expert Syst Appl"},{"key":"1884_CR11","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1016\/j.neucom.2017.02.097","volume":"264","author":"Q Song","year":"2017","unstructured":"Song Q, Liu A, Yang SY (2017) Stock portfolio selection using learning-to-rank algorithms with news sentiment. Neurocomputing 264:20\u201328","journal-title":"Neurocomputing"},{"key":"1884_CR12","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1016\/j.dss.2018.11.004","volume":"117","author":"K Nam","year":"2019","unstructured":"Nam K, Seong N (2019) Financial news-based stock movement prediction using causality analysis of influence in the Korean stock market. Decis Support Syst 117:100\u2013112","journal-title":"Decis Support Syst"},{"issue":"3","key":"1884_CR13","doi-asserted-by":"publisher","first-page":"5932","DOI":"10.1016\/j.ijar.2014.07.005","volume":"36","author":"GS Atsalakis","year":"2009","unstructured":"Atsalakis GS, Valavanis KP (2009) Surveying stock market forecasting techniques part II: Soft computing methods. Expert Syst Appl 36(3):5932\u20135941. https:\/\/doi.org\/10.1016\/j.ijar.2014.07.005","journal-title":"Expert Syst Appl"},{"key":"1884_CR14","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.115019","volume":"178","author":"N Jing","year":"2021","unstructured":"Jing N, Wu Z, Wang H (2021) A hybrid model integrating deep learning with investor sentiment analysis for stock price prediction. Expert Syst Appl 178:115019","journal-title":"Expert Syst Appl"},{"key":"1884_CR15","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.117763","volume":"206","author":"S Carta","year":"2022","unstructured":"Carta S, Consoli S, Podda AS, Recupero DR, Stanciu MM (2022) Statistical arbitrage powered by explainable artificial intelligence. Expert Syst Appl 206:117763","journal-title":"Expert Syst Appl"},{"key":"1884_CR16","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.119556","volume":"218","author":"J Jang","year":"2023","unstructured":"Jang J, Seong N (2023) Deep reinforcement learning for stock portfolio optimization by connecting with modern portfolio theory. Expert Syst Appl 218:119556","journal-title":"Expert Syst Appl"},{"key":"1884_CR17","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.111697","volume":"293","author":"W Jiang","year":"2024","unstructured":"Jiang W, Liu M, Xu M, Chen S, Shi K, Liu P, Zhao F (2024) New reinforcement learning based on representation transfer for portfolio management. Knowl Based Syst 293:111697","journal-title":"Knowl Based Syst"},{"key":"1884_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112017","author":"M Kang","year":"2024","unstructured":"Kang M, Templeton GF, Kwak DH, Um S (2024) Development of an AI framework using neural process continuous reinforcement learning to optimize highly volatile financial portfolios. Knowl Based Syst. https:\/\/doi.org\/10.1016\/j.knosys.2024.112017","journal-title":"Knowl Based Syst"},{"key":"1884_CR19","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1146\/annurev-financial-092214-043752","volume":"7","author":"D Hirshleifer","year":"2015","unstructured":"Hirshleifer D (2015) Behavioral finance. Annu Rev Financ Econ 7:133\u2013159","journal-title":"Annu Rev Financ Econ"},{"issue":"3","key":"1884_CR20","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1111\/mafi.12382","volume":"33","author":"B Hambly","year":"2023","unstructured":"Hambly B, Xu R, Yang H (2023) Recent advances in reinforcement learning in finance. Math Financ 33(3):437\u2013503","journal-title":"Math Financ"},{"key":"1884_CR21","volume":"278","author":"S Yang","year":"2023","unstructured":"Yang S (2023) Deep reinforcement learning for portfolio management. Knowl Based Syst 278:110905","journal-title":"Knowl Based Syst"},{"key":"1884_CR22","doi-asserted-by":"publisher","unstructured":"Jiang Z, Xu D, Liang J (2017) A deep reinforcement learning framework for the financial portfolio management problem. arXiv:1706.10059. https:\/\/doi.org\/10.48550\/arXiv.1706.10059","DOI":"10.48550\/arXiv.1706.10059"},{"key":"1884_CR23","doi-asserted-by":"crossref","unstructured":"Shi S, Li J, Li G, Pan P (2019) A multi-scale temporal feature aggregation convolutional neural network for portfolio management. In: Proceedings of the 28th ACM international conference on information and knowledge management, pp 1613\u20131622","DOI":"10.1145\/3357384.3357961"},{"key":"1884_CR24","doi-asserted-by":"crossref","unstructured":"Ye Y, Pei H, Wang B, Chen PY, Zhu Y, Xiao J, Li B (2020) Reinforcement-learning based portfolio management with augmented asset movement prediction states. In: Proceedings of the AAAI conference on artificial intelligence, vol 34, no 01, pp 1112\u20131119","DOI":"10.1609\/aaai.v34i01.5462"},{"key":"1884_CR25","doi-asserted-by":"crossref","unstructured":"Wang J, Zhang Y, Tang K, Wu J, Xiong Z (2019) Alphastock: a buying-winners-and-selling-losers investment strategy using interpretable deep reinforcement attention networks. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery & data mining, pp 1900\u20131908","DOI":"10.1145\/3292500.3330647"},{"key":"1884_CR26","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.neucom.2022.04.105","volume":"498","author":"S Shi","year":"2022","unstructured":"Shi S, Li J, Li G, Pan P, Chen Q, Sun Q (2022) GPM: A graph convolutional network based reinforcement learning framework for portfolio management. Neurocomputing 498:14\u201327","journal-title":"Neurocomputing"},{"key":"1884_CR27","doi-asserted-by":"publisher","unstructured":"Yu P, Lee JS, Kulyatin I, Shi Z, Dasgupta S (2019) Model-based deep reinforcement learning for dynamic portfolio optimization. arXiv:1901.08740. https:\/\/doi.org\/10.48550\/arXiv.1901.08740","DOI":"10.48550\/arXiv.1901.08740"},{"key":"1884_CR28","doi-asserted-by":"crossref","unstructured":"Wang Z, Huang B, Tu S, Zhang K, Xu L (2021) Deeptrader: a deep reinforcement learning approach for risk-return balanced portfolio management with market conditions embedding. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, no 1, pp 643\u2013650","DOI":"10.1609\/aaai.v35i1.16144"},{"issue":"1","key":"1884_CR29","first-page":"236","volume":"34","author":"Y Zhang","year":"2020","unstructured":"Zhang Y, Zhao P, Wu Q, Li B, Huang J, Tan M (2020) Cost-sensitive portfolio selection via deep reinforcement learning. IEEE Trans Knowl Data Eng 34(1):236\u2013248","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1884_CR30","doi-asserted-by":"crossref","unstructured":"Sun R, Jiang Z, Su J (2021) A deep residual shrinkage neural network-based deep reinforcement learning strategy in financial portfolio management. In: 2021 IEEE 6th international conference on big data analytics (ICBDA). IEEE, pp 76\u201386","DOI":"10.1109\/ICBDA51983.2021.9403210"},{"issue":"5","key":"1884_CR31","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1007\/s10489-025-06242-6","volume":"55","author":"Z Wei","year":"2025","unstructured":"Wei Z, Chen D, Zhang Y, Wen D, Nie X, Xie L (2025) Deep reinforcement learning portfolio model based on mixture of experts. Appl Intell 55(5):347. https:\/\/doi.org\/10.1007\/s10489-025-06242-6","journal-title":"Appl Intell"},{"issue":"3","key":"1884_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103247","volume":"60","author":"J Li","year":"2023","unstructured":"Li J, Zhang Y, Yang X, Chen L (2023) Online portfolio management via deep reinforcement learning with high-frequency data. Inf Process Manag 60(3):103247. https:\/\/doi.org\/10.1016\/j.ipm.2022.103247","journal-title":"Inf Process Manag"},{"key":"1884_CR33","volume":"594","author":"LC Cheng","year":"2024","unstructured":"Cheng LC, Sun JS (2024) Multiagent-based deep reinforcement learning framework for multi-asset adaptive trading and portfolio management. Neurocomputing 594:127800","journal-title":"Neurocomputing"},{"key":"1884_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122801","volume":"242","author":"J Zou","year":"2024","unstructured":"Zou J, Lou J, Wang B, Liu S (2024) A novel deep reinforcement learning based automated stock trading system using cascaded lstm networks. Expert Syst Appl 242:122801. https:\/\/doi.org\/10.1016\/j.eswa.2023.122801","journal-title":"Expert Syst Appl"},{"key":"1884_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122027","volume":"238","author":"Q Sun","year":"2024","unstructured":"Sun Q, Wei X, Yang X (2024) GraphSAGE with deep reinforcement learning for financial portfolio optimization. Expert Syst Appl 238:122027. https:\/\/doi.org\/10.1016\/j.eswa.2023.122027","journal-title":"Expert Syst Appl"},{"key":"1884_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.126430","author":"C Alzaman","year":"2025","unstructured":"Alzaman C (2025) Optimizing portfolio selection through stock ranking and matching: a reinforcement learning approach. Expert Syst Appl. https:\/\/doi.org\/10.1016\/j.eswa.2025.126430","journal-title":"Expert Syst Appl"},{"issue":"32","key":"1884_CR37","doi-asserted-by":"publisher","first-page":"20111","DOI":"10.1007\/s00521-024-09805-9","volume":"36","author":"R Sun","year":"2024","unstructured":"Sun R, Stefanidis A, Jiang Z, Su J (2024) Combining transformer based deep reinforcement learning with Black\u2013Litterman model for portfolio optimization. Neural Comput Appl 36(32):20111\u201320146. https:\/\/doi.org\/10.1007\/s00521-024-09805-9","journal-title":"Neural Comput Appl"},{"key":"1884_CR38","doi-asserted-by":"publisher","unstructured":"Girshick R (2015) Fast r-cnn. arXiv:1504.08083. https:\/\/doi.org\/10.48550\/arXiv.1504.08083","DOI":"10.48550\/arXiv.1504.08083"},{"key":"1884_CR39","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.110211","volume":"262","author":"B Yang","year":"2023","unstructured":"Yang B, Liang T, Xiong J, Zhong C (2023) Deep reinforcement learning based on transformer and U-Net framework for stock trading. Knowl-Based Syst 262:110211","journal-title":"Knowl-Based Syst"},{"issue":"1","key":"1884_CR40","first-page":"1","volume":"1","author":"X Du","year":"2016","unstructured":"Du X, Zhai J, Lv K (2016) Algorithm trading using q-learning and recurrent reinforcement learning. Positions 1(1):1\u20137","journal-title":"Positions"},{"key":"1884_CR41","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113573","volume":"158","author":"H Park","year":"2020","unstructured":"Park H, Sim MK, Choi DG (2020) An intelligent financial portfolio trading strategy using deep Q-learning. Expert Syst Appl 158:113573","journal-title":"Expert Syst Appl"},{"key":"1884_CR42","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.eswa.2018.02.032","volume":"103","author":"PC Pendharkar","year":"2018","unstructured":"Pendharkar PC, Cusatis P (2018) Trading financial indices with reinforcement learning agents. Expert Syst Appl 103:1\u201313","journal-title":"Expert Syst Appl"},{"key":"1884_CR43","doi-asserted-by":"crossref","first-page":"17229","DOI":"10.1007\/s00521-020-05359-8","volume":"32","author":"G Lucarelli","year":"2020","unstructured":"Lucarelli G, Borrotti M (2020) A deep Q-learning portfolio management framework for the cryptocurrency market. Neural Comput Appl 32:17229\u201317244","journal-title":"Neural Comput Appl"},{"key":"1884_CR44","doi-asserted-by":"crossref","unstructured":"Gao Z, Gao Y, Hu Y, Jiang Z, Su J (2020) Application of deep q-network in portfolio management. In: 2020 5th IEEE international conference on big data analytics (ICBDA). IEEE, pp 268\u2013275","DOI":"10.1109\/ICBDA49040.2020.9101333"},{"key":"1884_CR45","doi-asserted-by":"crossref","unstructured":"Gao Y, Gao Z, Hu Y, Song S, Jiang Z, Su J (2021) A framework of hierarchical deep Q-network for portfolio management. In: ICAART (2), pp 132\u2013140","DOI":"10.5220\/0010233201320140"},{"key":"1884_CR46","doi-asserted-by":"publisher","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv:1509.02971. https:\/\/doi.org\/10.48550\/arXiv.1509.02971","DOI":"10.48550\/arXiv.1509.02971"},{"key":"1884_CR47","doi-asserted-by":"publisher","unstructured":"Felizardo LK, Paiva FCL, Costa AHR, Del-Moral-Hernandez E (2022) Reinforcement learning applied to trading systems: a survey. arXiv:2212.06064. https:\/\/doi.org\/10.48550\/arXiv.2212.06064","DOI":"10.48550\/arXiv.2212.06064"},{"key":"1884_CR48","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International conference on machine learning. PMLR, pp 1861\u20131870"},{"key":"1884_CR49","doi-asserted-by":"publisher","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv:1707.06347. https:\/\/doi.org\/10.48550\/arXiv.1707.06347","DOI":"10.48550\/arXiv.1707.06347"},{"key":"1884_CR50","doi-asserted-by":"crossref","DOI":"10.1016\/j.ress.2019.106706","volume":"195","author":"OO Aremu","year":"2020","unstructured":"Aremu OO, Hyland-Wood D, McAree PR (2020) A machine learning approach to circumventing the curse of dimensionality in discontinuous time series machine data. Reliab Eng Syst Saf 195:106706","journal-title":"Reliab Eng Syst Saf"},{"key":"1884_CR51","doi-asserted-by":"crossref","first-page":"1181","DOI":"10.1613\/jair.1.14390","volume":"76","author":"JMC Ocana","year":"2023","unstructured":"Ocana JMC, Capobianco R, Nardi D (2023) An overview of environmental features that impact deep reinforcement learning in sparse-reward domains. J Artif Intell Res 76:1181\u20131218","journal-title":"J Artif Intell Res"},{"key":"1884_CR52","volume-title":"Dynamic programming","author":"R Bellman","year":"1957","unstructured":"Bellman R (1957) Dynamic programming. Princeton University Press, Princeton"},{"key":"1884_CR53","doi-asserted-by":"publisher","unstructured":"Curran W, Brys T, Taylor M, Smart W (2015) Using PCA to efficiently represent state spaces. arXiv: 1505.00322. https:\/\/doi.org\/10.48550\/arXiv.1505.00322","DOI":"10.48550\/arXiv.1505.00322"},{"key":"1884_CR54","doi-asserted-by":"publisher","unstructured":"Hao X, Mao H, Wang W, Yang Y, Li D, Zheng Y, Wang Z, Hao J (2022) Breaking the curse of dimensionality in multiagent state space: a unified agent permutation framework. arXiv: 2203.05285. https:\/\/doi.org\/10.48550\/arXiv.2203.05285","DOI":"10.48550\/arXiv.2203.05285"},{"issue":"8","key":"1884_CR55","doi-asserted-by":"publisher","first-page":"3177","DOI":"10.1007\/s40815-023-01563-5","volume":"25","author":"X Song","year":"2023","unstructured":"Song X, Song Y, Stojanovic V, Song S (2023) Improved dynamic event-triggered security control for T-S fuzzy LPV-PDE systems via pointwise measurements and point control. Int J Fuzzy Syst 25(8):3177\u20133192. https:\/\/doi.org\/10.1007\/s40815-023-01563-5","journal-title":"Int J Fuzzy Syst"},{"key":"1884_CR56","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2024.3371492","author":"Z Du","year":"2024","unstructured":"Du Z, Xie X, Qu Z, Hu Y, Stojanovic V (2024) Dynamic event-triggered consensus control for interval type-2 fuzzy multi-agent systems. IEEE Trans Circuits Syst I Regul Pap. https:\/\/doi.org\/10.1109\/TCSI.2024.3371492","journal-title":"IEEE Trans Circuits Syst I Regul Pap"},{"issue":"10","key":"1884_CR57","doi-asserted-by":"publisher","first-page":"1943","DOI":"10.1177\/01423312231225782","volume":"46","author":"Y Tao","year":"2024","unstructured":"Tao Y, Tao H, Zhuang Z, Stojanovic V, Paszke W (2024) Quantized iterative learning control of communication-constrained systems with encoding and decoding mechanism. Trans Inst Meas Control 46(10):1943\u20131954. https:\/\/doi.org\/10.1177\/01423312231225782","journal-title":"Trans Inst Meas Control"},{"key":"1884_CR58","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1025696116075","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. Discrete Event Dyn Syst 13:341\u2013379","journal-title":"Discrete Event Dyn Syst"},{"key":"1884_CR59","volume":"54","author":"S Li","year":"2024","unstructured":"Li S, Fang X, Liao J, Ghadamyari M, Khayatnezhad M, Ghadimi N (2024) Evaluating the efficiency of CCHP systems in Xinjiang Uygur Autonomous Region: an optimal strategy based on improved mother optimization algorithm. Case Stud Therm Eng 54:104005","journal-title":"Case Stud Therm Eng"},{"issue":"1","key":"1884_CR60","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1049\/stg2.12095","volume":"6","author":"M Ghiasi","year":"2023","unstructured":"Ghiasi M, Wang Z, Mehrandezh M, Jalilian S, Ghadimi N (2023) Evolution of smart grids towards the internet of energy: concept and essential components for deep decarbonisation. IET Smart Grid 6(1):86\u2013102","journal-title":"IET Smart Grid"},{"key":"1884_CR61","volume":"55","author":"W Jiang","year":"2022","unstructured":"Jiang W, Wang X, Huang H, Zhang D, Ghadimi N (2022) Optimal economic scheduling of microgrids considering renewable energy sources based on energy hub model using demand response and improved water wave optimization algorithm. J Energy Storage 55:105311","journal-title":"J Energy Storage"},{"issue":"2","key":"1884_CR62","first-page":"4296","volume":"44","author":"L Chen","year":"2022","unstructured":"Chen L, Huang H, Tang P, Yao D, Yang H, Ghadimi N (2022) Optimal modeling of combined cooling, heating, and power systems using developed African Vulture optimization: a case study in watersport complex. Energy Sources Part A Recov Util Environ Effects 44(2):4296\u20134317","journal-title":"Energy Sources Part A Recov Util Environ Effects"},{"issue":"3","key":"1884_CR63","first-page":"7109","volume":"44","author":"G Bo","year":"2022","unstructured":"Bo G, Cheng P, Dezhi K, Xiping W, Chaodong L, Mingming G, Ghadimi N (2022) Optimum structure of a combined wind\/photovoltaic\/fuel cell-based on amended Dragon Fly optimization algorithm: a case study. Energy Sources Part A Recov Util Environ Effects 44(3):7109\u20137131","journal-title":"Energy Sources Part A Recov Util Environ Effects"},{"issue":"3","key":"1884_CR64","doi-asserted-by":"crossref","first-page":"230","DOI":"10.1109\/TAMD.2010.2056368","volume":"2","author":"J Schmidhuber","year":"2010","unstructured":"Schmidhuber J (2010) Formal theory of creativity, fun, and intrinsic motivation (1990\u20132010). IEEE Trans Auton Ment Dev 2(3):230\u2013247","journal-title":"IEEE Trans Auton Ment Dev"},{"issue":"1","key":"1884_CR65","doi-asserted-by":"crossref","first-page":"172","DOI":"10.3390\/make4010009","volume":"4","author":"M Hutsebaut-Buysse","year":"2022","unstructured":"Hutsebaut-Buysse M, Mets K, Latr\u00e9 S (2022) Hierarchical reinforcement learning: a survey and open research challenges. Mach Learn Knowl Extr 4(1):172\u2013221","journal-title":"Mach Learn Knowl Extr"},{"issue":"1","key":"1884_CR66","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1086\/294846","volume":"39","author":"WF Sharpe","year":"1966","unstructured":"Sharpe WF (1966) Mutual fund performance. J Bus 39(1):119\u2013138","journal-title":"J Bus"},{"key":"1884_CR67","volume-title":"Sortino: a \u2018sharper\u2019 ratio","author":"TN Rollinger","year":"2013","unstructured":"Rollinger TN, Hoffman ST (2013) Sortino: a \u2018sharper\u2019 ratio. Red Rock Capital, Chicago"},{"issue":"10","key":"1884_CR68","first-page":"99","volume":"17","author":"M Magdon-Ismail","year":"2004","unstructured":"Magdon-Ismail M, Atiya AF (2004) Maximum drawdown. Risk Mag 17(10):99\u2013102","journal-title":"Risk Mag"},{"key":"1884_CR69","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.111739","volume":"294","author":"C Choi","year":"2024","unstructured":"Choi C, Kim J (2024) Outperforming the tutor: expert-infused deep reinforcement learning for dynamic portfolio selection of diverse assets. Knowl Based Syst 294:111739","journal-title":"Knowl Based Syst"},{"issue":"2","key":"1884_CR70","doi-asserted-by":"crossref","first-page":"13","DOI":"10.3390\/econometrics11020013","volume":"11","author":"C Li","year":"2023","unstructured":"Li C, Shen L, Qian G (2023) Online hybrid neural network for stock price prediction: a case study of high-frequency stock trading in the Chinese market. Econometrics 11(2):13","journal-title":"Econometrics"},{"key":"1884_CR71","doi-asserted-by":"publisher","unstructured":"Norton V (2011) Adjusted closing prices. arXiv:1105.2956. https:\/\/doi.org\/10.48550\/arXiv.1105.2956","DOI":"10.48550\/arXiv.1105.2956"},{"key":"1884_CR72","doi-asserted-by":"crossref","unstructured":"Hernandez-Leal P, Kartal B, Taylor ME (2019) Agent modeling as auxiliary task for deep reinforcement learning. In: Proceedings of the AAAI conference on artificial intelligence and interactive digital entertainment, vol 15, no 1, pp 31\u201337","DOI":"10.1609\/aiide.v15i1.5221"},{"key":"1884_CR73","doi-asserted-by":"crossref","unstructured":"Wang R, Wei H, An B, Feng Z, Yao J (2021) Commission fee is not enough: a hierarchical reinforced framework for portfolio management. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, no 1, pp 626\u2013633","DOI":"10.1609\/aaai.v35i1.16142"},{"key":"1884_CR74","doi-asserted-by":"crossref","unstructured":"Kumar A (2024) Geo-tagged 3D geometric modeling of urban structures by mitigating reflected GPS signals using a laser range sensor. In: Science and information conference. Springer Nature Switzerland, Cham, pp 396\u2013414","DOI":"10.1007\/978-3-031-62281-6_29"},{"key":"1884_CR75","unstructured":"Garg M, Ubhi JS, Aggarwal AK (2019) Steganography and its advancements in spatial domain (No. 2177). EasyChair. https:\/\/api.semanticscholar.org\/CorpusID:212420324"},{"key":"1884_CR76","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3:9\u201344","journal-title":"Mach Learn"},{"key":"1884_CR77","doi-asserted-by":"crossref","first-page":"15","DOI":"10.3905\/jpm.2004.442611","volume":"30","author":"AW Lo","year":"2004","unstructured":"Lo AW (2004) The adaptive markets hypothesis. J Portfolio Manag 30:15\u2013129","journal-title":"J Portfolio Manag"},{"key":"1884_CR78","doi-asserted-by":"crossref","unstructured":"Duan Y, Wang L, Zhang Q, Li J (2022) Factorvae: a probabilistic dynamic factor model based on variational autoencoder for predicting cross-sectional stock returns. In: Proceedings of the AAAI conference on artificial intelligence, vol 36, no 4, pp 4468\u20134476","DOI":"10.1609\/aaai.v36i4.20369"},{"key":"1884_CR79","doi-asserted-by":"crossref","unstructured":"Liu XY, Yang H, Gao J, Wang CD (2021) FinRL: deep reinforcement learning framework to automate trading in quantitative finance. In: Proceedings of the second ACM international conference on AI in finance, pp 1\u20139","DOI":"10.1145\/3490354.3494366"},{"issue":"1","key":"1884_CR80","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.1467-9965.1991.tb00002.x","volume":"1","author":"TM Cover","year":"1991","unstructured":"Cover TM (1991) Universal portfolios. Math Finance 1(1):1\u201329","journal-title":"Math Finance"},{"key":"1884_CR81","doi-asserted-by":"publisher","unstructured":"Borodin A, El-Yaniv R, Gogan V (2000) On the competitive theory and practice of portfolio selection. In: Latin 2000: theoretical informatics: 4th Latin American symposium, Punta del Este, Uruguay, April 10\u201314, 2000 Proceedings 4. Springer, Berlin, Heidelberg, pp 173\u2013196. https:\/\/doi.org\/10.1007\/10719839_19","DOI":"10.1007\/10719839_19"},{"issue":"2","key":"1884_CR82","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1109\/18.485708","volume":"42","author":"TM Cover","year":"1996","unstructured":"Cover TM, Ordentlich E (1996) Universal portfolios with side information. IEEE Trans Inf Theory 42(2):348\u2013363","journal-title":"IEEE Trans Inf Theory"},{"issue":"4","key":"1884_CR83","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1111\/1467-9965.00058","volume":"8","author":"DP Helmbold","year":"1998","unstructured":"Helmbold DP, Schapire RE, Singer Y, Warmuth MK (1998) On-line portfolio selection using multiplicative updates. Math Finance 8(4):325\u2013347","journal-title":"Math Finance"},{"key":"1884_CR84","first-page":"345","volume":"16","author":"A Borodin","year":"2003","unstructured":"Borodin A, El-Yaniv R, Gogan V (2003) Can we learn to beat the best stock. Adv Neural Inf Process Syst 16:345\u2013352","journal-title":"Adv Neural Inf Process Syst"},{"key":"1884_CR85","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1007\/s10994-012-5281-z","volume":"87","author":"B Li","year":"2012","unstructured":"Li B, Zhao P, Hoi SC, Gopalkrishnan V (2012) PAMR: passive aggressive mean reversion strategy for portfolio selection. Mach Learn 87:221\u2013258","journal-title":"Mach Learn"},{"key":"1884_CR86","unstructured":"Li B, Hoi SCH, Zhao P, Gopalkrishnan V (2011b) Confidence weighted mean reversion strategy for on-line portfolio selection. In: Proceedings of the international conference on artificial intelligence and statistics, pp 434\u2013442"},{"key":"1884_CR87","doi-asserted-by":"publisher","unstructured":"Li B, Hoi SC (2012) On-line portfolio selection with moving average reversion. arXiv:1206.4626. https:\/\/doi.org\/10.48550\/arXiv.1206.4626","DOI":"10.48550\/arXiv.1206.4626"},{"key":"1884_CR88","unstructured":"Huang D, Zhou J, Li B, Hoi S, Zhou S (2012) Robust median reversion strategy for on-line portfolio selection. In: Proceedings of the twenty-third international joint conference on artificial intelligence: IJCAI 2013, pp 2006\u20132012"},{"key":"1884_CR89","doi-asserted-by":"crossref","unstructured":"Gao L, Zhang W (2013) Weighted moving average passive aggressive algorithm for online portfolio selection. In: 2013 5th international conference on intelligent human-machine systems and cybernetics, vol 1. IEEE, pp 327\u2013330","DOI":"10.1109\/IHMSC.2013.84"},{"issue":"2","key":"1884_CR90","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1111\/j.1467-9965.2006.00274.x","volume":"16","author":"L Gy\u00f6rfi","year":"2006","unstructured":"Gy\u00f6rfi L, Lugosi G, Udina F (2006) Nonparametric kernel-based sequential investment strategies. Math Finance 16(2):337\u2013357","journal-title":"Math Finance"},{"issue":"3","key":"1884_CR91","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1961189.1961193","volume":"2","author":"B Li","year":"2011","unstructured":"Li B, Hoi SC, Gopalkrishnan V (2011) Corn: correlation-driven nonparametric learning approach for portfolio selection. ACM Trans Intell Syst Technol (TIST) 2(3):1\u201329","journal-title":"ACM Trans Intell Syst Technol (TIST)"},{"key":"1884_CR92","doi-asserted-by":"publisher","unstructured":"Agarwal A, Hazan E, Kale S, Schapire RE (2006) Algorithms for portfolio management based on the Newton method. In ACM: proceedings of the 23rd international conference on machine learning, pp 9\u201316. https:\/\/doi.org\/10.1145\/1143844.1143846","DOI":"10.1145\/1143844.1143846"},{"issue":"3","key":"1884_CR93","doi-asserted-by":"crossref","first-page":"279","DOI":"10.2307\/2331042","volume":"21","author":"P Jorion","year":"1986","unstructured":"Jorion P (1986) Bayes-Stein estimation for portfolio analysis. J Financ Quant Anal 21(3):279\u2013292","journal-title":"J Financ Quant Anal"},{"issue":"3","key":"1884_CR94","doi-asserted-by":"crossref","first-page":"621","DOI":"10.1017\/S0022109000004129","volume":"42","author":"R Kan","year":"2007","unstructured":"Kan R, Zhou G (2007) Optimal portfolio choice with parameter uncertainty. J Financ Quant Anal 42(3):621\u2013656","journal-title":"J Financ Quant Anal"},{"key":"1884_CR95","doi-asserted-by":"crossref","unstructured":"Zeng A, Chen M, Zhang L, Xu Q (2023) Are transformers effective for time series forecasting?. In: Proceedings of the AAAI conference on artificial intelligence, vol 37, no 9, pp 11121\u201311128","DOI":"10.1609\/aaai.v37i9.26317"},{"key":"1884_CR96","first-page":"22419","volume":"34","author":"H Wu","year":"2021","unstructured":"Wu H, Xu J, Wang J, Long M (2021) Autoformer: decomposition transformers with auto-correlation for long-term series forecasting. Adv Neural Inf Process Syst 34:22419\u201322430","journal-title":"Adv Neural Inf Process Syst"},{"key":"1884_CR97","doi-asserted-by":"crossref","unstructured":"Zhou H, Zhang S, Peng J, Zhang S, Li J, Xiong H, Zhang W (2021) Informer: Beyond efficient transformer for long sequence time-series forecasting. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, no 12, pp 11106\u201311115","DOI":"10.1609\/aaai.v35i12.17325"},{"key":"1884_CR98","doi-asserted-by":"publisher","unstructured":"Nie Y, Nguyen NH, Sinthong P, Kalagnanam J (2022) A time series is worth 64 words: long-term forecasting with transformers. arXiv:2211.14730. https:\/\/doi.org\/10.48550\/arXiv.2211.14730","DOI":"10.48550\/arXiv.2211.14730"},{"issue":"3","key":"1884_CR99","doi-asserted-by":"crossref","first-page":"509","DOI":"10.1080\/14697688.2013.836283","volume":"15","author":"Y Xiao","year":"2015","unstructured":"Xiao Y, Valdez EA (2015) A Black\u2013Litterman asset allocation model under elliptical distributions. Quant Finance 15(3):509\u2013519","journal-title":"Quant Finance"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01884-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-01884-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01884-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T11:08:32Z","timestamp":1750331312000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-01884-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,29]]},"references-count":99,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["1884"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-01884-y","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,29]]},"assertion":[{"value":"25 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"311"}}