{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:11:42Z","timestamp":1774120302280,"version":"3.50.1"},"reference-count":93,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,2,26]],"date-time":"2024-02-26T00:00:00Z","timestamp":1708905600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,26]],"date-time":"2024-02-26T00:00:00Z","timestamp":1708905600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s10994-023-06511-w","type":"journal-article","created":{"date-parts":[[2024,2,26]],"date-time":"2024-02-26T12:02:12Z","timestamp":1708948932000},"page":"2795-2839","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":31,"title":["Dynamic datasets and market environments for financial reinforcement learning"],"prefix":"10.1007","volume":"113","author":[{"given":"Xiao-Yang","family":"Liu","sequence":"first","affiliation":[]},{"given":"Ziyi","family":"Xia","sequence":"additional","affiliation":[]},{"given":"Hongyang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jiechao","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Daochen","family":"Zha","sequence":"additional","affiliation":[]},{"given":"Ming","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Christina Dan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhaoran","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,26]]},"reference":[{"key":"6511_CR1","doi-asserted-by":"crossref","unstructured":"Akiba, T., Sano, S., Yanase, T., Ohta, T., & Koyama, M. (2019). Optuna: A next-generation hyperparameter optimization framework. In ACM SIGKDD international conference on knowledge discovery & data mining.","DOI":"10.1145\/3292500.3330701"},{"key":"6511_CR2","doi-asserted-by":"crossref","unstructured":"Alla, S., & Adari, S. K. (2021). What is MLOps? In: Beginning MLOps with MLFlow (pp. 79\u2013124).","DOI":"10.1007\/978-1-4842-6549-9_3"},{"key":"6511_CR3","doi-asserted-by":"crossref","unstructured":"Amrouni, S., Moulin, A., Vann, J., Vyetrenko, S., Balch, T., & Veloso, M. (2021). ABIDES-Gym: Gym environments for multi-agent discrete event simulation and application to financial markets. In ACM International conference on AI in finance (ICAIF).","DOI":"10.1145\/3490354.3494433"},{"key":"6511_CR4","doi-asserted-by":"crossref","unstructured":"Ang, A. (2012). Mean-variance investing. Columbia Business School Research Paper No. 12\/49.","DOI":"10.2139\/ssrn.2131932"},{"key":"6511_CR5","unstructured":"Araci, D. (2019). Finbert: Financial sentiment analysis with pre-trained language models. arXiv preprint arXiv:1908.10063."},{"key":"6511_CR6","doi-asserted-by":"crossref","unstructured":"Ardon, L., Vadori, N., Spooner, T., Xu, M., Vann, J., & Ganesh, S. (2021). Towards a fully RL-based market simulator. In ACM international conference on AI in finance (ICAIF).","DOI":"10.1145\/3490354.3494372"},{"key":"6511_CR7","doi-asserted-by":"crossref","unstructured":"Atwal, H. (2019). Practical DataOps: Delivering agile data science at scale.","DOI":"10.1007\/978-1-4842-5104-1"},{"key":"6511_CR8","unstructured":"Bao, W., & Liu, X.-Y. (2019). Multi-agent deep reinforcement learning for liquidation strategy analysis. In ICML workshop on applications and infrastructure for multi-agent learning."},{"issue":"1","key":"6511_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine Learning, 45(1), 5\u201332.","journal-title":"Machine Learning"},{"key":"6511_CR10","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., & Zaremba, W. (2016). OpenAI Gym. arXiv preprint arXiv:1606.01540."},{"issue":"4","key":"6511_CR11","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1093\/rfs\/5.4.553","volume":"5","author":"SJ Brown","year":"1992","unstructured":"Brown, S. J., Goetzmann, W., Ibbotson, R. G., & Ross, S. A. (1992). Survivorship bias in performance studies. The Review of Financial Studies, 5(4), 553\u2013580.","journal-title":"The Review of Financial Studies"},{"key":"6511_CR12","doi-asserted-by":"crossref","unstructured":"Byrd, D., & Polychroniadou, A. (2020). Differentially private secure multi-party computation for federated learning in financial applications. In Proceedings of the first ACM international conference on AI in finance (pp. 1\u20139).","DOI":"10.1145\/3383455.3422562"},{"key":"6511_CR13","doi-asserted-by":"crossref","unstructured":"Chen, Q., & Liu, X.-Y. (2020) Quantifying ESG alpha using scholar big data: An automated machine learning approach. In Proceedings of the first ACM international conference on AI in finance (pp. 1\u20138).","DOI":"10.1145\/3383455.3422529"},{"key":"6511_CR14","unstructured":"Chen, C.-C., Huang, H.-H., & Chen, H.-H. (2018). Ntusd-fin: A market sentiment dictionary for financial social media data applications. In Proceedings of the 1st financial narrative processing workshop (FNP 2018) (pp. 37\u201343)."},{"key":"6511_CR15","unstructured":"Christiano, P. F., Leike, J., Brown, T., Martic, M., Legg, S., & Amodei, D. (2017). Deep reinforcement learning from human preferences. Advances in Neural Information Processing Systems30."},{"key":"6511_CR16","doi-asserted-by":"crossref","unstructured":"Coletta, A., Prata, M., Conti, M., Mercanti, E., Bartolini, N., Moulin, A., Vyetrenko, S., & Balch, T. (2021). Towards realistic market simulations: A generative adversarial networks approach. In ACM international conference on AI in finance (ICAIF).","DOI":"10.1145\/3490354.3494411"},{"key":"6511_CR17","unstructured":"De Prado, M. L. (2018). Advances in financial machine learning."},{"key":"6511_CR18","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). ImageNet: A large-scale hierarchical image database. In IEEE conference on computer vision and pattern recognition (pp. 248\u2013255).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"6511_CR19","unstructured":"Dulac-Arnold, G., Mankowitz, D., & Hester, T. (2019). Challenges of real-world reinforcement learning. In ICML workshop on reinforcement learning for real life."},{"issue":"9","key":"6511_CR20","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","volume":"110","author":"G Dulac-Arnold","year":"2021","unstructured":"Dulac-Arnold, G., Levine, N., Mankowitz, D. J., Li, J., Paduraru, C., Gowal, S., & Hester, T. (2021). Challenges of real-world reinforcement learning: Definitions, benchmarks and analysis. Machine Learning, 110(9), 2419\u20132468.","journal-title":"Machine Learning"},{"key":"6511_CR21","first-page":"104","volume":"2191","author":"J Ereth","year":"2018","unstructured":"Ereth, J. (2018). DataOps: Towards a definition. LWDA, 2191, 104\u2013112.","journal-title":"LWDA"},{"key":"6511_CR22","doi-asserted-by":"crossref","unstructured":"Fang, Y., Liu, X.-Y., & Yang, H. (2019). Practical machine learning approach to capture the scholar data driven Alpha in AI industry. In IEEE international conference on big data (big data) (pp. 2230\u20132239). IEEE.","DOI":"10.1109\/BigData47090.2019.9006093"},{"key":"6511_CR23","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., & Levine, S. (2020). D4RL: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219."},{"key":"6511_CR24","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., & Bengio, Y. (2014). Generative adversarial nets. Advances in Neural Information Processing Systems27."},{"key":"6511_CR25","unstructured":"Gort, B., Liu, X.-Y., Sun, X., Gao, J., Chen, S., & Wang, C. D. (2023). Deep reinforcement learning for cryptocurrency trading: Practical approach to address backtest overfitting. AAAI: AI in Finance Bridge."},{"key":"6511_CR26","doi-asserted-by":"crossref","unstructured":"Guan, M., & Liu, X.-Y. (2021). Explainable deep reinforcement learning for portfolio management: An empirical approach. In ACM international conference on AI in finance (ICAIF).","DOI":"10.1145\/3490354.3494415"},{"key":"6511_CR27","doi-asserted-by":"crossref","unstructured":"Gupta, A., Savarese, S., Ganguli, S., & Fei-Fei, L. (2021). Embodied intelligence via learning and evolution. Nature Communications.","DOI":"10.1038\/s41467-021-25874-z"},{"key":"6511_CR28","doi-asserted-by":"crossref","unstructured":"Hambly, B., Xu, R., & Yang, H. (2023). Recent advances in reinforcement learning in finance. Mathematical Finance.","DOI":"10.1111\/mafi.12382"},{"key":"6511_CR29","doi-asserted-by":"crossref","unstructured":"Hamilton, W. L., Clark, K., Leskovec, J., & Jurafsky, D. (2016). Inducing domain-specific sentiment lexicons from unlabeled corpora. In Proceedings of the conference on empirical methods in natural language processing. conference on empirical methods in natural language processing, vol. 2016 (p. 595). NIH Public Access.","DOI":"10.18653\/v1\/D16-1057"},{"key":"6511_CR30","unstructured":"Han, J., Xia, Z., Liu, X.-Y., Zhang, C., Wang, Z., & Guo, J. (2023). Massively parallel market simulator for financial reinforcement learning. AI in Finance Bridge, AAAI."},{"key":"6511_CR31","doi-asserted-by":"crossref","unstructured":"Hein, D., Depeweg, S., Tokic, M., Udluft, S., Hentschel, A., Runkler, T.A., & Sterzing, V. (2017). A benchmark environment motivated by industrial control problems. In IEEE symposium series on computational intelligence (SSCI) (pp. 1\u20138). IEEE.","DOI":"10.1109\/SSCI.2017.8280935"},{"key":"6511_CR32","doi-asserted-by":"crossref","unstructured":"Hutto, C., & Gilbert, E. (2014). Vader: A parsimonious rule-based model for sentiment analysis of social media text. In Proceedings of the international AAAI conference on web and social media, vol. 8 (pp. 216\u2013225).","DOI":"10.1609\/icwsm.v8i1.14550"},{"key":"6511_CR33","doi-asserted-by":"crossref","unstructured":"Kairouz, P., McMahan, H. B., Avent, B., Bellet, A., Bennis, M., Bhagoji, A. N., Bonawitz, K., Charles, Z., Cormode, G., & Cummings R. (2021). Advances and open problems in federated learning. Foundations and trends\u00ae in machine learning 14(1\u20132), 1\u2013210.","DOI":"10.1561\/2200000083"},{"issue":"5","key":"6511_CR34","doi-asserted-by":"publisher","first-page":"30","DOI":"10.2469\/faj.v66.n5.3","volume":"66","author":"M Kritzman","year":"2010","unstructured":"Kritzman, M., & Li, Y. (2010). Skulls, financial turbulence, and risk management. Financial Analysts Journal, 66(5), 30\u201341.","journal-title":"Financial Analysts Journal"},{"key":"6511_CR35","unstructured":"Levine, S., Kumar, A., Tucker, G., & Fu, J. (2020). Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643."},{"key":"6511_CR36","unstructured":"Li, X., Li, Y., Yang, H., Yang, L., & Liu, X.-Y. (2019). DP-LSTM: Differential privacy-inspired LSTM for stock prediction using financial news. In 33rd conference on neural information processing systems workshop on robust AI in financial services: Data, fairness, explainability, trustworthiness, and privacy, December 2019."},{"key":"6511_CR37","doi-asserted-by":"crossref","unstructured":"Li, Z., Liu, X.-Y., Zheng, J., Wang, Z., Walid, A., & Guo, J. (2021). FinRL-Podracer: High-performance and scalable deep reinforcement learning for quantitative finance. In ACM international conference on AI in finance (ICAIF).","DOI":"10.1145\/3490354.3494413"},{"key":"6511_CR38","unstructured":"Liang, E., Liaw, R., Nishihara, R., Moritz, P., Fox, R., Goldberg, K., Gonzalez, J., Jordan, M., & Stoica, I. (2018). RLlib: Abstractions for distributed reinforcement learning. In International conference on machine learning (pp. 3053\u20133062). PMLR."},{"key":"6511_CR39","unstructured":"Liaw, R., Liang, E., Nishihara, R., Moritz, P., Gonzalez, J. E., & Stoica, I. (2018). Tune: A research platform for distributed model selection and training. In ICML AutoML workshop."},{"key":"6511_CR40","unstructured":"Lillicrap, T., Hunt, J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2016). Continuous control with deep reinforcement learning. In International conference on learning representations (ICLR)."},{"key":"6511_CR41","unstructured":"Liu, X.-Y., Li, Z., Wang, Z., & Zheng, J. (2021). ElegantRL: A lightweight and stable deep reinforcement learning library. GitHub."},{"key":"6511_CR42","doi-asserted-by":"crossref","unstructured":"Liu, X.-Y., Li, Z., Yang, Z., Zheng, J., Wang, Z., Walid, A., Guo, J., & Jordan, M. (2021). ElegantRL-Podracer: Scalable and elastic library for cloud-native deep reinforcement learning. In Deep reinforcement learning workshop at NeurIPS.","DOI":"10.1145\/3490354.3494413"},{"key":"6511_CR43","doi-asserted-by":"crossref","unstructured":"Liu, Y., Liu, Q., Zhao, H., Pan, Z., & Liu, C. (2020). Adaptive quantitative trading: An imitative deep reinforcement learning approach. In Proceedings of the AAAI conference on artificial intelligence, vol. 34 (pp. 2128\u20132135).","DOI":"10.1609\/aaai.v34i02.5587"},{"key":"6511_CR44","doi-asserted-by":"crossref","unstructured":"Liu, X.-Y., Rui, J., Gao, J., Yang, L., Yang, H., Wang, Z., Wang, C. D., & Jian, G. (2021). FinRL-Meta: Data-driven deep reinforcementlearning in quantitative finance. NeurIPS: Data-Centric AI Workshop.","DOI":"10.1145\/3490354.3494366"},{"key":"6511_CR45","unstructured":"Liu, X.-Y., Xia, Z., Rui, J., Gao, J., Yang, H., Zhu, M., Wang, C. D., Wang, Z., & Guo, J. FinRL-Meta: Market environments and benchmarks for data-driven financial reinforcement learning. In Thirty-sixth conference on neural information processing systems."},{"key":"6511_CR46","unstructured":"Liu, X.-Y., Xiong, Z., Zhong, S., Yang, H., & Walid, A. (2018). Practical deep reinforcement learning approach for stock trading. NeurIPS: Workshop on Challenges and Opportunities for AI in Financial Services."},{"key":"6511_CR47","doi-asserted-by":"crossref","unstructured":"Liu, X.-Y., Yang, H., Chen, Q., Zhang, R., Yang, L., Xiao, B., & Wang, C. D. (2020). FinRL: A deep reinforcement learning library for automated stock trading in quantitative finance. NeurIPS: Deep RL Workshop.","DOI":"10.2139\/ssrn.3737257"},{"key":"6511_CR48","doi-asserted-by":"crossref","unstructured":"Liu, X.-Y., Yang, H., Gao, J., & Wang, C. D. (2021). FinRL: Deep reinforcement learning framework to automate trading in quantitative finance. In ACM international conference on AI in finance (ICAIF)","DOI":"10.2139\/ssrn.3955949"},{"issue":"226","key":"6511_CR49","first-page":"1","volume":"22","author":"Y Liu","year":"2021","unstructured":"Liu, Y., Fan, T., Chen, T., Xu, Q., & Yang, Q. (2021). Fate: An industrial grade platform for collaborative learning with data protection. Journal of Machine Learning Research, 22(226), 1\u20136.","journal-title":"Journal of Machine Learning Research"},{"key":"6511_CR50","unstructured":"Loria, S. (2018). textblob documentation. Release 0.15 2(8)."},{"issue":"1","key":"6511_CR51","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1111\/j.1540-6261.2010.01625.x","volume":"66","author":"T Loughran","year":"2011","unstructured":"Loughran, T., & McDonald, B. (2011). When is a liability not a liability? Textual analysis, dictionaries, and 10-ks. The Journal of Finance, 66(1), 35\u201365.","journal-title":"The Journal of Finance"},{"issue":"1","key":"6511_CR52","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1007\/s10614-020-10038-w","volume":"57","author":"J Lussange","year":"2021","unstructured":"Lussange, J., Lazarevich, I., Bourgeois-Gironde, S., Palminteri, S., & Gutkin, B. (2021). Modelling stock markets by multi-agent reinforcement learning. Computational Economics, 57(1), 113\u2013147.","journal-title":"Computational Economics"},{"key":"6511_CR53","unstructured":"Mahfouz, M., Gopalakrishnan, S., Suau, M., Patra, S., Mandic, P. D., Magazzeni, D., & Veloso, M. (2023). Towards asset allocation using behavioural cloning and reinforcement learning. AAAI AI for Financial Services Bridge."},{"key":"6511_CR54","unstructured":"Makoviychuk, V., Wawrzyniak, L., Guo, Y., Lu, M., Storey, K., Macklin, M., Hoeller, D., Rudin, N., Allshire, A., Handa, A., & State, G. (2021). Isaac Gym: High performance GPU-based physics simulation for robot learning. NeurIPS: Datasets and Benchmarks Track."},{"issue":"1","key":"6511_CR55","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/1468-036X.00205","volume":"9","author":"BG Malkiel","year":"2003","unstructured":"Malkiel, B. G. (2003). Passive investment strategies and efficient markets. European Financial Management, 9(1), 1\u201310.","journal-title":"European Financial Management"},{"key":"6511_CR56","doi-asserted-by":"crossref","unstructured":"Mamon, R. S., & Elliott, R. J. (2007). Hidden Markov models in finance vol. 4.","DOI":"10.1007\/0-387-71163-5"},{"key":"6511_CR57","unstructured":"Mazumder, M., Banbury, C., Yao, X., Karla\u0161, B., Rojas, W. G., Diamos, S., Diamos, G., He, L., Kiela, D., & Jurado, D. et al. (2022). Dataperf: Benchmarks for data-centric AI development. arXiv preprint arXiv:2207.10062."},{"key":"6511_CR58","unstructured":"Miller, G. A. (1998). WordNet: An electronic lexical database."},{"key":"6511_CR59","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A., Veness, J., Bellemare, M., Graves, A., Riedmiller, M., Fidjeland, A., Ostrovski, G., Petersen, S., Beattie, C., Sadik, A., Antonoglou, I., King, H., Kumaran, D., Wierstra, D., Legg, S., & Hassabis, D. (2015). Human-level control through deep reinforcement learning. Nature, 518, 529\u201333.","journal-title":"Nature"},{"key":"6511_CR60","doi-asserted-by":"crossref","unstructured":"Nargesian, F., Samulowitz, H., Khurana, U., Khalil, E. B., & Turaga, D. S. (2017). Learning feature engineering for classification. In IJCAI, vol. 17 (pp. 2529\u20132535).","DOI":"10.24963\/ijcai.2017\/352"},{"key":"6511_CR61","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/MC.2011.31","volume":"44","author":"G Nuti","year":"2011","unstructured":"Nuti, G., Mirghaemi, M., Treleaven, P., & Yingsaeree, C. (2011). Algorithmic trading. Computer, 44, 61\u201369.","journal-title":"Computer"},{"key":"6511_CR62","unstructured":"OpenAI: GPT-4 technical report. https:\/\/arxiv.org\/abs\/2303.08774 (2023)."},{"key":"6511_CR63","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., Wu, J., Jiang, X., Almeida, D., Wainwright, C., Mishkin, P., Zhang, C., Agarwal, S., Slama, K., Ray, A., et al. (2022). Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems, 35, 27730\u201327744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6511_CR64","unstructured":"Polyzotis, N., & Zaharia, M. (2021). What can data-centric AI learn from data and ML engineering? arXiv preprint arXiv:2112.06439."},{"key":"6511_CR65","unstructured":"Pricope, T.-V. (2021). Deep reinforcement learning in quantitative algorithmic trading: A review. arXiv preprint arXiv:2106.00123."},{"key":"6511_CR66","unstructured":"Qin, R., Gao, S., Zhang, X., Xu, Z., Huang, S., Li, Z., Zhang, W., & Yu, Y. (2022). NeoRL: A near real-world benchmark for offline reinforcement learning. NeurIPS Datasets and Benchmarks."},{"issue":"1\u20132","key":"6511_CR67","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/S0378-4371(01)00312-0","volume":"299","author":"M Raberto","year":"2001","unstructured":"Raberto, M., Cincotti, S., Focardi, S. M., & Marchesi, M. (2001). Agent-based simulation of a financial market. Physica A: Statistical Mechanics and its Applications, 299(1\u20132), 319\u2013327.","journal-title":"Physica A: Statistical Mechanics and its Applications"},{"key":"6511_CR68","unstructured":"Raffin, A., Hill, A., Gleave, A., Kanervisto, A., Ernestus, M., & Dormann, N. (2021). Stable-baselines3: Reliable reinforcement learning implementations. Journal of Machine Learning Research."},{"issue":"20","key":"6511_CR69","doi-asserted-by":"publisher","first-page":"4460","DOI":"10.3390\/app9204460","volume":"9","author":"F Rundo","year":"2019","unstructured":"Rundo, F. (2019). Deep LSTM with reinforcement learning layer for financial trend prediction in fx high frequency trading systems. Applied Sciences, 9(20), 4460.","journal-title":"Applied Sciences"},{"key":"6511_CR70","doi-asserted-by":"crossref","unstructured":"Sambasivan, N., Kapania, S., Highfill, H., Akrong, D., Paritosh, P., & Aroyo, L. M. (2021). \u201cEveryone wants to do the model work, not the data work\u201d: Data cascades in high-stakes AI. In Proceedings of the 2021 CHI conference on human factors in computing systems (pp. 1\u201315).","DOI":"10.1145\/3411764.3445518"},{"key":"6511_CR71","doi-asserted-by":"crossref","unstructured":"Scholl, M. P., Calinescu, A., & Farmer, J. D. (2021). How market ecology explains market malfunction. Proceedings of the National Academy of Sciences118(26)","DOI":"10.1073\/pnas.2015574118"},{"key":"6511_CR72","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv:1707.06347."},{"key":"6511_CR73","doi-asserted-by":"crossref","unstructured":"Sharpe, W. F. (1994). The sharpe ratio. Journal of Portfolio Management.","DOI":"10.3905\/jpm.1994.409501"},{"key":"6511_CR74","doi-asserted-by":"crossref","unstructured":"Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Van Den\u00a0Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., & Lanctot, M., et al. (2016). Mastering the game of go with deep neural networks and tree search. Nature529(7587), 484\u2013489.","DOI":"10.1038\/nature16961"},{"issue":"7676","key":"6511_CR75","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., Antonoglou, I., Huang, A., Guez, A., Hubert, T., Baker, L., Lai, M., Bolton, A., et al. (2017). Mastering the game of Go without human knowledge. Nature, 550(7676), 354\u2013359.","journal-title":"Nature"},{"key":"6511_CR76","doi-asserted-by":"crossref","unstructured":"Strapparava, C., & Mihalcea, R. (2007). Semeval-2007 task 14: Affective text. In Proceedings of the fourth international workshop on semantic evaluations (SemEval-2007) (pp. 70\u201374).","DOI":"10.3115\/1621474.1621487"},{"key":"6511_CR77","unstructured":"Sutton, R. S. (2022). The quest for a common model of the intelligent decision maker. arXiv preprint arXiv:2202.13252."},{"key":"6511_CR78","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction."},{"key":"6511_CR79","doi-asserted-by":"crossref","unstructured":"Tai, Y.-J., & Kao, H.-Y. (2013). Automatic domain-specific sentiment lexicon generation with label propagation. In Proceedings of international conference on information integration and web-based applications & services (pp. 53\u201362).","DOI":"10.1145\/2539150.2539190"},{"key":"6511_CR80","unstructured":"Team, O. E. L., Stooke, A., Mahajan, A., Barros, C., Deck, C., Bauer, J., Sygnowski, J., Trebacz, M., Jaderberg, M., & Mathieu, M. et al. (2021). Open-ended learning leads to generally capable agents. arXiv preprint arXiv:2107.12808."},{"key":"6511_CR81","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., & Tassa, Y. (2012). Mujoco: A physics engine for model-based control. In IEEE\/RSJ international conference on intelligent robots and systems (pp. 5026\u20135033). IEEE.","DOI":"10.1109\/IROS.2012.6386109"},{"key":"6511_CR82","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1145\/2500117","volume":"56","author":"P Treleaven","year":"2013","unstructured":"Treleaven, P., Galas, M., & Lalchand, V. (2013). Algorithmic trading review. Communications of the ACM, 56, 76\u201385.","journal-title":"Communications of the ACM"},{"key":"6511_CR83","doi-asserted-by":"crossref","unstructured":"V\u00e1zquez-Canteli, J. R., K\u00e4mpf, J., Henze, G., & Nagy, Z. (2019). CityLearn v1.0: An OpenAI gym environment for demand response with deep reinforcement learning. In ACM international conference on systems for energy-efficient buildings, cities, and transportation.","DOI":"10.1145\/3360322.3360998"},{"issue":"7782","key":"6511_CR84","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., Babuschkin, I., Czarnecki, W. M., Mathieu, M., Dudzik, A., Chung, J., Choi, D. H., Powell, R., Ewalds, T., Georgiev, P., et al. (2019). Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature, 575(7782), 350\u2013354.","journal-title":"Nature"},{"issue":"3","key":"6511_CR85","doi-asserted-by":"publisher","first-page":"98","DOI":"10.3905\/JPM.2009.35.3.098","volume":"35","author":"RE Whaley","year":"2009","unstructured":"Whaley, R. E. (2009). Understanding the VIX. The Journal of Portfolio Management, 35(3), 98\u2013105.","journal-title":"The Journal of Portfolio Management"},{"key":"6511_CR86","doi-asserted-by":"crossref","unstructured":"Whang, S. E., Roh, Y., Song, H., & Lee, J.-G. (2023). Data collection and quality challenges in deep learning: A data-centric AI perspective. The VLDB Journal 1\u201323.","DOI":"10.1007\/s00778-022-00775-9"},{"key":"6511_CR87","unstructured":"Wilkman, M. (2020). Feasibility of a reinforcement learning based stock trader. Aaltodoc."},{"key":"6511_CR88","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/j.jpdc.2020.03.012","volume":"141","author":"G Xiao","year":"2020","unstructured":"Xiao, G., Li, J., Chen, Y., & Li, K. (2020). Malfcs: An effective malware classification framework with automated feature extraction based on deep convolutional neural networks. Elsevier Journal of Parallel and Distributed Computing, 141, 49\u201358.","journal-title":"Elsevier Journal of Parallel and Distributed Computing"},{"issue":"1","key":"6511_CR89","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1007\/s10462-017-9588-9","volume":"50","author":"FZ Xing","year":"2018","unstructured":"Xing, F. Z., Cambria, E., & Welsch, R. E. (2018). Natural language based financial forecasting: A survey. Artificial Intelligence Review, 50(1), 49\u201373.","journal-title":"Artificial Intelligence Review"},{"key":"6511_CR90","doi-asserted-by":"crossref","unstructured":"Yang, H., Liu, X.-Y., Zhong, S., & Walid, A. (2020). Deep reinforcement learning for automated stock trading: An ensemble strategy. In ACM International Conference on AI in Finance.","DOI":"10.2139\/ssrn.3690996"},{"key":"6511_CR91","doi-asserted-by":"crossref","unstructured":"Zha, D., Bhat, Z. P., Lai, K.-H., Yang, F., & Hu, X. (2023). Data-centric AI: Perspectives and challenges. arXiv preprint arXiv:2301.04819.","DOI":"10.1137\/1.9781611977653.ch106"},{"key":"6511_CR92","unstructured":"Zha, D., Bhat, Z. P., Lai, K.-H., Yang, F., Jiang, Z., Zhong, S., & Hu, X. (2023). Data-centric artificial intelligence: A survey. arXiv preprint arXiv:2303.10158."},{"issue":"2","key":"6511_CR93","doi-asserted-by":"publisher","first-page":"25","DOI":"10.3905\/jfds.2020.1.030","volume":"2","author":"Z Zhang","year":"2020","unstructured":"Zhang, Z., Zohren, S., & Roberts, S. (2020). Deep reinforcement learning for trading. The Journal of Financial Data Science, 2(2), 25\u201340.","journal-title":"The Journal of Financial Data Science"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-023-06511-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-023-06511-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-023-06511-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:09:08Z","timestamp":1764266948000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-023-06511-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,26]]},"references-count":93,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["6511"],"URL":"https:\/\/doi.org\/10.1007\/s10994-023-06511-w","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,26]]},"assertion":[{"value":"29 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 February 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors are from universities and research labs. No competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"We do not contain ethics issues.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"FinRL-Meta uses MIT License. We, all authors, welcome any person participate in our project and join our open-source community.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"We, all authors, consent publication of everything mentioned in the paper.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}