{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T16:48:08Z","timestamp":1776876488400,"version":"3.51.2"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100013139","name":"Humanities and Social Science Fund of Ministry of Education of the People's Republic of China","doi-asserted-by":"publisher","award":["22XJCZH004"],"award-info":[{"award-number":["22XJCZH004"]}],"id":[{"id":"10.13039\/501100013139","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12371512"],"award-info":[{"award-number":["12371512"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72301211"],"award-info":[{"award-number":["72301211"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Applied Soft Computing"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.asoc.2026.115168","type":"journal-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T17:32:57Z","timestamp":1775151177000},"page":"115168","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Algorithmic trading by reinforcement learning in a collaborative manner"],"prefix":"10.1016","volume":"197","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0094-8240","authenticated-orcid":false,"given":"Li","family":"Long","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9639-4507","authenticated-orcid":false,"given":"Chunxia","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9367-5685","authenticated-orcid":false,"given":"Cong","family":"Ma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0657-4972","authenticated-orcid":false,"given":"Hongtao","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3347-0477","authenticated-orcid":false,"given":"Lizhen","family":"Ji","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3471-8451","authenticated-orcid":false,"given":"Fei","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Jiangshe","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7705-7025","authenticated-orcid":false,"given":"Kaiwen","family":"Qiu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"5","key":"10.1016\/j.asoc.2026.115168_bib0005","doi-asserted-by":"crossref","first-page":"2045","DOI":"10.1111\/jofi.12186","article-title":"Rise of the machines: algorithmic trading in the foreign exchange market","volume":"69","author":"Chaboud","year":"2014","journal-title":"The Journal of Finance"},{"key":"10.1016\/j.asoc.2026.115168_bib0010","series-title":"Proceedings of the 9th International Symposium on Information and Communication Technology","first-page":"98","article-title":"Prediction and portfolio optimization in quantitative trading using machine learning techniques","author":"Ta","year":"2018"},{"issue":"1","key":"10.1016\/j.asoc.2026.115168_bib0015","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1080\/17517575.2018.1493145","article-title":"Automated trading systems statistical and machine learning methods and hardware implementation: a survey","volume":"13","author":"Huang","year":"2019","journal-title":"Enterp. Inf. Syst."},{"issue":"7540","key":"10.1016\/j.asoc.2026.115168_bib0020","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"issue":"7782","key":"10.1016\/j.asoc.2026.115168_bib0025","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in starcraft II using multi-agent reinforcement learning","volume":"575","author":"Vinyals","year":"2019","journal-title":"Nature"},{"issue":"7587","key":"10.1016\/j.asoc.2026.115168_bib0030","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"key":"10.1016\/j.asoc.2026.115168_bib0035","author":"Mnih"},{"key":"10.1016\/j.asoc.2026.115168_bib0040","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1016\/j.ins.2020.05.066","article-title":"Adaptive stock trading strategies with deep reinforcement learning methods","volume":"538","author":"Wu","year":"2020","journal-title":"Inf. Sci."},{"key":"10.1016\/j.asoc.2026.115168_bib0045","series-title":"International Conference on Learning Representations","first-page":"6423","article-title":"Timesnet: temporal 2d-variation modeling for general time series analysis","author":"Wu","year":"2023"},{"key":"10.1016\/j.asoc.2026.115168_bib0050","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121502","article-title":"A multi-agent reinforcement learning framework for optimizing financial trading strategies based on timesnet","volume":"237","author":"Huang","year":"2024","journal-title":"Expert Syst. Appl."},{"issue":"6","key":"10.1016\/j.asoc.2026.115168_bib0055","doi-asserted-by":"crossref","first-page":"864","DOI":"10.1109\/TSMCA.2007.904825","article-title":"A multiagent approach toq-learning for daily stock trading","volume":"37","author":"Lee","year":"2007","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics - Part A: Systems and Humans"},{"key":"10.1016\/j.asoc.2026.115168_bib0060","series-title":"Proceedings of the First ACM International Conference on AI in Finance","first-page":"1","article-title":"Deep reinforcement learning for automated stock trading: an ensemble strategy","author":"Yang","year":"2020"},{"issue":"4","key":"10.1016\/j.asoc.2026.115168_bib0065","doi-asserted-by":"crossref","first-page":"851","DOI":"10.1109\/JSAC.2023.3242710","article-title":"Communication-efficient distributed learning: an overview","volume":"41","author":"Cao","year":"2023","journal-title":"IEEE J. Sel. Areas Commun."},{"issue":"12","key":"10.1016\/j.asoc.2026.115168_bib0070","doi-asserted-by":"crossref","first-page":"3579","DOI":"10.1109\/JSAC.2021.3118346","article-title":"Distributed learning in wireless networks: recent progress and future challenges","volume":"39","author":"Chen","year":"2021","journal-title":"IEEE J. Sel. Areas Commun."},{"issue":"10","key":"10.1016\/j.asoc.2026.115168_bib0075","doi-asserted-by":"crossref","first-page":"7338","DOI":"10.1109\/JIOT.2021.3103635","article-title":"Sustainability of healthcare data analysis iot-based systems using deep federated learning","volume":"9","author":"Elayan","year":"2021","journal-title":"IEEE Internet Things J."},{"key":"10.1016\/j.asoc.2026.115168_bib0080","series-title":"International Conference on Learning Representations","first-page":"3864","article-title":"Distributed prioritized experience replay","author":"Horgan","year":"2019"},{"key":"10.1016\/j.asoc.2026.115168_bib0085","series-title":"International Conference on Learning Representations","first-page":"5150","article-title":"Recurrent experience replay in distributed reinforcement learning","author":"Kapturowski","year":"2019"},{"key":"10.1016\/j.asoc.2026.115168_bib0090","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"584","article-title":"Cirl: controllable imitative reinforcement learning for vision-based self-driving","author":"Liang","year":"2018"},{"issue":"2","key":"10.1016\/j.asoc.2026.115168_bib0095","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3054912","article-title":"Imitation learning: a survey of learning methods","volume":"50","author":"Hussein","year":"2017","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.asoc.2026.115168_bib0100","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.114632","article-title":"An application of deep reinforcement learning to algorithmic trading","volume":"173","author":"Th\u00e9ate","year":"2021","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115168_bib0105","article-title":"Predicting price trends combining kinetic energy and deep reinforcement learning","volume":"244","author":"Ghotbi","year":"2023","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115168_bib0110","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2023.110802","article-title":"Algorithmic trading using combinational rule vector and deep reinforcement learning","volume":"147","author":"Huang","year":"2023","journal-title":"Appl. Soft Comput."},{"issue":"1","key":"10.1016\/j.asoc.2026.115168_bib0115","article-title":"Stock trading strategies based on deep reinforcement learning","volume":"2022","author":"Li","year":"2022","journal-title":"Scientific Programming"},{"key":"10.1016\/j.asoc.2026.115168_bib0120","article-title":"Deep reinforcement learning applied to a sparse-reward trading environment with intraday data","volume":"238","author":"de Azevedo Takara","year":"2024","journal-title":"Expert Syst. Appl."},{"issue":"7","key":"10.1016\/j.asoc.2026.115168_bib0125","doi-asserted-by":"crossref","first-page":"2837","DOI":"10.1109\/TNNLS.2020.2997523","article-title":"Price trailing for financial trading using deep reinforcement learning","volume":"32","author":"Tsantekidis","year":"2020","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"5\u20136","key":"10.1016\/j.asoc.2026.115168_bib0130","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1002\/(SICI)1099-131X(1998090)17:5\/6<441::AID-FOR707>3.0.CO;2-#","article-title":"Performance functions and reinforcement learning for trading systems and portfolios","volume":"17","author":"Moody","year":"1998","journal-title":"J. Forecast."},{"issue":"3","key":"10.1016\/j.asoc.2026.115168_bib0135","doi-asserted-by":"crossref","first-page":"653","DOI":"10.1109\/TNNLS.2016.2522401","article-title":"Deep direct reinforcement learning for financial signal representation and trading","volume":"28","author":"Deng","year":"2016","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"4","key":"10.1016\/j.asoc.2026.115168_bib0140","doi-asserted-by":"crossref","first-page":"875","DOI":"10.1109\/72.935097","article-title":"Learning to trade via direct reinforcement","volume":"12","author":"Moody","year":"2001","journal-title":"IEEE Trans. Neural Netw."},{"key":"10.1016\/j.asoc.2026.115168_bib0145","doi-asserted-by":"crossref","first-page":"108014","DOI":"10.1109\/ACCESS.2019.2932789","article-title":"Deep robust reinforcement learning for practical algorithmic trading","volume":"7","author":"Li","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.asoc.2026.115168_bib0150","series-title":"2019 International Conference on Information and Communication Technology Convergence","first-page":"7","article-title":"A deep multimodal reinforcement learning system combined with CNN and LSTM for stock trading","author":"Shin","year":"2019"},{"key":"10.1016\/j.asoc.2026.115168_bib0155","series-title":"Advances in Neural Information Processing Systems","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.asoc.2026.115168_bib0160","doi-asserted-by":"crossref","first-page":"592","DOI":"10.1016\/j.ins.2022.12.042","article-title":"Self-attention based deep direct recurrent reinforcement learning with hybrid loss for trading signal generation","volume":"623","author":"Kwak","year":"2023","journal-title":"Inf. Sci."},{"key":"10.1016\/j.asoc.2026.115168_bib0165","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1109\/TKDE.2024.3484454","article-title":"Exploring progress in multivariate time series forecasting: comprehensive benchmarking and heterogeneity analysis","volume":"37","author":"Shao","year":"2024","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.asoc.2026.115168_bib0170","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"8389","article-title":"Stockmixer: a simple yet strong mlp-based architecture for stock price forecasting","author":"Fan","year":"2024"},{"key":"10.1016\/j.asoc.2026.115168_bib0175","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"2128","article-title":"Adaptive quantitative trading: an imitative deep reinforcement learning approach","author":"Liu","year":"2020"},{"key":"10.1016\/j.asoc.2026.115168_bib0180","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.120939","article-title":"Human-aligned trading by imitative multi-loss reinforcement learning","volume":"234","author":"Ye","year":"2023","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115168_bib0185","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2023.111108","article-title":"Soft imitation reinforcement learning with value decomposition for portfolio management","volume":"151","author":"Dong","year":"2024","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2026.115168_bib0190","doi-asserted-by":"crossref","first-page":"889","DOI":"10.1007\/s10489-020-01839-5","article-title":"A multi-layer and multi-ensemble stock trader using deep learning and deep reinforcement learning","volume":"51","author":"Carta","year":"2021","journal-title":"Appl. Intell."},{"key":"10.1016\/j.asoc.2026.115168_bib0195","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.118124","article-title":"A multi-agent deep reinforcement learning framework for algorithmic trading in financial markets","volume":"208","author":"Shavandi","year":"2022","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115168_bib0200","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2023.129394","article-title":"A novel crude oil futures trading strategy based on volume-price time-frequency decomposition with ensemble deep reinforcement learning","volume":"285","author":"Du","year":"2023","journal-title":"Energy"},{"issue":"2","key":"10.1016\/j.asoc.2026.115168_bib0205","doi-asserted-by":"crossref","first-page":"2452","DOI":"10.1007\/s10489-022-03606-0","article-title":"Dynamic stock-decision ensemble strategy based on deep reinforcement learning","volume":"53","author":"Yu","year":"2023","journal-title":"Appl. Intell."},{"key":"10.1016\/j.asoc.2026.115168_bib0210","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121373","article-title":"Automated cryptocurrency trading approach using ensemble deep reinforcement learning: learn to understand candlesticks","volume":"237","author":"Jing","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115168_bib0215","series-title":"On-Line Q-Learning Using Connectionist Systems","author":"Rummery","year":"1994"},{"key":"10.1016\/j.asoc.2026.115168_bib0220","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"10.1016\/j.asoc.2026.115168_bib0225","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"2094","article-title":"Deep reinforcement learning with double q-learning","author":"van Hasselt","year":"2016"},{"issue":"1","key":"10.1016\/j.asoc.2026.115168_bib0230","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/s41746-020-00323-1","article-title":"The future of digital health with federated learning","volume":"3","author":"Rieke","year":"2020","journal-title":"NPJ Digit. Med."},{"key":"10.1016\/j.asoc.2026.115168_bib0235","series-title":"Artificial Intelligence and Statistics","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","author":"McMahan","year":"2017"},{"issue":"1","key":"10.1016\/j.asoc.2026.115168_bib0240","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1111\/j.1540-6261.2006.00836.x","article-title":"The cross-section of volatility and expected returns","volume":"61","author":"Ang","year":"2006","journal-title":"The Journal of Finance"},{"key":"10.1016\/j.asoc.2026.115168_bib0245","author":"Fujimoto"},{"key":"10.1016\/j.asoc.2026.115168_bib0250","series-title":"International Conference on Learning Representations","first-page":"5837","article-title":"Soft q-learning with mutual-information regularization","author":"Grau-Moya","year":"2019"},{"key":"10.1016\/j.asoc.2026.115168_bib0255","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"Reversible instance normalization for accurate time-series forecasting against distribution shift","author":"Kim","year":"2022"},{"key":"10.1016\/j.asoc.2026.115168_bib0260","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.116523","article-title":"Learning financial asset-specific trading rules via deep reinforcement learning","volume":"195","author":"Taghian","year":"2022","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2026.115168_bib0265","article-title":"An optimized convolutional neural network with a novel spherical triangular fuzzy pooling layer for an algorithmic trading model","volume":"182","author":"Amiri","year":"2025","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2026.115168_bib0270","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"11121","article-title":"Are transformers effective for time series forecasting?","author":"Zeng","year":"2023"},{"key":"10.1016\/j.asoc.2026.115168_bib0275","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.122581","article-title":"A novel deep reinforcement learning framework with bilstm-attention networks for algorithmic trading","volume":"240","author":"Huang","year":"2024","journal-title":"Expert Syst. Appl."}],"container-title":["Applied Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494626006162?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494626006162?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T16:06:53Z","timestamp":1776874013000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1568494626006162"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":55,"alternative-id":["S1568494626006162"],"URL":"https:\/\/doi.org\/10.1016\/j.asoc.2026.115168","relation":{},"ISSN":["1568-4946"],"issn-type":[{"value":"1568-4946","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Algorithmic trading by reinforcement learning in a collaborative manner","name":"articletitle","label":"Article Title"},{"value":"Applied Soft Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.asoc.2026.115168","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115168"}}