{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:51:36Z","timestamp":1764399096074,"version":"3.46.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249355","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"1241-1246","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning in Portfolio Management: A Survey of Methods and Trends"],"prefix":"10.1109","author":[{"given":"Silan","family":"Hu","sequence":"first","affiliation":[{"name":"National University of Singapore,Singapore"}]},{"given":"Yulin","family":"Huang","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}]},{"given":"Arjun","family":"Agarwal","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}]},{"given":"Tanya","family":"Warrier","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}]},{"given":"Wang","family":"Yuwen","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}]},{"given":"Haozhe","family":"Ma","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}]},{"given":"Zhengding","family":"Luo","sequence":"additional","affiliation":[{"name":"National University of Singapore,Singapore"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Deep reinforcement learning for portfolio management","author":"Jiang","year":"2017","journal-title":"arXiv preprint"},{"key":"ref2","article-title":"Deep reinforcement learning for optimal portfolio allocation: A comparative study with mean-variance optimization","author":"Sood","year":"2023","journal-title":"in Planning and Scheduling for Financial Services Workshop, ICAPS"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-statistics-112723-034423"},{"key":"ref4","first-page":"1112","article-title":"Deep reinforcement learning and mean-variance strategies for responsible portfolio optimization","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Acero","year":"2024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5462"},{"volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","key":"ref6","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4615-3618-5"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref8","first-page":"936","article-title":"Enhancing q-learning for optimal asset allocation","volume":"10","author":"Neuneier","year":"1997","journal-title":"in Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1099-131X(1998090)17:5\/6<441::AID-FOR707>3.0.CO;2-#"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2522401"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-020-05359-8"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3383455.3422540"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122801"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330647"},{"key":"ref15","article-title":"Catching two birds with one stone: Reward shaping with dual random networks for balancing exploration and exploitation","volume-title":"Forty-second International Conference on Machine Learning","author":"Ma","year":"2025"},{"key":"ref16","article-title":"Highly efficient self-adaptive reward shaping for reinforcement learning","volume-title":"Thirteenth International Conference on Learning Representations","author":"Ma","year":"2025"},{"key":"ref17","article-title":"Reward shaping for reinforcement learning with an assistant reward agent","volume-title":"Forty-first International Conference on Machine Learning","author":"Ma","year":"2024"},{"journal-title":"Centralized reward agent for knowledge sharing and transfer in multi-task reinforcement learning","year":"2025","author":"Ma","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106687"},{"key":"ref20","first-page":"1328","article-title":"Mixed-initiative bayesian sub-goal optimization in hierarchical reinforcement learning","volume-title":"Proceedings of the 23rd international conference on autonomous agents and multiagent systems","author":"Ma","year":"2024"},{"key":"ref21","first-page":"2310","article-title":"Hierarchical reinforcement learning with human-ai collaborative subgoals optimization","volume-title":"Proceedings of the 22nd international conference on autonomous agents and multiagent systems","author":"Ma","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16144"},{"key":"ref23","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proceedings of the 31st International Conference on Machine Learning (ICML), ser. Proceedings of Machine Learning Research","volume":"32","author":"Silver"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3079850"},{"key":"ref25","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Lillicrap","year":"2016"},{"key":"ref26","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proceedings of the 33rd International Conference on Machine Learning","volume":"48","author":"Mnih","year":"2016"},{"key":"ref27","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Haarnoja","year":"2018"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/623"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16142"},{"key":"ref30","article-title":"Prudex-compass: Towards systematic evaluation of reinforcement learning in financial markets","author":"Sun","year":"2023","journal-title":"arXiv preprint"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3390\/data6110119"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3624052"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249355.pdf?arnumber=11249355","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:50:59Z","timestamp":1764399059000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249355\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249355","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}