{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T01:22:28Z","timestamp":1774056148331,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Ministry of Education - Singapore","award":["MOE-T2EP20220-0013, R-144-000-457-733 (A-0004550-00-00)"],"award-info":[{"award-number":["MOE-T2EP20220-0013, R-144-000-457-733 (A-0004550-00-00)"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,14]]},"DOI":"10.1145\/3677052.3698670","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:38:06Z","timestamp":1731566286000},"page":"353-360","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Autoregressive DRL with Learned Intrinsic Rewards for Portfolio Optimisation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3859-3246","authenticated-orcid":false,"given":"Magdalene Hui Qi","family":"Lim","sequence":"first","affiliation":[{"name":"School of Physical and Mathematical Sciences, Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3705-3382","authenticated-orcid":false,"given":"Nixie S","family":"Lesmana","sequence":"additional","affiliation":[{"name":"School of Physical and Mathematical Sciences, Nanyang Technological University, Singapore and Department of Physics, Faculty of Science, National University of Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7478-6961","authenticated-orcid":false,"given":"Chi Seng","family":"Pun","sequence":"additional","affiliation":[{"name":"School of Physical and Mathematical Sciences, Nanyang Technological University, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Surprise-based intrinsic motivation for deep reinforcement learning. arXiv preprint arXiv:1703.01732","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam and Shankar Sastry. 2017. Surprise-based intrinsic motivation for deep reinforcement learning. arXiv preprint arXiv:1703.01732 (2017)."},{"key":"e_1_3_2_1_2_1","volume-title":"Intrinsic motivation and reinforcement learning. Intrinsically Motivated Learning in Natural and Artificial Systems","author":"Barto G.","year":"2012","unstructured":"Andrew\u00a0G. Barto. 2012. Intrinsic motivation and reinforcement learning. Intrinsically Motivated Learning in Natural and Artificial Systems (2012), 17\u201347."},{"key":"e_1_3_2_1_3_1","volume-title":"Advances in Neural Information Processing Systems, Vol.\u00a016","author":"Ho Tracey","unstructured":"Yu-han Chang, Tracey Ho, and Leslie Kaelbling. 2003. All learning is local: Multi-agent learning in global reward games. In Advances in Neural Information Processing Systems, Vol.\u00a016. MIT Press."},{"key":"e_1_3_2_1_4_1","volume-title":"LIIR: Learning Individual Intrinsic Reward in Multi-Agent Reinforcement Learning. In Advances in Neural Information Processing Systems, Vol.\u00a032.","author":"Du Yali","year":"2019","unstructured":"Yali Du, Lei Han, Meng Fang, Ji Liu, Tianhong Dai, and Dacheng Tao. 2019. LIIR: Learning Individual Intrinsic Reward in Multi-Agent Reinforcement Learning. In Advances in Neural Information Processing Systems, Vol.\u00a032."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2008.08.019"},{"key":"e_1_3_2_1_6_1","volume-title":"Modern Investment Theory","author":"Haugen A.","unstructured":"Robert\u00a0A. Haugen. 1990. Modern Investment Theory. Prentice Hall."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1540-6261.1993.tb00853.x"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.2469\/faj.v66.n5.3"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5281-z"},{"key":"e_1_3_2_1_10_1","volume-title":"Portfolio Selection: Efficient Diversification of Investments","author":"Markowitz M.","year":"1959","unstructured":"Harry\u00a0M. Markowitz. 1959. Portfolio Selection: Efficient Diversification of Investments. Yale University Press."},{"key":"e_1_3_2_1_11_1","volume-title":"Portfolio Optimization with Prediction-Based Return Using Long Short-Term Memory Neural Networks: Testing on Upward and Downward European Markets. Computational Economics","author":"Mart\u00ednez-Barbero Xavier","year":"2024","unstructured":"Xavier Mart\u00ednez-Barbero, Roberto Cervell\u00f3-Royo, and Javier Ribal. 2024. Portfolio Optimization with Prediction-Based Return Using Long Short-Term Memory Neural Networks: Testing on Upward and Downward European Markets. Computational Economics (2024)."},{"key":"e_1_3_2_1_12_1","unstructured":"Luke Metz Julian Ibarz Navdeep Jaitly and James Davidson. 2018. Discrete Sequential Prediction of Continuous Actions for Deep RL. https:\/\/openreview.net\/forum?id=r1SuFjkRW"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_14_1","volume-title":"Reinforcement learning for trading systems and portfolios: Immediate vs future rewards","author":"Moody John","unstructured":"John Moody, Matthew Saffell, Yuansong Liao, and Lizhong Wu. 1998. Reinforcement learning for trading systems and portfolios: Immediate vs future rewards. Springer US, 129\u2013140."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1099-131X(1998090)17:5\/6<441::AID-FOR707>3.0.CO;2-#"},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning, Vol.\u00a016","author":"Ng Y.","year":"1999","unstructured":"Andrew\u00a0Y. Ng, Daishi Harada, and Stuart\u00a0J. Russell. 1999. Policy invariance under reward transformations: Theory and application to reward shaping. In International Conference on Machine Learning, Vol.\u00a016. 278\u2013287."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 27th International Conference on International Conference on Machine Learning. 1007 \u2013 1014","author":"Sorg Jonathan","year":"2010","unstructured":"Jonathan Sorg, Satinder Singh, and Richard Lewis. 2010. Internal rewards mitigate agent boundedness. In Proceedings of the 27th International Conference on International Conference on Machine Learning. 1007 \u2013 1014."},{"key":"e_1_3_2_1_19_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton S","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_20_1","volume-title":"Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12","author":"Sutton S","year":"1999","unstructured":"Richard\u00a0S Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12 (1999)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"e_1_3_2_1_22_1","volume-title":"Hybrid Reward Architecture for Reinforcement Learning. CoRR abs\/1706.04208","author":"van Seijen Harm","year":"2017","unstructured":"Harm van Seijen, Mehdi Fatemi, Joshua Romoff, Romain Laroche, Tavian Barnes, and Jeffrey Tsang. 2017. Hybrid Reward Architecture for Reinforcement Learning. CoRR abs\/1706.04208 (2017)."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the First ACM International Conference on AI in Finance. ACM, Article 31","author":"Yang Hongyang","year":"2021","unstructured":"Hongyang Yang, Xiao-Yang Liu, Shan Zhong, and Anwar Walid. 2021. Deep reinforcement learning for automated stock trading: an ensemble strategy. In Proceedings of the First ACM International Conference on AI in Finance. ACM, Article 31."},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems. 4649\u20134659","author":"Zeyu Zheng","year":"2018","unstructured":"Zheng Zeyu, Oh Junhyuk, and Singh Satinder. 2018. On learning intrinsic rewards for policy gradient methods. In Proceedings of the 32nd International Conference on Neural Information Processing Systems. 4649\u20134659."},{"key":"e_1_3_2_1_25_1","volume-title":"Efficient entropy for policy gradient with multidimensional action space. arXiv preprint arXiv:1806.00589","author":"Zhang Yiming","year":"2018","unstructured":"Yiming Zhang, Quan\u00a0Ho Vuong, Kenny Song, Xiao-Yue Gong, and Keith\u00a0W Ross. 2018. Efficient entropy for policy gradient with multidimensional action space. arXiv preprint arXiv:1806.00589 (2018)."}],"event":{"name":"ICAIF '24: 5th ACM International Conference on AI in Finance","location":"Brooklyn NY USA","acronym":"ICAIF '24"},"container-title":["Proceedings of the 5th ACM International Conference on AI in Finance"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698670","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677052.3698670","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:10:18Z","timestamp":1755882618000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698670"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":25,"alternative-id":["10.1145\/3677052.3698670","10.1145\/3677052"],"URL":"https:\/\/doi.org\/10.1145\/3677052.3698670","relation":{},"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"2024-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}