{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T14:57:24Z","timestamp":1773327444651,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,4]],"date-time":"2023-08-04T00:00:00Z","timestamp":1691107200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shanghai Sailing Program","award":["21YF1411100"],"award-info":[{"award-number":["21YF1411100"]}]},{"name":"National Nature Science Foundation of China","award":["62202168"],"award-info":[{"award-number":["62202168"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,6]]},"DOI":"10.1145\/3580305.3599813","type":"proceedings-article","created":{"date-parts":[[2023,8,4]],"date-time":"2023-08-04T18:13:58Z","timestamp":1691172838000},"page":"4112-4122","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Efficient Continuous Space Policy Optimization for High-frequency Trading"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5797-2554","authenticated-orcid":false,"given":"Li","family":"Han","sequence":"first","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0579-966X","authenticated-orcid":false,"given":"Nan","family":"Ding","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9178-2137","authenticated-orcid":false,"given":"Guoxuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5877-7387","authenticated-orcid":false,"given":"Dawei","family":"Cheng","sequence":"additional","affiliation":[{"name":"Tongji University &amp; Shanghai Artificial Intelligence Laboratory, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1804-6896","authenticated-orcid":false,"given":"Yuqi","family":"Liang","sequence":"additional","affiliation":[{"name":"Emoney Inc., Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2023,8,4]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13748-020-00225-z"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.5539\/mas.v12n11p330"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1080\/14697680701381228"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3871071"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2020.114002"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbankfin.2012.09.006"},{"key":"e_1_3_2_2_8_1","volume-title":"Deep reinforcement learning for active high frequency trading. arXiv preprint arXiv:2101.07107","author":"Briola Antonio","year":"2021","unstructured":"Antonio Briola , Jeremy Turiel , Riccardo Marcaccioli , and Tomaso Aste . 2021. Deep reinforcement learning for active high frequency trading. arXiv preprint arXiv:2101.07107 ( 2021 ). Antonio Briola, Jeremy Turiel, Riccardo Marcaccioli, and Tomaso Aste. 2021. Deep reinforcement learning for active high frequency trading. arXiv preprint arXiv:2101.07107 (2021)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1093\/rfs\/hhu032"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330663"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3094549"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401427"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108218"},{"key":"e_1_3_2_2_14_1","unstructured":"Kyunghyun Cho Bart van Merrienboer \u00c7aglar G\u00fcl\u00e7ehre Dzmitry Bahdanau Fethi Bougares Holger Schwenk and Yoshua Bengio. 2014. Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation. In EMNLP.  Kyunghyun Cho Bart van Merrienboer \u00c7aglar G\u00fcl\u00e7ehre Dzmitry Bahdanau Fethi Bougares Holger Schwenk and Yoshua Bengio. 2014. Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation. In EMNLP."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jfineco.2016.09.007"},{"key":"e_1_3_2_2_16_1","volume-title":"ICLR workshop","author":"Dozat Timothy","year":"2016","unstructured":"Timothy Dozat . 2016 . Incorporating nesterov momentum into adam . ICLR workshop (2016). Timothy Dozat. 2016. Incorporating nesterov momentum into adam. ICLR workshop (2016)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482315"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557412"},{"key":"e_1_3_2_2_19_1","volume-title":"Deep learning with long short-term memory networks for financial market predictions. European journal of operational research","author":"Fischer Thomas","year":"2018","unstructured":"Thomas Fischer and Christopher Krauss . 2018. Deep learning with long short-term memory networks for financial market predictions. European journal of operational research , Vol. 270 , 2 ( 2018 ), 654--669. Thomas Fischer and Christopher Krauss. 2018. Deep learning with long short-term memory networks for financial market predictions. European journal of operational research, Vol. 270, 2 (2018), 654--669."},{"key":"e_1_3_2_2_20_1","volume-title":"International Conference on Machine Learning. PMLR, 109--117","author":"Ganeshapillai Gartheeban","year":"2013","unstructured":"Gartheeban Ganeshapillai , John Guttag , and Andrew Lo . 2013 . Learning connections in financial time series . In International Conference on Machine Learning. PMLR, 109--117 . Gartheeban Ganeshapillai, John Guttag, and Andrew Lo. 2013. Learning connections in financial time series. In International Conference on Machine Learning. PMLR, 109--117."},{"key":"e_1_3_2_2_21_1","volume-title":"Encyclopedia of Information Science and Technology","author":"Gomber Peter","unstructured":"Peter Gomber and Martin Haferkorn . 2015. High frequency trading . In Encyclopedia of Information Science and Technology , Third Edition. IGI Global , 1--9. Peter Gomber and Martin Haferkorn. 2015. High frequency trading. In Encyclopedia of Information Science and Technology, Third Edition. IGI Global, 1--9."},{"key":"e_1_3_2_2_22_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja , Aurick Zhou , Pieter Abbeel , and Sergey Levine . 2018 . Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor . In International conference on machine learning. PMLR , 1861--1870. Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861--1870."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1002\/asmb.2209"},{"key":"e_1_3_2_2_24_1","volume-title":"2019 International Joint Conference on Neural Networks (IJCNN). IEEE, 1--8.","author":"Jia WU","year":"2019","unstructured":"WU Jia , WANG Chen , Lidong Xiong , and SUN Hongyong . 2019 . Quantitative trading on stock market based on deep reinforcement learning . In 2019 International Joint Conference on Neural Networks (IJCNN). IEEE, 1--8. WU Jia, WANG Chen, Lidong Xiong, and SUN Hongyong. 2019. Quantitative trading on stock market based on deep reinforcement learning. In 2019 International Joint Conference on Neural Networks (IJCNN). IEEE, 1--8."},{"key":"e_1_3_2_2_25_1","volume-title":"Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke , Qi Meng , Thomas Finley , Taifeng Wang , Wei Chen , Weidong Ma , Qiwei Ye , and Tie-Yan Liu . 2017 . Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems , Vol. 30 (2017). Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-021-02218-4"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.econlet.2019.05.022"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103247"},{"key":"e_1_3_2_2_29_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap Timothy P","year":"2015","unstructured":"Timothy P Lillicrap , Jonathan J Hunt , Alexander Pritzel , Nicolas Heess , Tom Erez , Yuval Tassa , David Silver , and Daan Wierstra . 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 ( 2015 ). Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467358"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2022.108894"},{"key":"e_1_3_2_2_32_1","volume-title":"Zhaoran Wang, and Jian Guo.","author":"Liu Xiao-Yang","year":"2022","unstructured":"Xiao-Yang Liu , Ziyi Xia , Jingyang Rui , Jiechao Gao , Hongyang Yang , Ming Zhu , Christina Dan Wang , Zhaoran Wang, and Jian Guo. 2022 . FinRL-Meta: Market Environments and Benchmarks for Data-Driven Financial Reinforcement Learning . (2022). Xiao-Yang Liu, Ziyi Xia, Jingyang Rui, Jiechao Gao, Hongyang Yang, Ming Zhu, Christina Dan Wang, Zhaoran Wang, and Jian Guo. 2022. FinRL-Meta: Market Environments and Benchmarks for Data-Driven Financial Reinforcement Learning. (2022)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i02.5587"},{"key":"e_1_3_2_2_34_1","first-page":"99","article-title":"Maximum drawdown","volume":"17","author":"Magdon-Ismail Malik","year":"2004","unstructured":"Malik Magdon-Ismail and Amir F Atiya . 2004 . Maximum drawdown . Risk Magazine , Vol. 17 , 10 (2004), 99 -- 102 . Malik Magdon-Ismail and Amir F Atiya. 2004. Maximum drawdown. Risk Magazine, Vol. 17, 10 (2004), 99--102.","journal-title":"Risk Magazine"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.5539\/ijef.v6n3p96"},{"key":"e_1_3_2_2_36_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih , Adria Puigdomenech Badia , Mehdi Mirza , Alex Graves , Timothy Lillicrap , Tim Harley , David Silver , and Koray Kavukcuoglu . 2016 . Asynchronous methods for deep reinforcement learning . In International conference on machine learning. PMLR , 1928--1937. Volodymyr Mnih, Adria Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928--1937."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.935097"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557363"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/366"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISDA.2011.6121737"},{"key":"e_1_3_2_2_41_1","volume-title":"An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747","author":"Ruder Sebastian","year":"2016","unstructured":"Sebastian Ruder . 2016. An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747 ( 2016 ). Sebastian Ruder. 2016. An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747 (2016)."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450095"},{"key":"e_1_3_2_2_43_1","volume-title":"International conference on machine learning. PMLR","author":"Schulman John","year":"2015","unstructured":"John Schulman , Sergey Levine , Pieter Abbeel , Michael Jordan , and Philipp Moritz . 2015 . Trust region policy optimization . In International conference on machine learning. PMLR , 1889--1897. John Schulman, Sergey Levine, Pieter Abbeel, Michael Jordan, and Philipp Moritz. 2015. Trust region policy optimization. In International conference on machine learning. PMLR, 1889--1897."},{"key":"e_1_3_2_2_44_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman , Filip Wolski , Prafulla Dhariwal , Alec Radford , and Oleg Klimov . 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 ( 2017 ). John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_2_45_1","volume-title":"The sharpe ratio. Streetwise--the Best of the Journal of Portfolio Management","author":"Sharpe William F","year":"1998","unstructured":"William F Sharpe . 1998. The sharpe ratio. Streetwise--the Best of the Journal of Portfolio Management ( 1998 ), 169--185. William F Sharpe. 1998. The sharpe ratio. Streetwise--the Best of the Journal of Portfolio Management (1998), 169--185."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.04.105"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2020.113456"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1080\/14697688.2018.1537503"},{"key":"e_1_3_2_2_49_1","volume-title":"Dimension reduction in mean-variance portfolio optimization. Expert Systems with applications","author":"Tayali Halit Alper","year":"2018","unstructured":"Halit Alper Tayali and Seda Tolun . 2018. Dimension reduction in mean-variance portfolio optimization. Expert Systems with applications , Vol. 92 ( 2018 ), 161--169. Halit Alper Tayali and Seda Tolun. 2018. Dimension reduction in mean-variance portfolio optimization. Expert Systems with applications, Vol. 92 (2018), 161--169."},{"key":"e_1_3_2_2_50_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , \u0141ukasz Kaiser , and Illia Polosukhin . 2017. Attention is all you need. Advances in neural information processing systems , Vol. 30 ( 2017 ). Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16144"},{"key":"e_1_3_2_2_52_1","unstructured":"WorldBank. 2022. Market capitalization of listed domestic companies. https:\/\/data.worldbank.org\/indicator\/CM.MKT.LCAP.CD\/ accessed 04-Jun-2022.  WorldBank. 2022. Market capitalization of listed domestic companies. https:\/\/data.worldbank.org\/indicator\/CM.MKT.LCAP.CD\/ accessed 04-Jun-2022."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557089"},{"key":"e_1_3_2_2_54_1","volume-title":"Practical deep reinforcement learning approach for stock trading. arXiv preprint arXiv:1811.07522","author":"Xiong Zhuoran","year":"2018","unstructured":"Zhuoran Xiong , Xiao-Yang Liu , Shan Zhong , Hongyang Yang , and Anwar Walid . 2018. Practical deep reinforcement learning approach for stock trading. arXiv preprint arXiv:1811.07522 ( 2018 ). Zhuoran Xiong, Xiao-Yang Liu, Shan Zhong, Hongyang Yang, and Anwar Walid. 2018. Practical deep reinforcement learning approach for stock trading. arXiv preprint arXiv:1811.07522 (2018)."},{"key":"e_1_3_2_2_55_1","volume-title":"Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 4647--4653","author":"Xu Ke","year":"2021","unstructured":"Ke Xu , Yifan Zhang , Deheng Ye , Peilin Zhao , and Mingkui Tan . 2021 . Relation-aware transformer for portfolio policy learning . In Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 4647--4653 . Ke Xu, Yifan Zhang, Deheng Ye, Peilin Zhao, and Mingkui Tan. 2021. Relation-aware transformer for portfolio policy learning. In Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 4647--4653."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"crossref","unstructured":"Mengyuan Yang Xiaolin Zheng Qianqiao Liang Bing Han and Mengying Zhu. 2022. A Smart Trader for Portfolio Management based on Normalizing Flows. IJCAI.  Mengyuan Yang Xiaolin Zheng Qianqiao Liang Bing Han and Mengying Zhu. 2022. A Smart Trader for Portfolio Management based on Normalizing Flows. IJCAI.","DOI":"10.24963\/ijcai.2022\/557"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i01.5462"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108209"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108543"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2979700"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2019.2907260"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.3905\/jfds.2020.1.042"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.3905\/jfds.2020.1.030"}],"event":{"name":"KDD '23: The 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Long Beach CA USA","acronym":"KDD '23","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3580305.3599813","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3580305.3599813","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:23Z","timestamp":1750182563000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3580305.3599813"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,4]]},"references-count":63,"alternative-id":["10.1145\/3580305.3599813","10.1145\/3580305"],"URL":"https:\/\/doi.org\/10.1145\/3580305.3599813","relation":{},"subject":[],"published":{"date-parts":[[2023,8,4]]},"assertion":[{"value":"2023-08-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}