{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T22:12:38Z","timestamp":1769638358133,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,15]]},"DOI":"10.1145\/3768292.3770337","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:24:26Z","timestamp":1763105066000},"page":"360-368","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Continuous-Time Reinforcement Learning for Asset\u2013Liability Management"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2922-9904","authenticated-orcid":false,"given":"Yilie","family":"Huang","sequence":"first","affiliation":[{"name":"Department of Industrial Engineering and Operations Research, Columbia University, New York, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,14]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Sigr\u00fan Andrad\u00f3ttir. 1995. A stochastic approximation algorithm with varying bounds. Operations Research 43 6 (1995) 1037\u20131048.","DOI":"10.1287\/opre.43.6.1037"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICNN.1994.374604"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Matteo Basei Xin Guo Anran Hu and Yufei Zhang. 2022. Logarithmic regret for episodic continuous-time linear-quadratic reinforcement learning over a finite-time horizon. Journal of Machine Learning Research 23 178 (2022) 1\u201334.","DOI":"10.2139\/ssrn.3848428"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Fischer Black and AndreF Perold. 1992. Theory of constant proportion portfolio insurance. Journal of Economic Dynamics and Control 16 3-4 (1992) 403\u2013426.","DOI":"10.1016\/0165-1889(92)90043-E"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Shuping Chen Xunjing Li and Xun\u00a0Yu Zhou. 1998. Stochastic linear quadratic regulators with indefinite control weight costs. SIAM Journal on Control and Optimization 36 5 (1998) 1685\u20131702.","DOI":"10.1137\/S0363012996310478"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Mei\u00a0Choi Chiu and Hoi\u00a0Ying Wong. 2012. Mean\u2013variance asset\u2013liability management: Cointegrated assets and insurance liability. European Journal of Operational Research 223 3 (2012) 785\u2013793.","DOI":"10.1016\/j.ejor.2012.07.009"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Kenji Doya. 2000. Reinforcement learning in continuous time and space. Neural Computation 12 1 (2000) 219\u2013245.","DOI":"10.1162\/089976600300015961"},{"key":"e_1_3_3_1_9_2","first-page":"1861","volume-title":"International Conference on Machine Learning","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International Conference on Machine Learning. PMLR, 1861\u20131870."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Erol Hakanoglu Robert Kopprasch and Emmanuel Roman. 1989. Constant proportion portfolio insurance for fixed-income investment. Journal of Portfolio Management 15 4 (1989) 58.","DOI":"10.3905\/jpm.1989.409214"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Thomas\u00a0SY Ho. 1992. Key rate durations: Measures of interest rate risks. The Journal of Fixed Income 2 2 (1992) 29\u201344.","DOI":"10.3905\/jfi.1992.408049"},{"key":"e_1_3_3_1_12_2","unstructured":"Ying Hu Xiaomin Shi and Zuo\u00a0Quan Xu. 2022. Non-homogeneous stochastic LQ control with regime switching and random coefficients. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2201.01433 (2022)."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3533271.3561760"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Yilie Huang Yanwei Jia and Xun\u00a0Yu Zhou. 2024. Mean\u2013Variance Portfolio Selection by Continuous-Time Reinforcement Learning: Algorithms Regret Analysis and Empirical Study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.16175 (2024).","DOI":"10.2139\/ssrn.5048272"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Yilie Huang Yanwei Jia and Xun\u00a0Yu Zhou. 2025. Sublinear regret for a class of continuous-time linear-quadratic reinforcement learning problems. SIAM Journal on Control and Optimization 63 5 (2025) 3452\u20133474.","DOI":"10.1137\/24M1695075"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Yilie Huang and Xun\u00a0Yu Zhou. 2025. Data-Driven Exploration for a Class of Continuous-Time Linear\u2013Quadratic Reinforcement Learning Problems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.00358 (2025).","DOI":"10.2139\/ssrn.5332029"},{"key":"e_1_3_3_1_17_2","unstructured":"Yanwei Jia and Xun\u00a0Yu Zhou. 2022. Policy evaluation and temporal-difference learning in continuous time and space: A martingale approach. Journal of Machine Learning Research 23 154 (2022) 1\u201355."},{"key":"e_1_3_3_1_18_2","unstructured":"Yanwei Jia and Xun\u00a0Yu Zhou. 2022. Policy gradient and actor-critic learning in continuous time and space: Theory and algorithms. Journal of Machine Learning Research 23 154 (2022) 1\u201355."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Martin\u00a0L Leibowitz and Alfred Weinberger. 1982. Contingent immunization\u2014Part I: Risk control procedures. Financial Analysts Journal 38 6 (1982) 17\u201331.","DOI":"10.2469\/faj.v38.n6.17"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Martin\u00a0L Leibowitz and Alfred Weinberger. 1983. Contingent Immunization\u2014Part II: Problem Areas. Financial Analysts Journal 39 1 (1983) 35\u201350.","DOI":"10.2469\/faj.v39.n1.35"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Chanjuan Li Zhongfei Li Ke Fu and Haiqing Song. 2013. Time-consistent optimal portfolio strategy for asset-liability management under mean-variance criterion. Accounting and Finance Research 2 2 (2013) 1\u201389.","DOI":"10.5430\/afr.v2n2p89"},{"key":"e_1_3_3_1_22_2","unstructured":"Timothy\u00a0P Lillicrap Jonathan\u00a0J Hunt Alexander Pritzel Nicolas Heess Tom Erez Yuval Tassa David Silver and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1509.02971 (2015)."},{"key":"e_1_3_3_1_23_2","volume-title":"Investment Science","author":"Luenberger David\u00a0G","year":"1998","unstructured":"David\u00a0G Luenberger. 1998. Investment Science. Oxford University Press."},{"key":"e_1_3_3_1_24_2","unstructured":"R\u00e9mi Munos. 2006. Policy gradient in continuous time. Journal of Machine Learning Research 7 (2006) 771\u2013791."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Jian Pan Zujin Zhang and Xiangying Zhou. 2018. Optimal dynamic mean-variance asset-liability management under the Heston model. Advances in Difference Equations 2018 (2018) 1\u201316.","DOI":"10.1186\/s13662-018-1677-9"},{"key":"e_1_3_3_1_26_2","unstructured":"Seohong Park Jaekyeom Kim and Gunhee Kim. 2021. Time discretization-invariant safe action repetition for policy gradient methods. Advances in Neural Information Processing Systems 34 (2021) 267\u2013279."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"MA Rami and Xun\u00a0Yu Zhou. 2000. Linear matrix inequalities Riccati equations and indefinite stochastic linear quadratic controls. IEEE Trans. Automat. Control 45 6 (2000) 1131\u20131143.","DOI":"10.1109\/9.863597"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"crossref","unstructured":"Herbert Robbins and Sutton Monro. 1951. A stochastic approximation method. The Annals of Mathematical Statistics (1951) 400\u2013407.","DOI":"10.1214\/aoms\/1177729586"},{"key":"e_1_3_3_1_29_2","first-page":"233","volume-title":"Optimizing Methods in Statistics","author":"Robbins Herbert","year":"1971","unstructured":"Herbert Robbins and David Siegmund. 1971. A convergence theorem for non negative almost supermartingales and some applications. In Optimizing Methods in Statistics. Elsevier, 233\u2013257."},{"key":"e_1_3_3_1_30_2","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1707.06347 (2017)."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Yang Shen Jiaqin Wei and Qian Zhao. 2020. Mean\u2013variance asset\u2013liability management problem under non-Markovian regime-switching models. Applied Mathematics & Optimization 81 (2020) 859\u2013897.","DOI":"10.1007\/s00245-018-9523-8"},{"key":"e_1_3_3_1_32_2","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard\u00a0S","year":"2018","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"Lukasz Szpruch Tanut Treetanthiploet and Yufei Zhang. 2024. Optimal Scheduling of Entropy Regularizer for Continuous-Time Linear-Quadratic Reinforcement Learning. SIAM Journal on Control and Optimization 62 1 (2024) 135\u2013166.","DOI":"10.1137\/22M1515744"},{"key":"e_1_3_3_1_34_2","first-page":"6096","volume-title":"International Conference on Machine Learning","author":"Tallec Corentin","year":"2019","unstructured":"Corentin Tallec, L\u00e9onard Blier, and Yann Ollivier. 2019. Making deep Q-learning methods robust to time discretization. In International Conference on Machine Learning. PMLR, 6096\u20136104."},{"key":"e_1_3_3_1_35_2","unstructured":"Wenpin Tang and Xun\u00a0Yu Zhou. 2024. Regret of exploratory policy improvement and q -learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.01302 (2024)."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Kyriakos\u00a0G Vamvoudakis and Frank\u00a0L Lewis. 2010. Online actor\u2013critic algorithm to solve the continuous-time infinite horizon optimal control problem. Automatica 46 5 (2010) 878\u2013888.","DOI":"10.1016\/j.automatica.2010.02.018"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Robert Van\u00a0der Meer and Meye Smink. 1993. Strategies and techniques for asset-liability management: an overview. Geneva Papers on Risk and Insurance. Issues and Practice (1993) 144\u2013157.","DOI":"10.1057\/gpp.1993.10"},{"key":"e_1_3_3_1_38_2","unstructured":"Haoran Wang Thaleia Zariphopoulou and Xun\u00a0Yu Zhou. 2020. Reinforcement learning in continuous time and space: A stochastic control approach. Journal of Machine Learning Research 21 198 (2020) 1\u201334."},{"key":"e_1_3_3_1_39_2","unstructured":"Xiaoli Wei and Xiang Yu. 2023. Continuous-time q-learning for McKean-Vlasov control problems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.16208 (2023)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"AJ Wise. 1984. The matching of assets to liabilities. Journal of the Institute of Actuaries 111 3 (1984) 445\u2013501.","DOI":"10.1017\/S0020268100041858"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Haixiang Yao Yongzeng Lai and Yong Li. 2013. Continuous-time mean\u2013variance asset\u2013liability management with endogenous liabilities. Insurance: Mathematics and Economics 52 1 (2013) 6\u201317.","DOI":"10.1016\/j.insmatheco.2012.10.001"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-1466-3"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"crossref","unstructured":"Miao Zhang and Ping Chen. 2016. Mean\u2013variance asset\u2013liability management under constant elasticity of variance process. Insurance: Mathematics and Economics 70 (2016) 11\u201318.","DOI":"10.1016\/j.insmatheco.2016.05.019"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"crossref","unstructured":"Xun\u00a0Yu Zhou and Duan Li. 2000. Continuous-time mean-variance portfolio selection: A stochastic LQ framework. Applied Mathematics and Optimization 42 1 (2000) 19\u201333.","DOI":"10.1007\/s002450010003"},{"key":"e_1_3_3_1_45_2","first-page":"1433","volume-title":"AAAI","author":"Ziebart Brian\u00a0D","year":"2008","unstructured":"Brian\u00a0D Ziebart, Andrew\u00a0L Maas, J\u00a0Andrew Bagnell, and Anind\u00a0K Dey. 2008. Maximum entropy inverse reinforcement learning.. In AAAI , Vol.\u00a08. Chicago, IL, USA, 1433\u20131438."}],"event":{"name":"ICAIF '25: 6th ACM International Conference on AI in Finance","location":"Singapore Singapore","acronym":"ICAIF '25"},"container-title":["Proceedings of the 6th ACM International Conference on AI in Finance"],"original-title":[],"deposited":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:31:52Z","timestamp":1763105512000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3768292.3770337"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,14]]},"references-count":44,"alternative-id":["10.1145\/3768292.3770337","10.1145\/3768292"],"URL":"https:\/\/doi.org\/10.1145\/3768292.3770337","relation":{},"subject":[],"published":{"date-parts":[[2025,11,14]]},"assertion":[{"value":"2025-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}