{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:28:11Z","timestamp":1775503691883,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714562","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T23:08:29Z","timestamp":1745363309000},"page":"402-412","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Policy-Guided Causal State Representation for Offline Reinforcement Learning Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8726-5277","authenticated-orcid":false,"given":"Siyu","family":"Wang","sequence":"first","affiliation":[{"name":"The University of New South Wales, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8849-4943","authenticated-orcid":false,"given":"Xiaocong","family":"Chen","sequence":"additional","affiliation":[{"name":"Data 61, CSIRO, Eveleigh, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4149-839X","authenticated-orcid":false,"given":"Lina","family":"Yao","sequence":"additional","affiliation":[{"name":"Data 61, CSIRO, Eveleigh, Australia and The University of New South Wales, Sydney, Australia"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543846"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3661996"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-023-01187-7"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671750"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110335"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482347"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/10080484X"},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. PMLR, 1587--1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587--1596."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591636"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557220"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557624"},{"key":"e_1_3_2_1_12_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861--1870."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3559757"},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Machine Learning. PMLR, 9260--9279","author":"Huang Biwei","year":"2022","unstructured":"Biwei Huang, Chaochao Lu, Liu Leqi, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Clark Glymour, Bernhard Sch\u00f6lkopf, and Kun Zhang. 2022. Action-sufficient state representation learning for control with structural constraints. In International Conference on Machine Learning. PMLR, 9260--9279."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.07.006"},{"key":"e_1_3_2_1_16_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Li Haoxuan","year":"2024","unstructured":"Haoxuan Li, Kunhan Wu, Chunyuan Zheng, Yanghao Xiao, Hao Wang, Zhi Geng, Fuli Feng, Xiangnan He, and Peng Wu. 2024. Removing hidden confounding in recommendation: a unified multi-task learning approach. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599550"},{"key":"e_1_3_2_1_18_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap Timothy P","year":"2015","unstructured":"Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1282100.1282114"},{"key":"e_1_3_2_1_20_1","unstructured":"Judea Pearl. 2009. Causality. Cambridge university press."},{"key":"e_1_3_2_1_21_1","volume-title":"Elements of Causal Inference: Foundations and Learning Algorithms","author":"Peters Jonas","unstructured":"Jonas Peters, Dominik Janzing, and Bernhard Schlkopf. 2017. Elements of Causal Inference: Foundations and Learning Algorithms. The MIT Press."},{"key":"e_1_3_2_1_22_1","volume-title":"international conference on machine learning. PMLR, 1670--1679","author":"Schnabel Tobias","year":"2016","unstructured":"Tobias Schnabel, Adith Swaminathan, Ashudeep Singh, Navin Chandak, and Thorsten Joachims. 2016. Recommendations as treatments: Debiasing learning and evaluation. In international conference on machine learning. PMLR, 1670--1679."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591648"},{"key":"e_1_3_2_1_25_1","volume-title":"Plug-and-Play Model-Agnostic Counterfactual Policy Synthesis for Deep Reinforcement Learning-Based Recommendation","author":"Wang Siyu","year":"2023","unstructured":"Siyu Wang, Xiaocong Chen, Julian McAuley, Sally Cripps, and Lina Yao. 2023b. Plug-and-Play Model-Agnostic Counterfactual Policy Synthesis for Deep Reinforcement Learning-Based Recommendation. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"On Causally Disentangled State Representation Learning for Reinforcement Learning based Recommender Systems. arXiv preprint arXiv:2407.13091","author":"Wang Siyu","year":"2024","unstructured":"Siyu Wang, Xiaocong Chen, and Lina Yao. 2024. On Causally Disentangled State Representation Learning for Reinforcement Learning based Recommender Systems. arXiv preprint arXiv:2407.13091 (2024)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539221"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657868"},{"key":"e_1_3_2_1_29_1","volume-title":"Remi Tachet des Combes, and Romain Laroche","author":"Zang Hongyu","year":"2024","unstructured":"Hongyu Zang, Xin Li, Leiji Zhang, Yang Liu, Baigui Sun, Riashat Islam, Remi Tachet des Combes, and Romain Laroche. 2024. Understanding and addressing the pitfalls of bisimulation-based representations in offline reinforcement learning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=-2FCwDKRREu","author":"Zhang Amy","year":"2021","unstructured":"Amy Zhang, Rowan Thomas McAllister, Roberto Calandra, Yarin Gal, and Sergey Levine. 2021b. Learning Invariant Representations for Reinforcement Learning without Reconstruction. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=-2FCwDKRREu"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462875"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714562","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714562","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:33Z","timestamp":1750295913000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714562"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":31,"alternative-id":["10.1145\/3696410.3714562","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714562","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}