{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:47:27Z","timestamp":1755794847975,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709237","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:44:43Z","timestamp":1743792283000},"page":"1256-1264","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Off-Policy Evaluation and Learning for the Future under Non-Stationarity"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-9746-3346","authenticated-orcid":false,"given":"Tatsuhiro","family":"Shimizu","sequence":"first","affiliation":[{"name":"Yale University, New Haven, Connecticut, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5181-320X","authenticated-orcid":false,"given":"Kazuki","family":"Kawamura","sequence":"additional","affiliation":[{"name":"Sony Group Corporation, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2359-1658","authenticated-orcid":false,"given":"Takanori","family":"Muroi","sequence":"additional","affiliation":[{"name":"Sony Group Corporation, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0314-3384","authenticated-orcid":false,"given":"Yusuke","family":"Narita","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8249-2659","authenticated-orcid":false,"given":"Kei","family":"Tateno","sequence":"additional","affiliation":[{"name":"Sony Group Corporation, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7441-5773","authenticated-orcid":false,"given":"Takuma","family":"Udagawa","sequence":"additional","affiliation":[{"name":"Sony Group Corporation, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4357-5835","authenticated-orcid":false,"given":"Yuta","family":"Saito","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Reinforcement Learning: Theory and algorithms. MIT","author":"Agarwal A","year":"2020","unstructured":"A Agarwal, N Jiang, and M Kakade, S. 2020. Reinforcement Learning: Theory and algorithms. MIT (2020)."},{"key":"e_1_3_2_2_2_1","volume-title":"International Conference on Machine Learning. PMLR, 1414--1425","author":"Chandak Yash","year":"2020","unstructured":"Yash Chandak, Georgios Theocharous, Shiv Shankar, Martha White, Sridhar Mahadevan, and Philip Thomas. 2020. Optimizing for the future in non-stationary mdps. In International Conference on Machine Learning. PMLR, 1414--1425."},{"key":"e_1_3_2_2_3_1","volume-title":"Cross-Validated Off-Policy Evaluation. arXiv preprint arXiv:2405.15332","author":"Cief Matej","year":"2024","unstructured":"Matej Cief, Michal Kompan, and Branislav Kveton. 2024. Cross-Validated Off-Policy Evaluation. arXiv preprint arXiv:2405.15332 (2024)."},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the 28th International Conference on International Conference on Machine Learning. 1097--1104","author":"Dud\u00edk Miroslav","year":"2011","unstructured":"Miroslav Dud\u00edk, John Langford, and Lihong Li. 2011. Doubly robust policy evaluation and learning. In Proceedings of the 28th International Conference on International Conference on Machine Learning. 1097--1104."},{"key":"e_1_3_2_2_5_1","volume-title":"AutoOPE: Automated Off-Policy Estimator Selection. arXiv preprint arXiv:2406.18022","author":"Felicioni Nicol\u00f2","year":"2024","unstructured":"Nicol\u00f2 Felicioni, Michael Benigni, and Maurizio Ferrari Dacrema. 2024. AutoOPE: Automated Off-Policy Estimator Selection. arXiv preprint arXiv:2406.18022 (2024)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557220"},{"key":"e_1_3_2_2_7_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 2494--2502","author":"Hong Joey","year":"2021","unstructured":"Joey Hong, Branislav Kveton, Manzil Zaheer, Yinlam Chow, and Amr Ahmed. 2021. Non-stationary off-policy optimization. In International Conference on Artificial Intelligence and Statistics. PMLR, 2494--2502."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290958"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_2_10_1","volume-title":"International Conference on Machine Learning. PMLR, 5247--5256","author":"Kallus Nathan","year":"2021","unstructured":"Nathan Kallus, Yuta Saito, and Masatoshi Uehara. 2021. Optimal off-policy evaluation from multiple logging policies. In International Conference on Machine Learning. PMLR, 5247--5256."},{"key":"e_1_3_2_2_11_1","volume-title":"Towards Assessing and Benchmarking Risk-Return Tradeoff of Off-Policy Evaluation. In The Twelfth International Conference on Learning Representations.","author":"Kiyohara Haruka","year":"2024","unstructured":"Haruka Kiyohara, Ren Kishimoto, Kosuke Kawakami, Ken Kobayashi, Kazuhide Nakata, and Yuta Saito. 2024. Towards Assessing and Benchmarking Risk-Return Tradeoff of Off-Policy Evaluation. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498380"},{"key":"e_1_3_2_2_13_1","first-page":"1179","article-title":"Conservative Q-Learning for Offline Reinforcement Learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative Q-Learning for Offline Reinforcement Learning. In Advances in Neural Information Processing Systems, Vol. 33. 1179--1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_14_1","volume-title":"Local Policy Improvement for Recommender Systems. arXiv preprint arXiv:2212.11431","author":"Liang Dawen","year":"2022","unstructured":"Dawen Liang and Nikos Vlassis. 2022. Local Policy Improvement for Recommender Systems. arXiv preprint arXiv:2212.11431 (2022)."},{"key":"e_1_3_2_2_15_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 5474--5492","author":"Liu Vincent","year":"2023","unstructured":"Vincent Liu, Yash Chandak, Philip Thomas, and Martha White. 2023. Asymptotically Unbiased Off-Policy Policy Evaluation when Reusing Old Data in Nonstationary Environments. In International Conference on Artificial Intelligence and Statistics. PMLR, 5474--5492."},{"key":"e_1_3_2_2_16_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 2956--2965","author":"Ma Yifei","year":"2019","unstructured":"Yifei Ma, Yu-Xiang Wang, and Balakrishnan Narayanaswamy. 2019. Imitation-regularized offline learning. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 2956--2965."},{"key":"e_1_3_2_2_17_1","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Metelli Alberto Maria","year":"2021","unstructured":"Alberto Maria Metelli, Alessio Russo, and Marcello Restelli. 2021. Subgaussian and Differentiable Importance Sampling for Off-Policy Evaluation and Learning. Advances in Neural Information Processing Systems, Vol. 34 (2021)."},{"volume-title":"Proceedings of the 17th International Conference on Machine Learning. 759--766","author":"Precup Doina","key":"e_1_3_2_2_18_1","unstructured":"Doina Precup, Richard S. Sutton, and Satinder P. Singh. 2000. Eligibility Traces for Off-Policy Policy Evaluation. In Proceedings of the 17th International Conference on Machine Learning. 759--766."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403139"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645501"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_2_22_1","volume-title":"International Conference on Machine Learning. PMLR, 29734--29759","author":"Saito Yuta","year":"2023","unstructured":"Yuta Saito, Ren Qingyang, and Thorsten Joachims. 2023. Off-Policy Evaluation for Large Action Spaces via Conjunct Effect Modeling. In International Conference on Machine Learning. PMLR, 29734--29759."},{"key":"e_1_3_2_2_23_1","volume-title":"POTEC: Off-Policy Learning for Large Action Spaces via Two-Stage Policy Decomposition. arXiv preprint arXiv:2402.06151","author":"Saito Yuta","year":"2024","unstructured":"Yuta Saito, Jihan Yao, and Thorsten Joachims. 2024. POTEC: Off-Policy Learning for Large Action Spaces via Two-Stage Policy Decomposition. arXiv preprint arXiv:2402.06151 (2024)."},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning","volume":"119","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud\u00edk. 2020a. Doubly Robust Off-Policy Evaluation with Shrinkage. In Proceedings of the 37th International Conference on Machine Learning, Vol. 119. PMLR, 9167--9176."},{"key":"e_1_3_2_2_25_1","volume-title":"International Conference on Machine Learning. PMLR, 9196--9205","author":"Su Yi","year":"2020","unstructured":"Yi Su, Pavithra Srinath, and Akshay Krishnamurthy. 2020b. Adaptive estimator selection for off-policy evaluation. In International Conference on Machine Learning. PMLR, 9196--9205."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i2.19104"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26195"},{"key":"e_1_3_2_2_28_1","first-page":"49","article-title":"Off-policy evaluation and learning for external validity under a covariate shift","volume":"33","author":"Uehara Masatoshi","year":"2020","unstructured":"Masatoshi Uehara, Masahiro Kato, and Shota Yasui. 2020. Off-policy evaluation and learning for external validity under a covariate shift. Advances in Neural Information Processing Systems, Vol. 33 (2020), 49--61.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_29_1","volume-title":"International Conference on Machine Learning. PMLR, 3589--3597","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dudik. 2017. Optimal and adaptive off-policy evaluation in contextual bandits. In International Conference on Machine Learning. PMLR, 3589--3597."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709237","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709237","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:40:24Z","timestamp":1755358824000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709237"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":29,"alternative-id":["10.1145\/3690624.3709237","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709237","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}