{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T02:08:03Z","timestamp":1777342083321,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645446","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"3432-3443","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Long-term Off-Policy Evaluation and Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4357-5835","authenticated-orcid":false,"given":"Yuta","family":"Saito","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0065-9978","authenticated-orcid":false,"given":"Himan","family":"Abdollahpouri","sequence":"additional","affiliation":[{"name":"Spotify, New York, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0405-2441","authenticated-orcid":false,"given":"Jesse","family":"Anderton","sequence":"additional","affiliation":[{"name":"Spotify, New York, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9538-047X","authenticated-orcid":false,"given":"Ben","family":"Carterette","sequence":"additional","affiliation":[{"name":"Spotify, New York, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3531-3096","authenticated-orcid":false,"given":"Mounia","family":"Lalmas","sequence":"additional","affiliation":[{"name":"Spotify, London, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Effective Evaluation Using Logged Bandit Feedback from Multiple Loggers. KDD","author":"Agarwal Aman","year":"2017","unstructured":"Aman Agarwal, Soumya Basu, Tobias Schnabel, and Thorsten Joachims. 2017. Effective Evaluation Using Logged Bandit Feedback from Multiple Loggers. KDD (2017), 687--696."},{"key":"e_1_3_2_2_2_1","volume-title":"Exponential Smoothing for Off-Policy Learning. arXiv preprint arXiv:2305.15877","author":"Aouali Imad","year":"2023","unstructured":"Imad Aouali, Victor-Emmanuel Brunel, David Rohde, and Anna Korba. 2023. Exponential Smoothing for Off-Policy Learning. arXiv preprint arXiv:2305.15877 (2023)."},{"key":"e_1_3_2_2_3_1","volume-title":"Combining experimental and observational data to estimate treatment effects on long term outcomes. arXiv preprint arXiv:2006.09676","author":"Athey Susan","year":"2020","unstructured":"Susan Athey, Raj Chetty, and Guido Imbens. 2020. Combining experimental and observational data to estimate treatment effects on long term outcomes. arXiv preprint arXiv:2006.09676 (2020)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Susan Athey Raj Chetty Guido W Imbens and Hyunseung Kang. 2019. The surrogate index: Combining short-term proxies to estimate long-term treatment effects more rapidly and precisely. Technical Report. National Bureau of Economic Research.","DOI":"10.3386\/w26463"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512038"},{"key":"e_1_3_2_2_6_1","volume-title":"Semiparametric estimation of long-term treatment effects. arXiv preprint arXiv:2107.14405","author":"Chen Jiafeng","year":"2021","unstructured":"Jiafeng Chen and David M Ritzwoller. 2021. Semiparametric estimation of long-term treatment effects. arXiv preprint arXiv:2107.14405 (2021)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441719"},{"key":"e_1_3_2_2_8_1","volume-title":"Learning Action Embeddings for Off-Policy Evaluation. arXiv preprint arXiv:2305.03954","author":"Cief Matej","year":"2023","unstructured":"Matej Cief, Jacek Golebiowski, Philipp Schmidt, Ziawasch Abedjan, and Artur Bekasov. 2023. Learning Action Embeddings for Off-Policy Evaluation. arXiv preprint arXiv:2305.03954 (2023)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939700"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441737"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_2_2_12_1","volume-title":"Proceedings of the 28th International Conference on International Conference on Machine Learning","author":"Dud'ik Miroslav","year":"2011","unstructured":"Miroslav Dud'ik, John Langford, and Lihong Li. 2011. Doubly Robust Policy Evaluation and Learning. In Proceedings of the 28th International Conference on International Conference on Machine Learning (Bellevue, Washington, USA) (ICML'11). Omnipress, Madison, WI, USA, 1097--1104."},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More Robust Doubly Robust Off-Policy Evaluation. In Proceedings of the 35th International Conference on Machine Learning, Vol. 80. PMLR, 1447--1456."},{"key":"e_1_3_2_2_14_1","volume-title":"Surrogate and auxiliary endpoints in clinical trials, with potential applications in cancer and AIDS research. Statistics in medicine","author":"Fleming Thomas R","year":"1994","unstructured":"Thomas R Fleming, Ross L Prentice, Margaret S Pepe, and David Glidden. 1994. Surrogate and auxiliary endpoints in clinical trials, with potential applications in cancer and AIDS research. Statistics in medicine, Vol. 13, 9 (1994), 955--968."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3291027"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings 21st Conference on Knowledge Discovery and Data Mining","author":"Hohnhold Henning","year":"2015","unstructured":"Henning Hohnhold, Deirdre O'Brien, and Diane Tang. 2015. Focus on the Long-Term: It's better for Users and Business. In Proceedings 21st Conference on Knowledge Discovery and Data Mining. Sydney, Australia."},{"key":"e_1_3_2_2_17_1","volume-title":"Estimating Effects of Long-Term Treatments. arXiv preprint arXiv:2308.08152","author":"Huang Shan","year":"2023","unstructured":"Shan Huang, Chen Wang, Yuan Yuan, Jinglong Zhao, and Jingjing Zhang. 2023. Estimating Effects of Long-Term Treatments. arXiv preprint arXiv:2308.08152 (2023)."},{"key":"e_1_3_2_2_18_1","volume-title":"Long-term causal inference under persistent confounding via data combination. arXiv preprint arXiv:2202.07234","author":"Imbens Guido","year":"2022","unstructured":"Guido Imbens, Nathan Kallus, Xiaojie Mao, and Yuhao Wang. 2022. Long-term causal inference under persistent confounding via data combination. arXiv preprint arXiv:2202.07234 (2022)."},{"key":"e_1_3_2_2_19_1","volume-title":"On the role of surrogates in the efficient estimation of treatment effects with limited outcome data. arXiv preprint arXiv:2003.12408","author":"Kallus Nathan","year":"2020","unstructured":"Nathan Kallus and Xiaojie Mao. 2020. On the role of surrogates in the efficient estimation of treatment effects with limited outcome data. arXiv preprint arXiv:2003.12408 (2020)."},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Kallus Nathan","year":"2021","unstructured":"Nathan Kallus, Yuta Saito, and Masatoshi Uehara. 2021. Optimal Off-Policy Evaluation from Multiple Logging Policies. In Proceedings of the 38th International Conference on Machine Learning, Vol. 139. PMLR, 5247--5256."},{"key":"e_1_3_2_2_21_1","volume-title":"Towards Assessing and Benchmarking Risk-Return Tradeoff of Off-Policy Evaluation. In International Conference on Learning Representations.","author":"Kiyohara Haruka","year":"2024","unstructured":"Haruka Kiyohara, Ren Kishimoto, Kosuke Kawakami, Ken Kobayashi, Kazuhide Nakata, and Yuta Saito. 2024 a. Towards Assessing and Benchmarking Risk-Return Tradeoff of Off-Policy Evaluation. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_22_1","volume-title":"2024 b. Off-policy evaluation of slate bandit policies via optimizing abstraction. arXiv preprint arXiv:2402.02171","author":"Kiyohara Haruka","year":"2024","unstructured":"Haruka Kiyohara, Masahiro Nomura, and Yuta Saito. 2024 b. Off-policy evaluation of slate bandit policies via optimizing abstraction. arXiv preprint arXiv:2402.02171 (2024)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498380"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599447"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599386"},{"key":"e_1_3_2_2_26_1","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Metelli Alberto Maria","year":"2021","unstructured":"Alberto Maria Metelli, Alessio Russo, and Marcello Restelli. 2021. Subgaussian and Differentiable Importance Sampling for Off-Policy Evaluation and Learning. Advances in Neural Information Processing Systems , Vol. 34 (2021)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583448"},{"key":"e_1_3_2_2_28_1","volume-title":"Surrogate endpoints in clinical trials: definition and operational criteria. Statistics in medicine","author":"Prentice Ross L","year":"1989","unstructured":"Ross L Prentice. 1989. Surrogate endpoints in clinical trials: definition and operational criteria. Statistics in medicine, Vol. 8, 4 (1989), 431--440."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/70.1.41"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403139"},{"key":"e_1_3_2_2_31_1","volume-title":"Off-policy evaluation for large action spaces via policy convolution. arXiv preprint arXiv:2310.15433","author":"Sachdeva Noveen","year":"2023","unstructured":"Noveen Sachdeva, Lequn Wang, Dawen Liang, Nathan Kallus, and Julian McAuley. 2023. Off-policy evaluation for large action spaces via policy convolution. arXiv preprint arXiv:2310.15433 (2023)."},{"key":"e_1_3_2_2_32_1","volume-title":"Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. arXiv preprint arXiv:2008.07146","author":"Saito Yuta","year":"2020","unstructured":"Yuta Saito, Shunsuke Aihara, Megumi Matsutani, and Yusuke Narita. 2020. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. arXiv preprint arXiv:2008.07146 (2020)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_2_34_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning. 19089--19122","author":"Saito Yuta","year":"2022","unstructured":"Yuta Saito and Thorsten Joachims. 2022. Off-Policy Evaluation for Large Action Spaces via Embeddings. In Proceedings of the 39th International Conference on Machine Learning. 19089--19122."},{"key":"e_1_3_2_2_35_1","volume-title":"International Conference on Machine Learning. PMLR, 29734--29759","author":"Saito Yuta","year":"2023","unstructured":"Yuta Saito, Ren Qingyang, and Thorsten Joachims. 2023. Off-Policy Evaluation for Large Action Spaces via Conjunct Effect Modeling. In International Conference on Machine Learning. PMLR, 29734--29759."},{"key":"e_1_3_2_2_36_1","volume-title":"POTEC: Off-Policy Learning for Large Action Spaces via Two-Stage Policy Decomposition. arXiv preprint arXiv:2402.06151","author":"Saito Yuta","year":"2024","unstructured":"Yuta Saito, Jihan Yao, and Thorsten Joachims. 2024. POTEC: Off-Policy Learning for Large Action Spaces via Two-Stage Policy Decomposition. arXiv preprint arXiv:2402.06151 (2024)."},{"key":"e_1_3_2_2_37_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning","volume":"119","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud'ik. 2020. Doubly Robust Off-Policy Evaluation with Shrinkage. In Proceedings of the 37th International Conference on Machine Learning, Vol. 119. PMLR, 9167--9176."},{"key":"e_1_3_2_2_38_1","volume-title":"International Conference on Machine Learning","volume":"84","author":"Su Yi","year":"2019","unstructured":"Yi Su, Lequn Wang, Michele Santacatterina, and Thorsten Joachims. 2019. Cab: Continuous adaptive blending for policy evaluation and learning. In International Conference on Machine Learning, Vol. 84. 6005--6014."},{"key":"e_1_3_2_2_39_1","volume-title":"A review of off-policy evaluation in reinforcement learning. arXiv preprint arXiv:2212.06355","author":"Uehara Masatoshi","year":"2022","unstructured":"Masatoshi Uehara, Chengchun Shi, and Nathan Kallus. 2022. A review of off-policy evaluation in reinforcement learning. arXiv preprint arXiv:2212.06355 (2022)."},{"key":"e_1_3_2_2_40_1","volume-title":"Estimating long-term causal effects from short-term experiments and long-term observational data with unobserved confounding. arXiv preprint arXiv:2302.10625","author":"Goffrier Graham Van","year":"2023","unstructured":"Graham Van Goffrier, Lucas Maystre, and Ciar\u00e1n Gilligan-Lee. 2023. Estimating long-term causal effects from short-term experiments and long-term observational data with unobserved confounding. arXiv preprint arXiv:2302.10625 (2023)."},{"key":"e_1_3_2_2_41_1","volume-title":"Empirical Study of Off-Policy Policy Evaluation for Reinforcement Learning. arXiv preprint arXiv:1911.06854","author":"Voloshin Cameron","year":"2019","unstructured":"Cameron Voloshin, Hoang M Le, Nan Jiang, and Yisong Yue. 2019. Empirical Study of Off-Policy Policy Evaluation for Reinforcement Learning. arXiv preprint arXiv:1911.06854 (2019)."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539073"},{"key":"e_1_3_2_2_43_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning. ICML, 3589--3597","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dudik. 2017. Optimal and Adaptive Off-policy Evaluation in Contextual Bandits, In Proceedings of the 34th International Conference on Machine Learning. ICML, 3589--3597."},{"key":"e_1_3_2_2_44_1","volume-title":"Targeting for long-term outcomes. arXiv preprint arXiv:2010.15835","author":"Yang Jeremy","year":"2020","unstructured":"Jeremy Yang, Dean Eckles, Paramveer Dhillon, and Sinan Aral. 2020. Targeting for long-term outcomes. arXiv preprint arXiv:2010.15835 (2020)."}],"event":{"name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore","acronym":"WWW '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645446","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645446","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:28:21Z","timestamp":1755822501000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645446"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":44,"alternative-id":["10.1145\/3589334.3645446","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645446","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}