{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T05:41:47Z","timestamp":1757310107308,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645501","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"3576-3585","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Off-Policy Evaluation for Large Action Spaces via Policy Convolution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1228-746X","authenticated-orcid":false,"given":"Noveen","family":"Sachdeva","sequence":"first","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8957-0509","authenticated-orcid":false,"given":"Lequn","family":"Wang","sequence":"additional","affiliation":[{"name":"Netflix, Los Gatos, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9701-6473","authenticated-orcid":false,"given":"Dawen","family":"Liang","sequence":"additional","affiliation":[{"name":"Netflix, Los Gatos, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1672-0507","authenticated-orcid":false,"given":"Nathan","family":"Kallus","sequence":"additional","affiliation":[{"name":"Netflix & Cornell University, Los Gatos, CA, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0955-7588","authenticated-orcid":false,"given":"Julian","family":"McAuley","sequence":"additional","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"International Conference on Learning Representations","author":"Buckman Jacob","year":"2021","unstructured":"Jacob Buckman, Carles Gelada, and Marc G Bellemare. The importance of pessimism in fixed-dataset policy optimization. In International Conference on Learning Representations, 2021."},{"key":"e_1_3_2_2_2_1","first-page":"1597","volume-title":"International conference on machine learning","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. A simple framework for contrastive learning of visual representations. In International conference on machine learning, pages 1597--1607. PMLR, 2020."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the 28th International Conference on International Conference on Machine Learning, ICML'11","author":"Dud\u00edk Miroslav","year":"2011","unstructured":"Miroslav Dud\u00edk, John Langford, and Lihong Li. Doubly robust policy evaluation and learning. In Proceedings of the 28th International Conference on International Conference on Machine Learning, ICML'11, 2011."},{"key":"e_1_3_2_2_5_1","first-page":"1447","volume-title":"International Conference on Machine Learning","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. More robust doubly robust off-policy evaluation. In International Conference on Machine Learning, pages 1447--1456. PMLR, 2018."},{"key":"e_1_3_2_2_6_1","volume-title":"Marcello Restelli, and Paolo Cremonesi. 
Off-policy evaluation with deficient support using side information. In Alice H. Oh","author":"Felicioni Nicol\u00f2","year":"2022","unstructured":"Nicol\u00f2 Felicioni, Maurizio Ferrari Dacrema, Marcello Restelli, and Paolo Cremonesi. Off-policy evaluation with deficient support using side information. In Alice H. Oh, Alekh Agarwal, Danielle Belgrave, and Kyunghyun Cho, editors, Advances in Neural Information Processing Systems, 2022."},{"key":"e_1_3_2_2_7_1","volume-title":"The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis)","author":"Maxwell Harper F","year":"2015","unstructured":"F Maxwell Harper and Joseph A Konstan. The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis), 2015."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1080\/00401706.1995.10484303"},{"key":"e_1_3_2_2_9_1","volume-title":"The propensity score with continuous treatments. Applied Bayesian modeling and causal inference from incomplete-data perspectives, 226164:73--84","author":"Hirano Keisuke","year":"2004","unstructured":"Keisuke Hirano and Guido W Imbens. The propensity score with continuous treatments. Applied Bayesian modeling and causal inference from incomplete-data perspectives, 226164:73--84, 2004."},{"key":"e_1_3_2_2_10_1","volume-title":"A generalization of sampling without replacement from a finite universe. Journal of the American statistical Association, 47(260):663--685","author":"Horvitz Daniel G","year":"1952","unstructured":"Daniel G Horvitz and Donovan J Thompson. A generalization of sampling without replacement from a finite universe. 
Journal of the American statistical Association, 47(260):663--685, 1952."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1198\/106186008X320456"},{"key":"e_1_3_2_2_12_1","volume-title":"International Conference on Learning Representations","author":"Joachims Thorsten","year":"2018","unstructured":"Thorsten Joachims, Adith Swaminathan, and Maarten De Rijke. Deep learning with logged bandit feedback. In International Conference on Learning Representations, 2018."},{"key":"e_1_3_2_2_13_1","volume-title":"Operations Research","author":"Kallus Nathan","year":"2022","unstructured":"Nathan Kallus and Masatoshi Uehara. Efficiently breaking the curse of horizon in off-policy evaluation with double reinforcement learning. Operations Research, 2022."},{"key":"e_1_3_2_2_14_1","first-page":"1243","volume-title":"International conference on artificial intelligence and statistics","author":"Kallus Nathan","year":"2018","unstructured":"Nathan Kallus and Angela Zhou. Policy evaluation and optimization with continuous treatments. In International conference on artificial intelligence and statistics, pages 1243--1251. PMLR, 2018."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1374376.1374475"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_2_17_1","first-page":"3913","article-title":"Local metric learning for off-policy evaluation in contextual bandits with continuous actions","volume":"35","author":"Lee Haanvid","year":"2022","unstructured":"Haanvid Lee, Jongmin Lee, Yunseon Choi, Wonseok Jeon, Byung-Jun Lee, Yung-Kyun Noh, and Kee-Eung Kim. Local metric learning for off-policy evaluation in contextual bandits with continuous actions. 
Advances in Neural Information Processing Systems, 35:3913--3925, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_18_1","first-page":"28194","article-title":"Oracle inequalities for model selection in offline reinforcement learning","volume":"35","author":"Lee Jonathan N","year":"2022","unstructured":"Jonathan N Lee, Bo Dai, and Emma Brunskill. Oracle inequalities for model selection in offline reinforcement learning. Advances in Neural Information Processing Systems, 35:28194--28207, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17058"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380130"},{"key":"e_1_3_2_2_21_1","first-page":"8119","article-title":"Subgaussian and differentiable importance sampling for off-policy evaluation and learning","volume":"34","author":"Metelli Alberto Maria","year":"2021","unstructured":"Alberto Maria Metelli, Alessio Russo, and Marcello Restelli. Subgaussian and differentiable importance sampling for off-policy evaluation and learning. Advances in Neural Information Processing Systems, 34:8119--8132, 2021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449815"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583448"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1214\/07-STS227D"},{"key":"e_1_3_2_2_25_1","volume-title":"Advances in Neural Information Processing Systems","author":"Sachdeva Noveen","year":"2022","unstructured":"Noveen Sachdeva, Mehak Preet Dhaliwal, Carole-Jean Wu, and Julian McAuley. Infinite recommendation networks: A data-centric approach. 
In Advances in Neural Information Processing Systems, 2022."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403139"},{"key":"e_1_3_2_2_27_1","first-page":"19089","volume-title":"International Conference on Machine Learning","author":"Saito Yuta","year":"2022","unstructured":"Yuta Saito and Thorsten Joachims. Off-policy evaluation for large action spaces via embeddings. In International Conference on Machine Learning, pages 19089--19122. PMLR, 2022."},{"key":"e_1_3_2_2_28_1","volume-title":"international conference on Machine learning","author":"Saito Yuta","year":"2023","unstructured":"Yuta Saito, Qingyang Ren, and Thorsten Joachims. Off-policy evaluation for large action spaces via conjunct effect modeling. In international conference on Machine learning, 2023."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371783"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403121"},{"key":"e_1_3_2_2_31_1","first-page":"9422","volume-title":"International Conference on Machine Learning","author":"Sen Rajat","year":"2021","unstructured":"Rajat Sen, Alexander Rakhlin, Lexing Ying, Rahul Kidambi, Dean Foster, Daniel N Hill, and Inderjit S Dhillon. Top-k extreme contextual bandits with arm hierarchy. In International Conference on Machine Learning, pages 9422--9433. PMLR, 2021."},{"key":"e_1_3_2_2_32_1","volume-title":"Proceedings of the 24th annual Conference On Learning Theory, pages 679--702. JMLR Workshop and Conference Proceedings","author":"Slivkins Aleksandrs","year":"2011","unstructured":"Aleksandrs Slivkins. Contextual bandits with similarity information. In Proceedings of the 24th annual Conference On Learning Theory, pages 679--702. 
JMLR Workshop and Conference Proceedings, 2011."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104447"},{"key":"e_1_3_2_2_34_1","first-page":"9167","volume-title":"International Conference on Machine Learning","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud\u00edk. Doubly robust off-policy evaluation with shrinkage. In International Conference on Machine Learning, pages 9167--9176. PMLR, 2020."},{"key":"e_1_3_2_2_35_1","first-page":"9196","volume-title":"International Conference on Machine Learning","author":"Su Yi","year":"2020","unstructured":"Yi Su, Pavithra Srinath, and Akshay Krishnamurthy. Adaptive estimator selection for off-policy evaluation. In International Conference on Machine Learning, pages 9196--9205. PMLR, 2020."},{"key":"e_1_3_2_2_36_1","first-page":"6005","volume-title":"International Conference on Machine Learning","author":"Su Yi","year":"2019","unstructured":"Yi Su, Lequn Wang, Michele Santacatterina, and Thorsten Joachims. Cab: Continuous adaptive blending for policy evaluation and learning. In International Conference on Machine Learning, pages 6005--6014. PMLR, 2019."},{"key":"e_1_3_2_2_37_1","first-page":"814","volume-title":"International Conference on Machine Learning","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. Counterfactual risk minimization: Learning from logged bandit feedback. In International Conference on Machine Learning, pages 814--823. PMLR, 2015."},{"key":"e_1_3_2_2_38_1","volume-title":"The self-normalized estimator for counterfactual learning. advances in neural information processing systems, 28","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. The self-normalized estimator for counterfactual learning. 
advances in neural information processing systems, 28, 2015."},{"key":"e_1_3_2_2_39_1","first-page":"30","article-title":"Off-policy evaluation for slate recommendation","author":"Swaminathan Adith","year":"2017","unstructured":"Adith Swaminathan, Akshay Krishnamurthy, Alekh Agarwal, Miro Dudik, John Langford, Damien Jose, and Imed Zitouni. Off-policy evaluation for slate recommendation. Advances in Neural Information Processing Systems, 30, 2017.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6059"},{"key":"e_1_3_2_2_41_1","volume-title":"Neural Information Processing Systems","author":"Doucet Arnaud","year":"2023","unstructured":"Arnaud Doucet, Rob Cornish, and Jean-Francois Ton. Marginal density ratio for off-policy evaluation in contextual bandits. In Neural Information Processing Systems, 2023."},{"key":"e_1_3_2_2_42_1","first-page":"2139","volume-title":"International Conference on Machine Learning","author":"Thomas Philip","year":"2016","unstructured":"Philip Thomas and Emma Brunskill. Data-efficient off-policy policy evaluation for reinforcement learning. In International Conference on Machine Learning, pages 2139--2148. PMLR, 2016."},{"key":"e_1_3_2_2_43_1","volume-title":"A review of off-policy evaluation in reinforcement learning. arXiv preprint arXiv:2212.06355","author":"Uehara Masatoshi","year":"2022","unstructured":"Masatoshi Uehara, Chengchun Shi, and Nathan Kallus. A review of off-policy evaluation in reinforcement learning. arXiv preprint arXiv:2212.06355, 2022."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1201\/b14876"},{"key":"e_1_3_2_2_45_1","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Wang Lequn","year":"2024","unstructured":"Lequn Wang, Akshay Krishnamurthy, and Aleksandrs Slivkins. Oracle-efficient pessimism: Offline policy optimization in contextual bandits. 
In International Conference on Artificial Intelligence and Statistics, 2024."},{"key":"e_1_3_2_2_46_1","first-page":"3589","volume-title":"International Conference on Machine Learning","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dud\u00edk. Optimal and adaptive off-policy evaluation in contextual bandits. In International Conference on Machine Learning, pages 3589--3597. PMLR, 2017."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219890"}],"event":{"name":"WWW '24: The ACM Web Conference 2024","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Singapore Singapore","acronym":"WWW '24"},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645501","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645501","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:34:32Z","timestamp":1755822872000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645501"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":47,"alternative-id":["10.1145\/3589334.3645501","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645501","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}