{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:10:21Z","timestamp":1757617821984,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,22]]},"DOI":"10.1145\/3705328.3748057","type":"proceedings-article","created":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:48:44Z","timestamp":1757155724000},"page":"350-359","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Off-Policy Evaluation of Candidate Generators in Two-Stage Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5567-3339","authenticated-orcid":false,"given":"Peiyao","family":"Wang","sequence":"first","affiliation":[{"name":"Amazon.com, Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4342-3133","authenticated-orcid":false,"given":"Zhan","family":"Shi","sequence":"additional","affiliation":[{"name":"Amazon.com, Vancouver, British Columbia, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2308-3198","authenticated-orcid":false,"given":"Amina","family":"Shabbeer","sequence":"additional","affiliation":[{"name":"Amazon.com, San Francisco, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9515-5456","authenticated-orcid":false,"given":"Ben","family":"London","sequence":"additional","affiliation":[{"name":"Amazon.com, Seattle, Washington, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,9,7]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"crossref","unstructured":"Aman Agarwal Soumya\u00a0Sankar Basu Tobias Schnabel and Thorsten Joachims. 2017. Effective Evaluation Using Logged Bandit Feedback from Multiple Loggers. Knowledge Discovery and Data Mining (2017).","DOI":"10.1145\/3097983.3098155"},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/2124295.2124354"},{"key":"e_1_3_3_3_4_2","unstructured":"L\u00e9on Bottou J. Peters Joaquin\u00a0Qui\u00f1onero Candela Denis\u00a0Xavier Charles David\u00a0Maxwell Chickering Elon Portugaly Dipankar Ray Patrice\u00a0Y. Simard and Edward Snelson. 2013. Counterfactual Reasoning and Learning Systems: The Example of Computational Advertising. Journal of Machine Learning Research 14 (2013)."},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/1864708.1864770"},{"key":"e_1_3_3_3_7_2","volume-title":"International Conference on Machine Learning","author":"Dud\u00edk Miroslav","year":"2011","unstructured":"Miroslav Dud\u00edk, John Langford, and Lihong Li. 2011. Doubly Robust Policy Evaluation and Learning. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186183"},{"key":"e_1_3_3_3_9_2","volume-title":"International Conference on Machine Learning","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More Robust Doubly Robust Off-policy Evaluation. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_10_2","volume-title":"Neural Information Processing Systems","author":"Felicioni Nicol\u00f2","year":"2022","unstructured":"Nicol\u00f2 Felicioni, Maurizio\u00a0Ferrari Dacrema, Marcello Restelli, and Paolo Cremonesi. 2022. Off-Policy Evaluation with Deficient Support Using Side Information. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_11_2","volume-title":"ICML","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-Policy Deep Reinforcement Learning without Exploration. In ICML."},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159687"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"crossref","unstructured":"Daniel\u00a0G. Horvitz and D.\u00a0J. Thompson. 1952. A generalization of sampling without replacement from a finite universe. J. Amer. Statist. Assoc. 47 260 (1952) 663\u2013685.","DOI":"10.1080\/01621459.1952.10483446"},{"key":"e_1_3_3_3_14_2","volume-title":"Neural Information Processing Systems","author":"Hron Jiri","year":"2021","unstructured":"Jiri Hron, Karl Krauth, Michael\u00a0I. Jordan, and Niki Kilbertus. 2021. On component interactions in two-stage recommender systems. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"crossref","unstructured":"Edward\u00a0L. Ionides. 2008. Truncated Importance Sampling. Journal of Computational and Graphical Statistics 17 2 (2008) 295\u2013311.","DOI":"10.1198\/106186008X320456"},{"key":"e_1_3_3_3_16_2","volume-title":"International Conference on Machine Learning","author":"Jiang Nan","year":"2016","unstructured":"Nan Jiang and Lihong Li. 2016. Doubly Robust Off-policy Value Evaluation for Reinforcement Learning. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_17_2","volume-title":"International Conference on Learning Representations","author":"Joachims Thorsten","year":"2018","unstructured":"Thorsten Joachims, Adith Swaminathan, and Maarten de Rijke. 2018. Deep Learning with Logged Bandit Feedback. In International Conference on Learning Representations."},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018699"},{"key":"e_1_3_3_3_19_2","volume-title":"Neural Information Processing Systems","author":"Kallus Nathan","year":"2018","unstructured":"Nathan Kallus. 2018. Balanced Policy Evaluation and Learning. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_20_2","volume-title":"Artificial Intelligence and Statistics","author":"Kallus Nathan","year":"2018","unstructured":"Nathan Kallus and Angela Zhou. 2018. Policy Evaluation and Optimization with Continuous Treatments. In Artificial Intelligence and Statistics."},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390223"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19460-3"},{"key":"e_1_3_3_3_23_2","volume-title":"Artificial Intelligence and Statistics","author":"Li Lihong","year":"2015","unstructured":"Lihong Li, R\u00e9mi Munos, and Csaba Szepesvari. 2015. Toward Minimax Off-policy Value Estimation. In Artificial Intelligence and Statistics."},{"key":"e_1_3_3_3_24_2","unstructured":"Anqi Liu Hao Liu Anima Anandkumar and Yisong Yue. 2019. Triply Robust Off-Policy Evaluation. CoRR abs\/1911.05811 (2019)."},{"key":"e_1_3_3_3_25_2","volume-title":"NeurIPS Workshop on Offline Reinforcement Learning","author":"London Ben","year":"2020","unstructured":"Ben London and Thorsten Joachims. 2020. Offline Policy Evaluation with New Arms. In NeurIPS Workshop on Offline Reinforcement Learning."},{"key":"e_1_3_3_3_26_2","volume-title":"International Conference on Machine Learning","author":"London Ben","year":"2019","unstructured":"Ben London and Ted Sandler. 2019. Bayesian Counterfactual Risk Minimization. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_27_2","volume-title":"Neural Information Processing Systems","author":"Mahmood Ashique\u00a0Rupam","year":"2014","unstructured":"Ashique\u00a0Rupam Mahmood, H.\u00a0V. Hasselt, and Richard\u00a0S. Sutton. 2014. Weighted importance sampling for off-policy learning with linear function approximation. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_28_2","volume-title":"Knowledge Discovery and Data Mining","author":"Okura Shumpei","year":"2017","unstructured":"Shumpei Okura, Yukihiro Tagami, Shingo Ono, and Akira Tajima. 2017. Embedding-based news recommendation for millions of users. In Knowledge Discovery and Data Mining."},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583448"},{"key":"e_1_3_3_3_30_2","volume-title":"Knowledge Discovery and Data Mining","author":"Sachdeva Noveen","year":"2020","unstructured":"Noveen Sachdeva, Yi-Hsun Su, and Thorsten Joachims. 2020. Off-policy Bandits with Deficient Support. In Knowledge Discovery and Data Mining."},{"key":"e_1_3_3_3_31_2","unstructured":"Yuta Saito Shunsuke Aihara Megumi Matsutani and Yusuke Narita. 2020. Large-scale Open Dataset Pipeline and Benchmark for Bandit Algorithms. ArXiv abs\/2008.07146 (2020)."},{"key":"e_1_3_3_3_32_2","volume-title":"International Conference on Machine Learning","author":"Saito Yuta","year":"2022","unstructured":"Yuta Saito and Thorsten Joachims. 2022. Off-Policy Evaluation for Large Action Spaces via Embeddings. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_33_2","volume-title":"International Conference on Machine Learning","author":"Saito Yuta","year":"2023","unstructured":"Yuta Saito, Qingyang Ren, and Thorsten Joachims. 2023. Off-Policy Evaluation for Large Action Spaces via Conjunct Effect Modeling. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_34_2","volume-title":"International Conference on Machine Learning","author":"Schnabel Tobias","year":"2016","unstructured":"Tobias Schnabel, Adith Swaminathan, Ashudeep Singh, Navin Chandak, and Thorsten Joachims. 2016. Recommendations as Treatments: Debiasing Learning and Evaluation. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_35_2","volume-title":"International Conference on Machine Learning","author":"Schulman John","year":"2015","unstructured":"John Schulman, Sergey Levine, P. Abbeel, Michael\u00a0I. Jordan, and Philipp Moritz. 2015. Trust Region Policy Optimization. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688099"},{"key":"e_1_3_3_3_37_2","volume-title":"Neural Information Processing Systems","author":"Strehl Alexander\u00a0L.","year":"2010","unstructured":"Alexander\u00a0L. Strehl, John Langford, Lihong Li, and Sham\u00a0M. Kakade. 2010. Learning from Logged Implicit Exploration Data. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_38_2","volume-title":"International Conference on Machine Learning","author":"Su Yi-Hsun","year":"2020","unstructured":"Yi-Hsun Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud\u00edk. 2020. Doubly robust off-policy evaluation with shrinkage. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_39_2","volume-title":"International Conference on Machine Learning","author":"Su Yi-Hsun","year":"2019","unstructured":"Yi-Hsun Su, Lequn Wang, Michele Santacatterina, and Thorsten Joachims. 2019. CAB: Continuous Adaptive Blending for Policy Evaluation and Learning. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_40_2","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. Batch Learning from Logged Bandit Feedback through Counterfactual Risk Minimization. Journal of Machine Learning Research 16 (2015)."},{"key":"e_1_3_3_3_41_2","volume-title":"Neural Information Processing Systems","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_42_2","volume-title":"Neural Information Processing Systems","author":"Swaminathan Adith","year":"2017","unstructured":"Adith Swaminathan, Akshay Krishnamurthy, Alekh Agarwal, Miroslav Dud\u00edk, John Langford, Damien Jose, and Imed Zitouni. 2017. Off-policy evaluation for slate recommendation. In Neural Information Processing Systems."},{"key":"e_1_3_3_3_43_2","volume-title":"International Conference on Machine Learning","author":"Thomas Philip\u00a0S.","year":"2016","unstructured":"Philip\u00a0S. Thomas and Emma Brunskill. 2016. Data-Efficient Off-Policy Policy Evaluation for Reinforcement Learning. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","DOI":"10.5555\/2888116.2888134"},{"key":"e_1_3_3_3_45_2","volume-title":"International Conference on Machine Learning","author":"Vlassis Nikos\u00a0A.","year":"2019","unstructured":"Nikos\u00a0A. Vlassis, Aur\u00e9lien\u00a0F. Bibaut, Maria Dimakopoulou, and Tony Jebara. 2019. On the Design of Estimators for Bandit Off-Policy Evaluation. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_46_2","volume-title":"Neural Information Processing SystemsNeural Information Processing Systems","author":"Vlassis Nikos\u00a0A.","year":"2021","unstructured":"Nikos\u00a0A. Vlassis, Ashok Chandrashekar, Fernando\u00a0Amat Gil, and Nathan Kallus. 2021. Control variates for slate off-policy evaluation, In Neural Information Processing Systems. Neural Information Processing Systems."},{"key":"e_1_3_3_3_47_2","volume-title":"International Conference on Machine Learning","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dud\u00edk. 2017. Optimal and Adaptive Off-policy Evaluation in Contextual Bandits. In International Conference on Machine Learning."},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.331"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"crossref","unstructured":"Dongkuan Xu and Ying jie Tian. 2015. A Comprehensive Survey of Clustering Algorithms. Annals of Data Science 2 2 (2015) 165\u2013193.","DOI":"10.1007\/s40745-015-0040-1"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346996"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"crossref","unstructured":"Eva Zangerle and Christine Bauer. 2022. Evaluating recommender systems: survey and framework. Comput. Surveys 55 8 (2022) 1\u201338.","DOI":"10.1145\/3556536"}],"event":{"name":"RecSys '25: Nineteenth ACM Conference on Recommender Systems","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGIR ACM Special Interest Group on Information Retrieval","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Prague Czech Republic","acronym":"RecSys '25"},"container-title":["Proceedings of the Nineteenth ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3705328.3748057","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T11:44:19Z","timestamp":1757159059000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3705328.3748057"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,7]]},"references-count":50,"alternative-id":["10.1145\/3705328.3748057","10.1145\/3705328"],"URL":"https:\/\/doi.org\/10.1145\/3705328.3748057","relation":{},"subject":[],"published":{"date-parts":[[2025,9,7]]},"assertion":[{"value":"2025-09-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}