{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:15:27Z","timestamp":1757312127495,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3688162","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"878-883","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["\u0394-OPE: Off-Policy Estimation with Pairs of Policies"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6256-5814","authenticated-orcid":false,"given":"Olivier","family":"Jeunen","sequence":"first","affiliation":[{"name":"ShareChat, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4942-7779","authenticated-orcid":false,"given":"Aleksei","family":"Ustimenko","sequence":"additional","affiliation":[{"name":"ShareChat, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/2567709.2567766"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441764"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546758"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Peter Dayan. 1991. Reinforcement Comparison. In Connectionist Models David\u00a0S. Touretzky Jeffrey\u00a0L. Elman Terrence\u00a0J. Sejnowski and Geoffrey\u00a0E. Hinton (Eds.). Morgan Kaufmann 45\u201351. https:\/\/doi.org\/10.1016\/B978-1-4832-1448-1.50011-1","DOI":"10.1016\/B978-1-4832-1448-1.50011-1"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3411552"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a080)","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More Robust Doubly Robust Off-policy Evaluation. In Proceedings of the 35th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a080), Jennifer Dy and Andreas Krause (Eds.). PMLR, 1447\u20131456. https:\/\/proceedings.mlr.press\/v80\/farajtabar18a.html"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5797"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159687"},{"key":"e_1_3_2_1_11_1","volume-title":"Variance Reduction Techniques for Gradient Estimates in Reinforcement Learning. J. Mach. Learn. Res. 5 (dec","author":"Greensmith Evan","year":"2004","unstructured":"Evan Greensmith, Peter\u00a0L. Bartlett, and Jonathan Baxter. 2004. Variance Reduction Techniques for Gradient Estimates in Reinforcement Learning. J. Mach. Learn. Res. 5 (dec 2004), 1471\u20131530."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3291027"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3636451"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 18th ACM Conference on Recommender Systems(RecSys \u201924)","author":"Gupta Shashank","year":"2024","unstructured":"Shashank Gupta, Olivier Jeunen, Harrie Oosterhuis, and Maarten de Rijke. 2024. Optimal Baseline Corrections for Off-Policy Contextual Bandits. In Proceedings of the 18th ACM Conference on Recommender Systems(RecSys \u201924). arxiv:2405.05736\u00a0[cs.LG]"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605114"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591760"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1952.10483446"},{"volume-title":"Offline approaches to recommendation with online success. Ph.\u00a0D. Dissertation","author":"Jeunen Olivier","key":"e_1_3_2_1_18_1","unstructured":"Olivier Jeunen. 2021. Offline approaches to recommendation with online success. Ph.\u00a0D. Dissertation. University of Antwerp."},{"key":"e_1_3_2_1_19_1","volume-title":"Proc. of the ACM RecSys Workshop on Bandit Learning from User Interactions(REVEAL \u201920)","author":"Jeunen Olivier","year":"2020","unstructured":"Olivier Jeunen and Bart Goethals. 2020. An Empirical Evaluation of Doubly Robust Learning for Recommendation. In Proc. of the ACM RecSys Workshop on Bandit Learning from User Interactions(REVEAL \u201920)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3568029"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3547409"},{"key":"e_1_3_2_1_23_1","volume-title":"RecSys workshop on Causality, Counterfactuals and Sequential Decision-Making(CONSEQUENCES \u201924)","author":"Jeunen Olivier","year":"2023","unstructured":"Olivier Jeunen and Ben London. 2023. Offline Recommender System Evaluation under Unobserved Confounding. In RecSys workshop on Causality, Counterfactuals and Sequential Decision-Making(CONSEQUENCES \u201924). arxiv:2309.04222\u00a0[cs.LG]"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Olivier Jeunen Jatin Mandav Ivan Potapov Nakul Agarwal Sourabh Vaid Wenzhe Shi and Aleksei Ustimenko. 2024. Multi-Objective Recommendation via Multivariate Policy Learning. arxiv:2405.02141\u00a0[cs.IR]","DOI":"10.1145\/3640457.3688132"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403175"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671512"},{"key":"e_1_3_2_1_27_1","volume-title":"Deep Learning with Logged Bandit Feedback. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJaP_-xAb","author":"Joachims Thorsten","year":"2018","unstructured":"Thorsten Joachims, Adith Swaminathan, and Maarten de Rijke. 2018. Deep Learning with Logged Bandit Feedback. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJaP_-xAb"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240334"},{"key":"e_1_3_2_1_29_1","volume-title":"A note on importance sampling using standardized weights","author":"Kong Augustine","year":"1992","unstructured":"Augustine Kong. 1992. A note on importance sampling using standardized weights. University of Chicago, Dept. of Statistics, Tech. Rep 348 (1992)."},{"key":"e_1_3_2_1_30_1","unstructured":"Erich\u00a0L Lehmann and Joseph\u00a0P Romano. 2005. Testing statistical hypotheses."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401083"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539295"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380130"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474600"},{"key":"e_1_3_2_1_36_1","first-page":"21","article-title":"Empirical Bernstein Bounds and Sample Variance","volume":"1050","author":"Maurer Andreas","year":"2009","unstructured":"Andreas Maurer and Massimiliano Pontil. 2009. Empirical Bernstein Bounds and Sample Variance Penalization. Stat. 1050 (2009), 21.","journal-title":"Penalization. Stat."},{"key":"e_1_3_2_1_37_1","unstructured":"Art\u00a0B. Owen. 2013. Monte Carlo theory methods and examples."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635846"},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, Vol.\u00a01.","author":"Saito Yuta","year":"2021","unstructured":"Yuta Saito, Shunsuke Aihara, Megumi Matsutani, and Yusuke Narita. 2021. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, Vol.\u00a01."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a037)","author":"Schulman John","year":"2015","unstructured":"John Schulman, Sergey Levine, Pieter Abbeel, Michael Jordan, and Philipp Moritz. 2015. Trust Region Policy Optimization. In Proceedings of the 32nd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a037), Francis Bach and David Blei (Eds.). PMLR, Lille, France, 1889\u20131897. https:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"e_1_3_2_1_42_1","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. arxiv:1707.06347\u00a0[cs.LG]"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525762"},{"key":"e_1_3_2_1_44_1","volume-title":"International Conference on Machine Learning. PMLR, 9167\u20139176","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud\u00edk. 2020. Doubly robust off-policy evaluation with shrinkage. In International Conference on Machine Learning. PMLR, 9167\u20139176."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_1_46_1","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Advances in Neural Information Processing Systems Vol.\u00a028. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2015\/file\/39027dfad5138c9ca0c474d71db915c3-Paper.pdf"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3636449"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340631.3398666"},{"key":"e_1_3_2_1_49_1","volume-title":"Proceedings of the 30th World Wide Web Conference ACM Conference.","author":"Vasile Flavian","year":"2021","unstructured":"Flavian Vasile, David Rohde, Olivier Jeunen, Amine Benhalloum, and Otmane Sakhi. 2021. Recommender Systems Through the Lens of Decision Theory. In Proceedings of the 30th World Wide Web Conference ACM Conference."},{"key":"e_1_3_2_1_50_1","volume-title":"International Conference on Machine Learning. PMLR, 22491\u201322511","author":"Wan Runzhe","year":"2022","unstructured":"Runzhe Wan, Branislav Kveton, and Rui Song. 2022. Safe exploration for efficient policy evaluation and comparison. In International Conference on Machine Learning. PMLR, 22491\u201322511."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240355"}],"event":{"name":"RecSys '24: 18th ACM Conference on Recommender Systems","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Bari Italy","acronym":"RecSys '24"},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688162","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3688162","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:32Z","timestamp":1750294712000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688162"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":51,"alternative-id":["10.1145\/3640457.3688162","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3688162","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}