{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:10:26Z","timestamp":1757617826331,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,22]]},"DOI":"10.1145\/3705328.3748762","type":"proceedings-article","created":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:46:13Z","timestamp":1757155573000},"page":"1458-1462","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Bayesian Perspectives on Offline Evaluation for Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-2810-5800","authenticated-orcid":false,"given":"Michael","family":"Benigni","sequence":"first","affiliation":[{"name":"Politecnico di Milano, Milan, Italy"}]}],"member":"320","published-online":{"date-parts":[[2025,9,7]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","unstructured":"Imad Aouali Victor-Emmanuel Brunel David Rohde and Anna Korba. 2024. Bayesian Off-Policy Evaluation and Learning for Large Action Spaces. 10.48550\/arXiv.2402.14664arXiv:https:\/\/arXiv.org\/abs\/2402.14664 [cs] version: 1.","DOI":"10.48550\/arXiv.2402.14664"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","unstructured":"Imad Aouali Achraf Ait\u00a0Sidi Hammou Otmane Sakhi David Rohde and Flavian Vasile. 2024. Probabilistic Rank and Reward: A Scalable Model for Slate Recommendation. 10.48550\/arXiv.2208.06263arXiv:https:\/\/arXiv.org\/abs\/2208.06263 [cs].","DOI":"10.48550\/arXiv.2208.06263"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557040"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V39I15.33765"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Miroslav Dud\u0131k Dumitru Erhan John Langford and Lihong Li. 2014. Doubly Robust Policy Evaluation and Optimization. Statist. Sci. 29 4 (2014) 485\u2013511.","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","unstructured":"Nicol\u00f2 Felicioni Michael Benigni and Maurizio\u00a0Ferrari Dacrema. 2024. AutoOPE: Automated Off-Policy Estimator Selection. CoRR abs\/2406.18022 (2024). 10.48550\/ARXIV.2406.18022 arXiv:https:\/\/arXiv.org\/abs\/2406.18022","DOI":"10.48550\/ARXIV.2406.18022"},{"key":"e_1_3_3_2_8_2","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Felicioni Nicol\u00f2","year":"2022","unstructured":"Nicol\u00f2 Felicioni, Maurizio\u00a0Ferrari Dacrema, Marcello Restelli, and Paolo Cremonesi. 2022. Off-Policy Evaluation with Deficient Support Using Side Information. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/2645710.2645745"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159687"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Carlos\u00a0Alberto Gomez-Uribe and Neil Hunt. 2016. The Netflix Recommender System: Algorithms Business Value and Innovation. ACM Trans. Manag. Inf. Syst. 6 4 (2016) 13:1\u201313:19. 10.1145\/2843948","DOI":"10.1145\/2843948"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608839"},{"key":"e_1_3_3_2_13_2","first-page":"652","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"48","author":"Jiang Nan","year":"2016","unstructured":"Nan Jiang and Lihong Li. 2016. Doubly Robust Off-policy Value Evaluation for Reinforcement Learning. In Proceedings of the International Conference on Machine Learning (ICML) , Vol.\u00a048. 652\u2013661."},{"key":"e_1_3_3_2_14_2","first-page":"9597","volume-title":"Advances in Neural Information Processing Systems","author":"Karampatziakis Nikos","year":"2020","unstructured":"Nikos Karampatziakis, John Langford, and Paul Mineiro. 2020. Empirical Likelihood for Contextual Bandits. In Advances in Neural Information Processing Systems , Vol.\u00a033. Curran Associates, Inc., 9597\u20139607. https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/6d34d468ac8876333c4d7173b85efed9-Abstract.html"},{"key":"e_1_3_3_2_15_2","first-page":"817","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Langford John","year":"2007","unstructured":"John Langford and Tong Zhang. 2007. The Epoch-Greedy Algorithm for Multi-armed Bandits with Side Information. In Advances in Neural Information Processing Systems (NeurIPS). 817\u2013824."},{"key":"e_1_3_3_2_16_2","unstructured":"Alberto\u00a0Maria Metelli Matteo Papini Nico Montali and Marcello Restelli. 2020. Importance Sampling Techniques for Policy Optimization. J. Mach. Learn. Res. 21 (2020) 141:1\u2013141:75."},{"key":"e_1_3_3_2_17_2","first-page":"8119","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Metelli Alberto\u00a0Maria","year":"2021","unstructured":"Alberto\u00a0Maria Metelli, Alessio Russo, and Marcello Restelli. 2021. Subgaussian and Differentiable Importance Sampling for Off-Policy Evaluation and Learning. In Advances in Neural Information Processing Systems (NeurIPS). 8119\u20138132."},{"key":"e_1_3_3_2_18_2","unstructured":"Allen Nie Yash Chandak Christina\u00a0J Yuan Anirudhan Badrinath Yannis Flet-Berliac and Emma Brunskil. 2024. OPERA: Automatic Offline Policy Evaluation with Re-weighted Aggregates of Multiple Estimators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.17708 (2024)."},{"key":"e_1_3_3_2_19_2","first-page":"759","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Precup Doina","year":"2000","unstructured":"Doina Precup, Richard\u00a0S. Sutton, and Satinder\u00a0P. Singh. 2000. Eligibility Traces for Off-Policy Policy Evaluation. In Proceedings of the International Conference on Machine Learning (ICML). 759\u2013766."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645501"},{"key":"e_1_3_3_2_21_2","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS Datasets and Benchmarks)","author":"Saito Yuta","year":"2021","unstructured":"Yuta Saito, Shunsuke Aihara, Megumi Matsutani, and Yusuke Narita. 2021. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS Datasets and Benchmarks)."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_3_2_23_2","series-title":"Proceedings of Machine Learning Research","first-page":"19089","volume-title":"International Conference on Machine Learning, ICML 2022, 17-23 July 2022, Baltimore, Maryland, USA","volume":"162","author":"Saito Yuta","year":"2022","unstructured":"Yuta Saito and Thorsten Joachims. 2022. Off-Policy Evaluation for Large Action Spaces via Embeddings. In International Conference on Machine Learning, ICML 2022, 17-23 July 2022, Baltimore, Maryland, USA(Proceedings of Machine Learning Research, Vol.\u00a0162), Kamalika Chaudhuri, Stefanie Jegelka, Le\u00a0Song, Csaba Szepesv\u00e1ri, Gang Niu, and Sivan Sabato (Eds.). PMLR, 19089\u201319122. https:\/\/proceedings.mlr.press\/v162\/saito22a.html"},{"key":"e_1_3_3_2_24_2","series-title":"Proceedings of Machine Learning Research","first-page":"29734","volume-title":"International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA","volume":"202","author":"Saito Yuta","year":"2023","unstructured":"Yuta Saito, Qingyang Ren, and Thorsten Joachims. 2023. Off-Policy Evaluation for Large Action Spaces via Conjunct Effect Modeling. In International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA(Proceedings of Machine Learning Research, Vol.\u00a0202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 29734\u201329759. https:\/\/proceedings.mlr.press\/v202\/saito23b.html"},{"key":"e_1_3_3_2_25_2","first-page":"29734","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Saito Yuta","year":"2023","unstructured":"Yuta Saito, Qingyang Ren, and Thorsten Joachims. 2023. Off-policy evaluation for large action spaces via conjunct effect modeling. In Proceedings of the International Conference on Machine Learning (ICML). 29734\u201329759."},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474245"},{"key":"e_1_3_3_2_27_2","unstructured":"Otmane Sakhi Pierre Alquier and Nicolas Chopin. [n. d.]. PAC-Bayesian Offline Contextual Bandits With Guarantees. ([n. d.])."},{"key":"e_1_3_3_2_28_2","first-page":"80706","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Sakhi Otmane","year":"2024","unstructured":"Otmane Sakhi, Imad Aouali, Pierre Alquier, and Nicolas Chopin. 2024. Logarithmic Smoothing for Pessimistic Off-Policy Evaluation, Selection and Learning. In Advances in Neural Information Processing Systems (NeurIPS) , Vol.\u00a037. 80706\u201380755."},{"key":"e_1_3_3_2_29_2","first-page":"2217","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Strehl Alexander\u00a0L.","year":"2010","unstructured":"Alexander\u00a0L. Strehl, John Langford, Lihong Li, and Sham\u00a0M. Kakade. 2010. Learning from Logged Implicit Exploration Data. In Advances in Neural Information Processing Systems (NeurIPS). 2217\u20132225."},{"key":"e_1_3_3_2_30_2","first-page":"9167","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"119","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud\u00edk. 2020. Doubly robust off-policy evaluation with shrinkage. In Proceedings of the International Conference on Machine Learning (ICML) , Vol.\u00a0119. 9167\u20139176."},{"key":"e_1_3_3_2_31_2","first-page":"9196","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"119","author":"Su Yi","year":"2020","unstructured":"Yi Su, Pavithra Srinath, and Akshay Krishnamurthy. 2020. Adaptive Estimator Selection for Off-Policy Evaluation. In Proceedings of the International Conference on Machine Learning (ICML) , Vol.\u00a0119. 9196\u20139205."},{"key":"e_1_3_3_2_32_2","unstructured":"Hao Sun Alex\u00a0James Chan Nabeel Seedat Alihan H\u00fcy\u00fck and Mihaela van\u00a0der Schaar. 2024. When is Off-Policy Evaluation (Reward Modeling) Useful in Contextual Bandits? A Data-Centric Perspective. Journal of Data-centric Machine Learning Research (2024)."},{"key":"e_1_3_3_2_33_2","first-page":"3231","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Advances in Neural Information Processing Systems (NeurIPS). 3231\u20133239."},{"key":"e_1_3_3_2_34_2","first-page":"2139","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"48","author":"Thomas Philip\u00a0S.","year":"2016","unstructured":"Philip\u00a0S. Thomas and Emma Brunskill. 2016. Data-Efficient Off-Policy Policy Evaluation for Reinforcement Learning. In Proceedings of the International Conference on Machine Learning (ICML) , Vol.\u00a048. 2139\u20132148."},{"key":"e_1_3_3_2_35_2","volume-title":"Workshop on Reinforcement Learning Theory at the International Conference on Machine Learning (ICML)","author":"Tucker George","year":"2021","unstructured":"George Tucker and Jonathan Lee. 2021. Improved Estimator Selection for Off-Policy Evaluation. In Workshop on Reinforcement Learning Theory at the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i8.26195"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Nikos Vlassis Fernando\u00a0Amat Gil and Ashok Chandrashekar. 2021. Off-Policy Evaluation of Slate Policies under Bayes Risk. 10.48550\/arXiv.2101.02553arXiv:https:\/\/arXiv.org\/abs\/2101.02553 [cs].","DOI":"10.48550\/arXiv.2101.02553"},{"key":"e_1_3_3_2_38_2","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS Datasets and Benchmarks)","author":"Voloshin Cameron","year":"2021","unstructured":"Cameron Voloshin, Hoang\u00a0Minh Le, Nan Jiang, and Yisong Yue. 2021. Empirical Study of Off-Policy Policy Evaluation for Reinforcement Learning. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS Datasets and Benchmarks)."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Chih-Chun Wang Sanjeev\u00a0R Kulkarni and H\u00a0Vincent Poor. 2005. Bandit problems with side observations. IEEE Trans. Automat. Control 50 3 (2005) 338\u2013355.","DOI":"10.1109\/TAC.2005.844079"},{"key":"e_1_3_3_2_40_2","first-page":"3589","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"70","author":"Wang Yu-Xiang","year":"2017","unstructured":"Yu-Xiang Wang, Alekh Agarwal, and Miroslav Dud\u00edk. 2017. Optimal and Adaptive Off-policy Evaluation in Contextual Bandits. In Proceedings of the International Conference on Machine Learning (ICML) , Vol.\u00a070. 3589\u20133597."}],"event":{"name":"RecSys '25: Nineteenth ACM Conference on Recommender Systems","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGIR ACM Special Interest Group on Information Retrieval","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Prague Czech Republic","acronym":"RecSys '25"},"container-title":["Proceedings of the Nineteenth ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3705328.3748762","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T11:48:41Z","timestamp":1757159321000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3705328.3748762"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,7]]},"references-count":39,"alternative-id":["10.1145\/3705328.3748762","10.1145\/3705328"],"URL":"https:\/\/doi.org\/10.1145\/3705328.3748762","relation":{},"subject":[],"published":{"date-parts":[[2025,9,7]]},"assertion":[{"value":"2025-09-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}