{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:36:20Z","timestamp":1757313380768,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Dutch Research Council (NWO)","award":["KICH3.LTP.20.006"],"award-info":[{"award-number":["KICH3.LTP.20.006"]}]},{"name":"Dutch Research Council (NWO)","award":["024.004.022"],"award-info":[{"award-number":["024.004.022"]}]},{"name":"Dutch Research Council (NWO)","award":["VI.Veni.222.269"],"award-info":[{"award-number":["VI.Veni.222.269"]}]},{"name":"European Union","award":["10107021"],"award-info":[{"award-number":["10107021"]}]},{"name":"Dutch Research Council (NWO)","award":["NWA.1389.20.18"],"award-info":[{"award-number":["NWA.1389.20.18"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3688105","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"722-732","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Optimal Baseline Corrections for Off-Policy Contextual Bandits"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1291-7951","authenticated-orcid":false,"given":"Shashank","family":"Gupta","sequence":"first","affiliation":[{"name":"IRLab, University of Amsterdam, The Netherlands, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6256-5814","authenticated-orcid":false,"given":"Olivier","family":"Jeunen","sequence":"additional","affiliation":[{"name":"AI, ShareChat, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0458-9233","authenticated-orcid":false,"given":"Harrie","family":"Oosterhuis","sequence":"additional","affiliation":[{"name":"Institute for Computing and Information Sciences, Radboud University, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1086-0202","authenticated-orcid":false,"given":"Maarten","family":"de Rijke","sequence":"additional","affiliation":[{"name":"Informatics Institute, University of Amsterdam, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56069-9_34"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412217"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of KDD cup and workshop, Vol.\u00a02007","author":"Bennett James","year":"2007","unstructured":"James Bennett, Stan Lanning, 2007. The Netflix Prize. In Proceedings of KDD cup and workshop, Vol.\u00a02007. 35."},{"key":"e_1_3_2_1_4_1","series-title":"SIAM review 60, 2","volume-title":"Optimization Methods for Large-scale Machine Learning","author":"Bottou L\u00e9on","year":"2018","unstructured":"L\u00e9on Bottou, Frank\u00a0E. Curtis, and Jorge Nocedal. 2018. Optimization Methods for Large-scale Machine Learning. SIAM review 60, 2 (2018), 223\u2013311."},{"key":"e_1_3_2_1_5_1","volume-title":"Proc. of the 46th European Conference on Information Retrieval(ECIR \u201924)","author":"Briand L\u00e9a","year":"2024","unstructured":"L\u00e9a Briand, Th\u00e9o Bontempelli, Walid Bendada, Mathieu Morlon, Fran\u00e7ois Rigaud, Benjamin Chapus, Thomas Bouab\u00e7a, and Guillaume Salha-Galvan. 2024. Let\u2019s Get It Started: Fostering the Discoverability of New Releases on Deezer. In Proc. of the 46th European Conference on Information Retrieval(ECIR \u201924). Springer."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159699"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441764"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546758"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","unstructured":"Peter Dayan. 1991. Reinforcement Comparison. In Connectionist Models David\u00a0S. Touretzky Jeffrey\u00a0L. Elman Terrence\u00a0J. Sejnowski and Geoffrey\u00a0E. Hinton (Eds.). Morgan Kaufmann 45\u201351. https:\/\/doi.org\/10.1016\/B978-1-4832-1448-1.50011-1","DOI":"10.1016\/B978-1-4832-1448-1.50011-1"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2433396.2433413"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3411552"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a080)","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More Robust Doubly Robust Off-policy Evaluation. In Proceedings of the 35th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a080), Jennifer Dy and Andreas Krause (Eds.). PMLR, 1447\u20131456. https:\/\/proceedings.mlr.press\/v80\/farajtabar18a.html"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aam.2006.12.003"},{"key":"e_1_3_2_1_16_1","volume-title":"Variance Reduction Techniques for Gradient Estimates in Reinforcement Learning. J. Mach. Learn. Res. 5 (dec","author":"Greensmith Evan","year":"2004","unstructured":"Evan Greensmith, Peter\u00a0L. Bartlett, and Jonathan Baxter. 2004. Variance Reduction Techniques for Gradient Estimates in Reinforcement Learning. J. Mach. Learn. Res. 5 (dec 2004), 1471\u20131530."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3636451"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605114"},{"key":"e_1_3_2_1_19_1","volume-title":"CONSEQUENCES Workshop at RecSys \u201923","author":"Gupta Shashank","year":"2023","unstructured":"Shashank Gupta, Harrie Oosterhuis, and Maarten de Rijke. 2023. A First Look at Selection Bias in Preference Elicitation for Recommendation (Abstract). In CONSEQUENCES Workshop at RecSys \u201923. ACM."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591760"},{"key":"e_1_3_2_1_21_1","volume-title":"CIKM 2024: 33rd ACM International Conference on Information and Knowledge Management. ACM.","author":"Gupta Shashank","year":"2024","unstructured":"Shashank Gupta, Harrie Oosterhuis, and Maarten de Rijke. 2024. Practical and Robust Safety Guarantees for Advanced Counterfactual Learning to Rank. In CIKM 2024: 33rd ACM International Conference on Information and Knowledge Management. ACM."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1198\/106186008X320456"},{"volume-title":"Offline Approaches to Recommendation with Online Success. Ph.\u00a0D. Dissertation","author":"Jeunen Olivier","key":"e_1_3_2_1_23_1","unstructured":"Olivier Jeunen. 2021. Offline Approaches to Recommendation with Online Success. Ph.\u00a0D. Dissertation. University of Antwerp."},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. of the ACM RecSys Workshop on Bandit Learning from User Interactions(REVEAL \u201920)","author":"Jeunen Olivier","year":"2020","unstructured":"Olivier Jeunen and Bart Goethals. 2020. An Empirical Evaluation of Doubly Robust Learning for Recommendation. In Proc. of the ACM RecSys Workshop on Bandit Learning from User Interactions(REVEAL \u201920)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474248"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3568029"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3547409"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599877"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671687"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403175"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914803"},{"key":"e_1_3_2_1_33_1","volume-title":"Deep Learning with Logged Bandit Feedback. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJaP_-xAb","author":"Joachims Thorsten","year":"2018","unstructured":"Thorsten Joachims, Adith Swaminathan, and Maarten de Rijke. 2018. Deep Learning with Logged Bandit Feedback. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJaP_-xAb"},{"key":"e_1_3_2_1_34_1","volume-title":"Kingma and Jimmy Ba","author":"P.","year":"2014","unstructured":"Diederik\u00a0P. Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"volume-title":"Bandit Algorithms","author":"Lattimore Tor","key":"e_1_3_2_1_36_1","unstructured":"Tor Lattimore and Csaba Szepesv\u00e1ri. 2020. Bandit Algorithms. Cambridge University Press."},{"key":"e_1_3_2_1_37_1","volume-title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539295"},{"key":"e_1_3_2_1_40_1","volume-title":"Self-Normalized Off-Policy Estimators for Ranking. In CONSEQUENCES Workshop at ACM RecSys \u201923(CONSEQUENCES \u201923)","author":"London Ben","year":"2023","unstructured":"Ben London, Alexander Buchholz, Giuseppe Di\u00a0Benedetto, Jan\u00a0Malte Lichtenberg, Yannik Stein, and Thorsten Joachims. 2023. Self-Normalized Off-Policy Estimators for Ranking. In CONSEQUENCES Workshop at ACM RecSys \u201923(CONSEQUENCES \u201923)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380130"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240354"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403374"},{"key":"e_1_3_2_1_44_1","article-title":"Monte Carlo Gradient Estimation in Machine Learning","volume":"21","author":"Mohamed Shakir","year":"2020","unstructured":"Shakir Mohamed, Mihaela Rosca, Michael Figurnov, and Andriy Mnih. 2020. Monte Carlo Gradient Estimation in Machine Learning. J. Mach. Learn. Res. 21, 1, Article 132 (jan 2020), 62\u00a0pages.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_45_1","unstructured":"Art\u00a0B. Owen. 2013. Monte Carlo Theory Methods and Examples. https:\/\/artowen.su.domains\/mc\/."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939688"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-0716-2197-4_4"},{"key":"e_1_3_2_1_48_1","volume-title":"RecoGym: A Reinforcement Learning Environment for the Problem of Product Recommendation in Online Advertising. arXiv preprint arXiv:1808.00720","author":"Rohde David","year":"2018","unstructured":"David Rohde, Stephen Bonner, Travis Dunlop, Flavian Vasile, and Alexandros Karatzoglou. 2018. RecoGym: A Reinforcement Learning Environment for the Problem of Product Recommendation in Online Advertising. arXiv preprint arXiv:1808.00720 (2018)."},{"key":"e_1_3_2_1_49_1","volume-title":"Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-policy Evaluation. arXiv preprint arXiv:2008.07146","author":"Saito Yuta","year":"2020","unstructured":"Yuta Saito, Shunsuke Aihara, Megumi Matsutani, and Yusuke Narita. 2020. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-policy Evaluation. arXiv preprint arXiv:2008.07146 (2020)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3542601"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474245"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403121"},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Schulman John","year":"2016","unstructured":"John Schulman, Philipp Moritz, Sergey Levine, Michael Jordan, and Pieter Abbeel. 2016. High-Dimensional Continuous Control Using Generalized Advantage Estimation. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.5555\/2073876.2073930"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2507157.2507160"},{"key":"e_1_3_2_1_57_1","volume-title":"Doubly Robust Off-policy Evaluation with Shrinkage. In International Conference on Machine Learning. PMLR, 9167\u20139176","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dud\u00edk. 2020. Doubly Robust Off-policy Evaluation with Shrinkage. In International Conference on Machine Learning. PMLR, 9167\u20139176."},{"key":"e_1_3_2_1_58_1","volume-title":"Proc. of the 36th International Conference on Machine Learning(ICML \u201919","author":"Su Yi","year":"2019","unstructured":"Yi Su, Lequn Wang, Michele Santacatterina, and Thorsten Joachims. 2019. CAB: Continuous Adaptive Blending for Policy Evaluation and Learning. In Proc. of the 36th International Conference on Machine Learning(ICML \u201919, Vol.\u00a097). PMLR, 6005\u20136014. https:\/\/proceedings.mlr.press\/v97\/su19a.html"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635833"},{"key":"e_1_3_2_1_60_1","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton S.","year":"2018","unstructured":"Richard\u00a0S. Sutton and Andrew\u00a0G. Barto. 2018. Reinforcement Learning: An Introduction. MIT press."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_1_62_1","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Advances in Neural Information Processing Systems Vol.\u00a028. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2015\/file\/39027dfad5138c9ca0c474d71db915c3-Paper.pdf"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3636449"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340631.3398666"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608792"}],"event":{"name":"RecSys '24: 18th ACM Conference on Recommender Systems","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Bari Italy","acronym":"RecSys '24"},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688105","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3688105","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:29Z","timestamp":1750294709000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688105"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":65,"alternative-id":["10.1145\/3640457.3688105","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3688105","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}