{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:54:57Z","timestamp":1757314497168,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Hybrid Intelligence Center"},{"name":"Netherlands Organisation for Scientific Research (NWO)","award":["VI.Veni.222.269"],"award-info":[{"award-number":["VI.Veni.222.269"]}]},{"name":"SURF Cooperative","award":["EINF-4963"],"award-info":[{"award-number":["EINF-4963"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679531","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"737-747","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Practical and Robust Safety Guarantees for Advanced Counterfactual Learning to Rank"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1291-7951","authenticated-orcid":false,"given":"Shashank","family":"Gupta","sequence":"first","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0458-9233","authenticated-orcid":false,"given":"Harrie","family":"Oosterhuis","sequence":"additional","affiliation":[{"name":"Radboud University, Nijmegen, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1086-0202","authenticated-orcid":false,"given":"Maarten","family":"de Rijke","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331202"},{"key":"e_1_3_2_1_2_1","volume-title":"Addressing Trust Bias for Unbiased Learning-to-rank. In The World Wide Web Conference. 4--14","author":"Agarwal Aman","year":"2019","unstructured":"Aman Agarwal, Xuanhui Wang, Cheng Li, Michael Bendersky, and Marc Najork. 2019. Addressing Trust Bias for Unbiased Learning-to-rank. In The World Wide Web Conference. 4--14."},{"key":"e_1_3_2_1_3_1","volume-title":"Advances in Neural Information Processing Systems","volume":"19","author":"Burges Christopher","year":"2006","unstructured":"Christopher Burges, Robert Ragno, and Quoc Le. 2006. Learning to Rank with Nonsmooth Cost Functions. Advances in Neural Information Processing Systems, Vol. 19 (2006)."},{"key":"e_1_3_2_1_4_1","first-page":"23","article-title":"From RankNet to LambdaRank to LambdaMART","volume":"11","author":"Burges Christopher JC","year":"2010","unstructured":"Christopher JC Burges. 2010. From RankNet to LambdaRank to LambdaMART. Learning, Vol. 11, 23--581 (2010), 81.","journal-title":"Learning"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000021"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the Learning to Rank Challenge. PMLR, 1--24","author":"Chapelle Olivier","year":"2011","unstructured":"Olivier Chapelle and Yi Chang. 2011. Yahoo! Learning to Rank Challenge Overview. In Proceedings of the Learning to Rank Challenge. PMLR, 1--24."},{"volume-title":"Click Models for Web Search","author":"Chuklin Aleksandr","key":"e_1_3_2_1_7_1","unstructured":"Aleksandr Chuklin, Ilya Markov, and Maarten de Rijke. 2015. Click Models for Web Search. Morgan & Claypool Publishers."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1341531.1341545"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2987380"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1198\/000313002119"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3636451"},{"key":"e_1_3_2_1_12_1","volume-title":"Optimal Baseline Corrections for Off-Policy Contextual Bandits. arXiv preprint arXiv:2405.05736","author":"Gupta Shashank","year":"2024","unstructured":"Shashank Gupta, Olivier Jeunen, Harrie Oosterhuis, and Maarten de Rijke. 2024. Optimal Baseline Corrections for Off-Policy Contextual Bandits. arXiv preprint arXiv:2405.05736 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605114"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591760"},{"key":"e_1_3_2_1_15_1","volume-title":"A First Look at Selection Bias in Preference Elicitation for Recommendation. arXiv preprint arXiv:2405.00554","author":"Gupta Shashank","year":"2024","unstructured":"Shashank Gupta, Harrie Oosterhuis, and Maarten de Rijke. 2024. A First Look at Selection Bias in Preference Elicitation for Recommendation. arXiv preprint arXiv:2405.00554 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3385670"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/582415.582418"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775067"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914803"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018699"},{"key":"e_1_3_2_1_21_1","volume-title":"Neural Trust Region\/Proximal Policy Optimization Attains Globally Optimal Policy. Advances in neural information processing systems","author":"Liu Boyi","year":"2019","unstructured":"Boyi Liu, Qi Cai, Zhuoran Yang, and Zhaoran Wang. 2019. Neural Trust Region\/Proximal Policy Optimization Attains Globally Optimal Policy. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000016"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614793"},{"volume-title":"Learning from User Interactions with Rankings: A Unification of the Field. Ph.,D. Dissertation","author":"Oosterhuis Harrie","key":"e_1_3_2_1_24_1","unstructured":"Harrie Oosterhuis. 2020. Learning from User Interactions with Rankings: A Unification of the Field. Ph.,D. Dissertation. Informatics Institute, University of Amsterdam."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462830"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531842"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539813.3545137"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3569453"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401102"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441794"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the Web Conference","author":"Oosterhuis Harrie","year":"2021","unstructured":"Harrie Oosterhuis and Maarten de de Rijke. 2021. Robust Generalization and Safe Query-Specialization in Counterfactual Learning to Rank. In Proceedings of the Web Conference 2021. 158--170."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380255"},{"key":"e_1_3_2_1_33_1","volume-title":"arXiv preprint arXiv:1306.2597","author":"Qin Tao","year":"2013","unstructured":"Tao Qin and Tie-Yan Liu. 2013. Introducing LETOR 4.0 Datasets. arXiv preprint arXiv:1306.2597 (2013)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-009-9123-y"},{"key":"e_1_3_2_1_35_1","first-page":"11909","article-title":"Generalized Proximal Policy Optimization with Sample Reuse","volume":"34","author":"Queeney James","year":"2021","unstructured":"James Queeney, Yannis Paschalidis, and Christos G Cassandras. 2021. Generalized Proximal Policy Optimization with Sample Reuse. In Advances in Neural Information Processing Systems, Vol. 34. 11909--11919.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"LR4IR 2007: Learning to Rank for Information Retrieval Workshop at SIGIR","author":"Radlinski Filip","key":"e_1_3_2_1_36_1","unstructured":"Filip Radlinski. 2007. Addressing Malicious Noise in Clickthrough Data. In LR4IR 2007: Learning to Rank for Information Retrieval Workshop at SIGIR, Vol. 2007."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the fourth Berkeley symposium on mathematical statistics and probability","volume":"1","author":"R\u00e9nyi Alfr\u00e9d","year":"1961","unstructured":"Alfr\u00e9d R\u00e9nyi. 1961. On Measures of Entropy and Information. In Proceedings of the fourth Berkeley symposium on mathematical statistics and probability, Vol. 1. Berkeley, California, USA."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000009"},{"key":"e_1_3_2_1_40_1","volume-title":"High-dimensional Continuous Control using Generalized Advantage Estimation. arXiv preprint arXiv:1506.02438","author":"Schulman John","year":"2015","unstructured":"John Schulman, Philipp Moritz, Sergey Levine, Michael Jordan, and Pieter Abbeel. 2015. High-dimensional Continuous Control using Generalized Advantage Estimation. arXiv preprint arXiv:1506.02438 (2015)."},{"key":"e_1_3_2_1_41_1","volume-title":"Proximal Policy Optimization Algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9541"},{"key":"e_1_3_2_1_44_1","volume-title":"StochasticRank: Global Optimization of Scale-Free Discrete Functions. In International Conference on Machine Learning. PMLR, 9669--9679","author":"Ustimenko Aleksei","year":"2020","unstructured":"Aleksei Ustimenko and Liudmila Prokhorenkova. 2020. StochasticRank: Global Optimization of Scale-Free Discrete Functions. In International Conference on Machine Learning. PMLR, 9669--9679."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412031"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911537"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159732"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271784"},{"key":"e_1_3_2_1_49_1","unstructured":"Yuhui Wang Hao He and Xiaoyang Tan. 2020. Truly Proximal Policy Optimization. In Uncertainty in Artificial Intelligence. PMLR 113--122."},{"key":"e_1_3_2_1_50_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Wang Yuhui","year":"2019","unstructured":"Yuhui Wang, Hao He, Xiaoyang Tan, and Yaozhong Gan. 2019. Trust Region-guided Proximal Policy Optimization. In Advances in Neural Information Processing Systems, Vol. 32."},{"key":"e_1_3_2_1_51_1","volume-title":"Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine learning","author":"Williams Ronald J","year":"1992","unstructured":"Ronald J Williams. 1992. Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine learning, Vol. 8, 3 (1992), 229--256."},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Machine Learning. PMLR, 5353--5362","author":"Wu Hang","year":"2018","unstructured":"Hang Wu and May Wang. 2018. Variance Regularized Counterfactual Risk Minimization via Variational Divergence Minimization. In International Conference on Machine Learning. PMLR, 5353--5362."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462953"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Boise ID USA","acronym":"CIKM '24"},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679531","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679531","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:22Z","timestamp":1750294702000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679531"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":53,"alternative-id":["10.1145\/3627673.3679531","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679531","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}