{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:40:08Z","timestamp":1755870008835,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,18]]},"DOI":"10.1145\/3731120.3744583","type":"proceedings-article","created":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T13:34:06Z","timestamp":1752845646000},"page":"177-182","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Two-Stage Counterfactual Learning to Rank"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1291-7951","authenticated-orcid":false,"given":"Shashank","family":"Gupta","sequence":"first","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8480-0114","authenticated-orcid":false,"given":"Yiming","family":"Liao","sequence":"additional","affiliation":[{"name":"Meta AI, New York, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1086-0202","authenticated-orcid":false,"given":"Maarten","family":"de Rijke","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2025,7,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331202"},{"key":"e_1_3_2_1_2_1","volume-title":"Addressing Trust Bias for Unbiased Learning-to-rank. In The World Wide Web Conference. 4-14","author":"Agarwal Aman","year":"2019","unstructured":"Aman Agarwal, Xuanhui Wang, Cheng Li, Michael Bendersky, and Marc Najork. 2019b. Addressing Trust Bias for Unbiased Learning-to-rank. In The World Wide Web Conference. 4-14."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the learning to rank challenge. PMLR, 1-24","author":"Chapelle Olivier","year":"2011","unstructured":"Olivier Chapelle and Yi Chang. 2011. Yahoo! Learning to Rank Challenge Overview. In Proceedings of the learning to rank challenge. PMLR, 1-24."},{"volume-title":"Click Models for Web Search","author":"Chuklin Aleksandr","key":"e_1_3_2_1_4_1","unstructured":"Aleksandr Chuklin, Ilya Markov, and Maarten de Rijke. 2015. Click Models for Web Search. Morgan & Claypool Publishers."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1341531.1341545"},{"key":"e_1_3_2_1_6_1","first-page":"423","volume-title":"Advances in Information Retrieval: 35th European Conference on IR Research, ECIR 2013, Moscow, Russia, March 24-27, 2013. Proceedings 35","author":"Dang Van","unstructured":"Van Dang, Michael Bendersky, and W. Bruce Croft. 2013. Two-stage Learning to Rank for Information Retrieval. In Advances in Information Retrieval: 35th European Conference on IR Research, ECIR 2013, Moscow, Russia, March 24-27, 2013. Proceedings 35. Springer, 423-434."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591636"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3594247"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3636451"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688105"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578337.3605114"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591760"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679531"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3385670"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775067"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2914803"},{"key":"e_1_3_2_1_17_1","volume-title":"International Conference on Learning Representations.","author":"Joachims Thorsten","year":"2018","unstructured":"Thorsten Joachims, Adith Swaminathan, and Maarten de Rijke. 2018. Deep learning with Logged Bandit Feedback. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018699"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2507157.2508063"},{"key":"e_1_3_2_1_20_1","volume-title":"Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980, (2014)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000016"},{"key":"e_1_3_2_1_22_1","first-page":"463","volume-title":"Proceedings of The Web Conference","author":"Ma Jiaqi","year":"2020","unstructured":"Jiaqi Ma, Zhe Zhao, Xinyang Yi, Ji Yang, Minmin Chen, Jiaxi Tang, Lichan Hong, and Ed H. Chi. 2020. Off-policy Learning in Two-stage Recommender Systems. In Proceedings of The Web Conference 2020. 463-473."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462830"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3569453"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401102"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441794"},{"key":"e_1_3_2_1_27_1","first-page":"158","volume-title":"Proceedings of the Web Conference","author":"Oosterhuis Harrie","year":"2021","unstructured":"Harrie Oosterhuis and Maarten de de Rijke. 2021b. Robust Generalization and Safe Query-Specialization in Counterfactual Learning to Rank. In Proceedings of the Web Conference 2021. 158-170."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366424.3383107"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-009-9123-y"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000009"},{"key":"e_1_3_2_1_32_1","volume-title":"international conference on machine learning. PMLR, 1670-1679","author":"Schnabel Tobias","year":"2016","unstructured":"Tobias Schnabel, Adith Swaminathan, Ashudeep Singh, Navin Chandak, and Thorsten Joachims. 2016. Recommendations as Treatments: Debiasing Learning and Evaluation. In international conference on machine learning. PMLR, 1670-1679."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_1_34_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Swaminathan Adith","year":"2017","unstructured":"Adith Swaminathan, Akshay Krishnamurthy, Alekh Agarwal, Miro Dudik, John Langford, Damien Jose, and Imed Zitouni. 2017. Off-Policy Evaluation for Slate Recommendation. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2009916.2009934"},{"key":"e_1_3_2_1_36_1","volume-title":"Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine learning","author":"Williams Ronald J","year":"1992","unstructured":"Ronald J Williams. 1992. Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine learning, Vol. 8, 3 (1992), 229-256."},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Machine Learning. PMLR, 5353-5362","author":"Wu Hang","year":"2018","unstructured":"Hang Wu and May Wang. 2018. Variance Regularized Counterfactual Risk Minimization via Variational Divergence Minimization. In International Conference on Machine Learning. PMLR, 5353-5362."},{"key":"e_1_3_2_1_38_1","volume-title":"International Conference on Learning Representations.","author":"Xie Yuan","year":"2018","unstructured":"Yuan Xie, Boyi Liu, Qiang Liu, Zhaoran Wang, Yuan Zhou, and Jian Peng. 2018. Off-Policy Evaluation and Learning from Logged Bandit Feedback: Error Reduction via Surrogate Policy. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462953"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3158369"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2505690"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401181"}],"event":{"name":"ICTIR '25: International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"ICTIR '25"},"container-title":["Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731120.3744583","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:16:25Z","timestamp":1755868585000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731120.3744583"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,18]]},"references-count":42,"alternative-id":["10.1145\/3731120.3744583","10.1145\/3731120"],"URL":"https:\/\/doi.org\/10.1145\/3731120.3744583","relation":{},"subject":[],"published":{"date-parts":[[2025,7,18]]},"assertion":[{"value":"2025-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}