{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T10:15:19Z","timestamp":1775470519755,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T00:00:00Z","timestamp":1709510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,4]]},"DOI":"10.1145\/3616855.3635846","type":"proceedings-article","created":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T18:18:12Z","timestamp":1709576292000},"page":"586-595","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Ad-load Balancing via Off-policy Learning in a Content Marketplace"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-6995-1912","authenticated-orcid":false,"given":"Hitesh","family":"Sagtani","sequence":"first","affiliation":[{"name":"ShareChat, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7877-4292","authenticated-orcid":false,"given":"Madan Gopal","family":"Jhawar","sequence":"additional","affiliation":[{"name":"ShareChat, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0836-4605","authenticated-orcid":false,"given":"Rishabh","family":"Mehrotra","sequence":"additional","affiliation":[{"name":"ShareChat, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6256-5814","authenticated-orcid":false,"given":"Olivier","family":"Jeunen","sequence":"additional","affiliation":[{"name":"ShareChat, Edinburgh, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Multistakeholder recommendation: Survey and research directions. User Modeling and User-Adapted Interaction 30, 1 (01","author":"Abdollahpouri Himan","year":"2020","unstructured":"Himan Abdollahpouri, Gediminas Adomavicius, Robin Burke, Ido Guy, Dietmar Jannach, Toshihiro Kamishima, Jan Krasnodebski, and Luiz Pizzato. 2020. Multistakeholder recommendation: Survey and research directions. User Modeling and User-Adapted Interaction 30, 1 (01 Mar 2020), 127--158. https:\/\/doi.org\/10. 1007\/s11257-019-09256-1"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-0716-2197-4_17"},{"key":"e_1_3_2_1_3_1","volume-title":"Personalized Ad Delivery When Ads Fatigue: An Approximation Algorithm","author":"Abrams Zo\u00eb","unstructured":"Zo\u00eb Abrams and Erik Vee. 2007. Personalized Ad Delivery When Ads Fatigue: An Approximation Algorithm. In Internet and Network Economics, Xiaotie Deng and Fan Chung Graham (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 535--540."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3542920"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/2567709.2567766"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"634","author":"Busa-Fekete R\u00f3bert","year":"2017","unstructured":"R\u00f3bert Busa-Fekete, Bal\u00e1zs Sz\u00f6r\u00e9nyi, Paul Weng, and Shie Mannor. 2017. Multiobjective Bandits: Optimizing the Generalized Gini Index. In Proceedings of the 34th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 70), Doina Precup and Yee Whye Teh (Eds.). PMLR, 625--634. https:\/\/proceedings.mlr.press\/v70\/busa-fekete17a.html"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","unstructured":"Aleksandr Chuklin Ilya Markov and Maarten de Rijke. 2015. Click Models for Web Search. Morgan & Claypool. https:\/\/doi.org\/10.2200\/S00654ED1V01Y201507ICR043","DOI":"10.2200\/S00654ED1V01Y201507ICR043"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 28th International Conference on International Conference on Machine Learning (ICML'11)","author":"Dud\u00edk Miroslav","year":"2011","unstructured":"Miroslav Dud\u00edk, John Langford, and Lihong Li. 2011. Doubly Robust Policy Evaluation and Learning. In Proceedings of the 28th International Conference on International Conference on Machine Learning (ICML'11). Omnipress, 1097--1104."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"1456","author":"Farajtabar Mehrdad","year":"2018","unstructured":"Mehrdad Farajtabar, Yinlam Chow, and Mohammad Ghavamzadeh. 2018. More Robust Doubly Robust Off-policy Evaluation. In Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 80), Jennifer Dy and Andreas Krause (Eds.). PMLR, 1447--1456. https:\/\/proceedings.mlr.press\/v80\/farajtabar18a.html"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5797"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2648584.2648589"},{"key":"e_1_3_2_1_14_1","volume-title":"Measuring consumer sensitivity to audio advertising: A field experiment on pandora internet radio. Available at SSRN 3166676","author":"Huang Jason","year":"2018","unstructured":"Jason Huang, David Reiley, and Nick Riabov. 2018. Measuring consumer sensitivity to audio advertising: A field experiment on pandora internet radio. Available at SSRN 3166676 (2018)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1198\/106186008X320456"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608777"},{"key":"e_1_3_2_1_17_1","volume-title":"Proc. of the ACM RecSys Workshop on Bandit Learning from User Interactions (REVEAL '20)","author":"Jeunen Olivier","year":"2020","unstructured":"Olivier Jeunen and Bart Goethals. 2020. An Empirical Evaluation of Doubly Robust Learning for Recommendation. In Proc. of the ACM RecSys Workshop on Bandit Learning from User Interactions (REVEAL '20)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3568029"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3547409"},{"key":"e_1_3_2_1_21_1","volume-title":"RecSys 2023 Workshop: CONSEQUENCES -- Causality, Counterfactuals and Sequential Decision-Making. arXiv:2309","author":"Jeunen Olivier","year":"2023","unstructured":"Olivier Jeunen and Ben London. 2023. Offline Recommender System Evaluation under Unobserved Confounding. In RecSys 2023 Workshop: CONSEQUENCES -- Causality, Counterfactuals and Sequential Decision-Making. arXiv:2309.04222"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the Workshop on Knowledge Discovery and Data Mining for Online Advertising (ADKDD '22)","author":"Jeunen Olivier","year":"2022","unstructured":"Olivier Jeunen, Sean Murphy, and Ben Allison. 2022. Learning to Bid with AuctionGym. In Proceedings of the Workshop on Knowledge Discovery and Data Mining for Online Advertising (ADKDD '22)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599877"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403175"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the Workshop on Artificial Intelligence for Online Advertising (AI4WebAds '23)","author":"Jeunen Olivier","year":"2023","unstructured":"Olivier Jeunen, Lampros Stavrogiannis, Amin Sayedi, and Ben Allison. 2023. A Probabilistic Framework for Learning Auction Mechanisms via Gradient Descent. In Proceedings of the Workshop on Artificial Intelligence for Online Advertising (AI4WebAds '23)."},{"key":"e_1_3_2_1_26_1","volume-title":"Proc. of the 6th International Conference on Learning Representations (ICLR '18)","author":"Joachims Thorsten","year":"2018","unstructured":"Thorsten Joachims, Adith Swaminathan, and Maarten de Rijke. 2018. Deep Learning with Logged Bandit Feedback. In Proc. of the 6th International Conference on Learning Representations (ICLR '18)."},{"key":"e_1_3_2_1_27_1","unstructured":"Sergey Levine Aviral Kumar George Tucker and Justin Fu. 2020. Offline Reinforcement Learning: Tutorial Review and Perspectives on Open Problems. arXiv:2005.01643 [cs.LG]"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2740908.2742562"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467103"},{"key":"e_1_3_2_1_30_1","volume-title":"RecSys 2022 Workshop: CONSEQUENCES -- Causality, Counterfactuals and Sequential Decision-Making.","author":"London Ben","year":"2022","unstructured":"Ben London and Thorsten Joachims. 2022. Control variate diagnostics for detecting problems in logged bandit feedback. In RecSys 2022 Workshop: CONSEQUENCES -- Causality, Counterfactuals and Sequential Decision-Making."},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of The 26th International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research","volume":"5640","author":"London Ben","year":"2023","unstructured":"Ben London, Levi Lu, Ted Sandler, and Thorsten Joachims. 2023. Boosted Off- Policy Learning. In Proceedings of The 26th International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research, Vol. 206), Francisco Ruiz, Jennifer Dy, and Jan-Willem van de Meent (Eds.). PMLR, 5614--5640. https:\/\/proceedings.mlr.press\/v206\/london23a.html"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380130"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research","volume":"2965","author":"Ma Yifei","year":"2019","unstructured":"Yifei Ma, Yu-Xiang Wang, and Balakrishnan Narayanaswamy. 2019. Imitation- Regularized Offline Learning. In Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics (Proceedings of Machine Learning Research, Vol. 89). PMLR, 2956--2965. https:\/\/proceedings.mlr.press\/v89\/ma19b.html"},{"key":"e_1_3_2_1_34_1","first-page":"21","article-title":"Empirical Bernstein Bounds and Sample Variance","volume":"1050","author":"Maurer Andreas","year":"2009","unstructured":"Andreas Maurer and Massimiliano Pontil. 2009. Empirical Bernstein Bounds and Sample Variance Penalization. Stat. 1050 (2009), 21.","journal-title":"Penalization. Stat."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2488200"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403374"},{"key":"e_1_3_2_1_37_1","volume-title":"Proc. of the NeurIPS Workshop on Causality and Machine Learning (CausalML '19)","author":"Mykhaylov Dmytro","year":"2019","unstructured":"Dmytro Mykhaylov, David Rohde, Flavian Vasile, Martin Bompaire, and Olivier Jeunen. 2019. Three Methods for Training on Bandit Feedback. In Proc. of the NeurIPS Workshop on Causality and Machine Learning (CausalML '19)."},{"key":"e_1_3_2_1_38_1","unstructured":"Art B. Owen. 2013. Monte Carlo theory methods and examples."},{"key":"e_1_3_2_1_39_1","unstructured":"Judea Pearl. 2009. Causality. Cambridge university press."},{"key":"e_1_3_2_1_40_1","volume-title":"Garnett (Eds.)","volume":"31","author":"Prokhorenkova Liudmila","year":"2018","unstructured":"Liudmila Prokhorenkova, Gleb Gusev, Aleksandr Vorobev, Anna Veronika Dorogush, and Andrey Gulin. 2018. CatBoost: unbiased boosting with categorical features. In Advances in Neural Information Processing Systems, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.), Vol. 31. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2018\/file\/14491b756b3a51daac41c24863285549-Paper.pdf"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1080\/10447318.2013.847762"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592044"},{"key":"e_1_3_2_1_43_1","volume-title":"Open bandit dataset and pipeline: Towards realistic and reproducible off-policy evaluation. arXiv preprint arXiv:2008.07146","author":"Saito Yuta","year":"2020","unstructured":"Yuta Saito, Shunsuke Aihara, Megumi Matsutani, and Yusuke Narita. 2020. Open bandit dataset and pipeline: Towards realistic and reproducible off-policy evaluation. arXiv preprint arXiv:2008.07146 (2020)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3542601"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research","author":"Si Nian","year":"2020","unstructured":"Nian Si, Fan Zhang, Zhengyuan Zhou, and Jose Blanchet. 2020. Distributionally Robust Policy Evaluation and Learning in Offline Contextual Bandits. In Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 119), Hal Daum\u00e9 III and Aarti Singh (Eds.). PMLR, 8884--8894. https:\/\/proceedings.mlr.press\/v119\/si20a.html"},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research","author":"Su Yi","year":"2020","unstructured":"Yi Su, Maria Dimakopoulou, Akshay Krishnamurthy, and Miroslav Dudik. 2020. Doubly robust off-policy evaluation with shrinkage. In Proceedings of the 37th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 119), Hal Daum\u00e9 III and Aarti Singh (Eds.). PMLR, 9167--9176. https:\/\/proceedings.mlr.press\/v119\/su20a.html"},{"key":"e_1_3_2_1_47_1","volume-title":"International Conference on Machine Learning. PMLR, 6005--6014","author":"Su Yi","year":"2019","unstructured":"Yi Su, Lequn Wang, Michele Santacatterina, and Thorsten Joachims. 2019. Cab: Continuous adaptive blending for policy evaluation and learning. In International Conference on Machine Learning. PMLR, 6005--6014."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886805"},{"key":"e_1_3_2_1_49_1","volume-title":"Proc. of the 32nd International Conference on International Conference on Machine Learning (ICML'15)","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. Counterfactual Risk Minimization: Learning from Logged Bandit Feedback. In Proc. of the 32nd International Conference on International Conference on Machine Learning (ICML'15). JMLR.org, 814--823."},{"key":"e_1_3_2_1_50_1","volume-title":"The self-normalized estimator for counterfactual learning. advances in neural information processing systems 28","author":"Swaminathan Adith","year":"2015","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The self-normalized estimator for counterfactual learning. advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_51_1","unstructured":"Adith Swaminathan and Thorsten Joachims. 2015. The Self-Normalized Estimator for Counterfactual Learning. In Advances in Neural Information Processing Systems. 3231--3239."},{"key":"e_1_3_2_1_52_1","volume-title":"Practical Bandits: An Industry Perspective. arXiv:2302.01223 [cs.LG]","author":"van den Akker Bram","year":"2023","unstructured":"Bram van den Akker, Olivier Jeunen, Ying Li, Ben London, Zahra Nazari, and Devesh Parekh. 2023. Practical Bandits: An Industry Perspective. arXiv:2302.01223 [cs.LG]"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.11.041"}],"event":{"name":"WSDM '24: The 17th ACM International Conference on Web Search and Data Mining","location":"Merida Mexico","acronym":"WSDM '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 17th ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3635846","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3616855.3635846","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:47:44Z","timestamp":1755823664000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3635846"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,4]]},"references-count":53,"alternative-id":["10.1145\/3616855.3635846","10.1145\/3616855"],"URL":"https:\/\/doi.org\/10.1145\/3616855.3635846","relation":{},"subject":[],"published":{"date-parts":[[2024,3,4]]},"assertion":[{"value":"2024-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}