{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T05:47:01Z","timestamp":1757310421984,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T00:00:00Z","timestamp":1709510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,4]]},"DOI":"10.1145\/3616855.3636449","type":"proceedings-article","created":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T18:18:12Z","timestamp":1709576292000},"page":"1132-1135","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Practical Bandits: An Industry Perspective"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7132-4633","authenticated-orcid":false,"given":"Bram","family":"van den Akker","sequence":"first","affiliation":[{"name":"Booking.com, Amsterdam, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6256-5814","authenticated-orcid":false,"given":"Olivier","family":"Jeunen","sequence":"additional","affiliation":[{"name":"ShareChat, Edinburgh, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7587-800X","authenticated-orcid":false,"given":"Ying","family":"Li","sequence":"additional","affiliation":[{"name":"Netflix, Los Gatos, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9515-5456","authenticated-orcid":false,"given":"Ben","family":"London","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3832-8840","authenticated-orcid":false,"given":"Zahra","family":"Nazari","sequence":"additional","affiliation":[{"name":"Spotify, New York, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5098-3164","authenticated-orcid":false,"given":"Devesh","family":"Parekh","sequence":"additional","affiliation":[{"name":"Netflix, Los Gatos, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"volume-title":"Proc. of the Fourteenth ACM Conference on Recommender Systems (RecSys '20)","author":"Bendada W.","key":"e_1_3_2_1_2_1","unstructured":"W. Bendada, G. Salha, and T. Bontempelli. 2020. Carousel Personalization in Music Streaming Apps with Contextual Bandits. In Proc. of the Fourteenth ACM Conference on Recommender Systems (RecSys '20). ACM, 420--425."},{"key":"e_1_3_2_1_3_1","volume-title":"Boltzmann exploration done right. Advances in neural information processing systems 30","author":"Cesa-Bianchi Nicol\u00f2","year":"2017","unstructured":"Nicol\u00f2 Cesa-Bianchi, Claudio Gentile, G\u00e1bor Lugosi, and Gergely Neu. 2017. Boltzmann exploration done right. Advances in neural information processing systems 30 (2017)."},{"volume-title":"Prediction, learning, and games","author":"Cesa-Bianchi Nicolo","key":"e_1_3_2_1_4_1","unstructured":"Nicolo Cesa-Bianchi and G\u00e1bor Lugosi. 2006. Prediction, learning, and games. Cambridge university press."},{"volume-title":"Proc. of the 24th International Conference on Neural Information Processing Systems (NIPS'11)","author":"Chapelle O.","key":"e_1_3_2_1_5_1","unstructured":"O. Chapelle and L. Li. 2011. An Empirical Evaluation of Thompson Sampling. In Proc. of the 24th International Conference on Neural Information Processing Systems (NIPS'11). 2249--2257."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2013.6707036"},{"key":"e_1_3_2_1_7_1","volume-title":"Conference on Learning Theory. PMLR, 998--1027","author":"Garivier Aur\u00e9lien","year":"2016","unstructured":"Aur\u00e9lien Garivier and Emilie Kaufmann. 2016. Optimal best arm identification with fixed confidence. In Conference on Learning Theory. PMLR, 998--1027."},{"volume-title":"Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19)","author":"Gruson A.","key":"e_1_3_2_1_8_1","unstructured":"A. Gruson, P. Chandar, C. Charbuillet, J. McInerney, S. Hansen, D. Tardieu, and B. Carterette. 2019. Offline Evaluation to Make Decisions About Playlist Recommendation Algorithms. In Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM '19). ACM, 420--428."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3385670"},{"key":"e_1_3_2_1_10_1","volume-title":"Conference on Learning Theory. PMLR, 423--439","author":"Jamieson Kevin","year":"2014","unstructured":"Kevin Jamieson, Matthew Malloy, Robert Nowak, and S\u00e9bastien Bubeck. 2014. lil'ucb: An optimal exploration algorithm for multi-armed bandits. In Conference on Learning Theory. PMLR, 423--439."},{"volume-title":"Offline Approaches to Recommendation with Online Success. Ph.,D. Dissertation","author":"Jeunen O.","key":"e_1_3_2_1_11_1","unstructured":"O. Jeunen. 2021. Offline Approaches to Recommendation with Online Success. Ph.,D. Dissertation. University of Antwerp."},{"volume-title":"Proc. of the 16th ACM Conference on Recommender Systems (RecSys '22)","author":"Jeunen O.","key":"e_1_3_2_1_12_1","unstructured":"O. Jeunen, T. Joachims, H. Oosterhuis, Y. Saito, and F. Vasile. 2022. CONSEQUENCES - Causality, Counterfactuals and Sequential Decision-Making for Recommender Systems. In Proc. of the 16th ACM Conference on Recommender Systems (RecSys '22). ACM, 654--657."},{"key":"e_1_3_2_1_13_1","volume-title":"CONSEQUENCES Workshop at the 17th ACM Conference on Recommender Systems (CONSEQUENCES '23)","author":"Jeunen Olivier","year":"2023","unstructured":"Olivier Jeunen and Ben London. 2023. Offline Recommender System Evaluation under Unobserved Confounding. In CONSEQUENCES Workshop at the 17th ACM Conference on Recommender Systems (CONSEQUENCES '23)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599877"},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. of the 6th International Conference on Learning Representations (ICLR '18)","author":"Joachims T.","year":"2018","unstructured":"T. Joachims, A. Swaminathan, and M. de Rijke. 2018. Deep Learning with Logged Bandit Feedback. In Proc. of the 6th International Conference on Learning Representations (ICLR '18)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018699"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498380"},{"volume-title":"Proc. of the 13th ACM Conference on Recommender Systems","author":"Ktena S.","key":"e_1_3_2_1_18_1","unstructured":"S. Ktena, A. Tejani, L. Theis, P. Myana, D. Dilipkumar, F. Husz\u00e1r, S. Yoo, and W. Shi. 2019. Addressing Delayed Feedback for Continuous Training with Neural Networks in CTR Prediction. In Proc. of the 13th ACM Conference on Recommender Systems (Copenhagen, Denmark) (RecSys '19). ACM, 187--195."},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Machine Learning. PMLR, 5969--5978","author":"Lancewicki Tal","year":"2021","unstructured":"Tal Lancewicki, Shahar Segal, Tomer Koren, and Yishay Mansour. 2021. Stochastic multi-armed bandits with unrestricted delay distributions. In International Conference on Machine Learning. PMLR, 5969--5978."},{"volume-title":"Bandit algorithms","author":"Lattimore Tor","key":"e_1_3_2_1_20_1","unstructured":"Tor Lattimore and Csaba Szepesv\u00e1ri. 2020. Bandit algorithms. Cambridge University Press."},{"volume-title":"Proc. of the 19th International Conference on World Wide Web (WWW '10)","author":"Li L.","key":"e_1_3_2_1_21_1","unstructured":"L. Li, W. Chu, J. Langford, and R. E. Schapire. 2010. A Contextual-Bandit Approach to Personalized News Article Recommendation. In Proc. of the 19th International Conference on World Wide Web (WWW '10). ACM, 661--670."},{"key":"e_1_3_2_1_22_1","volume-title":"REVEAL 2022: Reinforcement Learning-Based Recommender Systems at Scale. In Proceedings of the 16th ACM Conference on Recommender Systems. 684--685","author":"Liaw Richard","year":"2022","unstructured":"Richard Liaw, Paige Bailey, Ying Li, Maria Dimakopoulou, and Yves Raimond. 2022. REVEAL 2022: Reinforcement Learning-Based Recommender Systems at Scale. In Proceedings of the 16th ACM Conference on Recommender Systems. 684--685."},{"key":"e_1_3_2_1_23_1","volume-title":"KDD 2021 Workshop on Multi-Armed Bandits and Reinforcement Learning (MARBLE).","author":"Liu Yi","year":"2021","unstructured":"Yi Liu and Lihong Li. 2021. A map of bandits for e-commerce. In KDD 2021 Workshop on Multi-Armed Bandits and Reinforcement Learning (MARBLE)."},{"volume-title":"Offline Policy Evaluation with New Arms. In Offline Reinforcement Learning Workshop at Neural Information Processing Systems.","author":"London B.","key":"e_1_3_2_1_24_1","unstructured":"B. London and T. Joachims. 2020. Offline Policy Evaluation with New Arms. In Offline Reinforcement Learning Workshop at Neural Information Processing Systems."},{"key":"e_1_3_2_1_25_1","volume-title":"Proc. of the CONSEQUENCES+REVEAL Workshop at the 16th ACM Conference on Recommender Systems (CONSEQUENCES+REVEAL '22)","author":"London Ben","year":"2022","unstructured":"Ben London and Thorsten Joachims. 2022. Control Variate Diagnostics for Detecting Problems in Logged Bandit Feedback. In Proc. of the CONSEQUENCES+REVEAL Workshop at the 16th ACM Conference on Recommender Systems (CONSEQUENCES+REVEAL '22). ACM."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403374"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1952-09620-8"},{"key":"e_1_3_2_1_29_1","volume-title":"CONSEQUENCES Workshop at the 17th ACM Conference on Recommender Systems (CONSEQUENCES '23)","author":"Sagtani Hitesh","year":"2023","unstructured":"Hitesh Sagtani, Madan Jhawar, Rishabh Mehrotra, and Olivier Jeunen. 2023. Ad-load Balancing via Off-Policy Learning in a Content Marketplace. In CONSEQUENCES Workshop at the 17th ACM Conference on Recommender Systems (CONSEQUENCES '23)."},{"volume-title":"Proc. of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS '21)","author":"Saito Y.","key":"e_1_3_2_1_30_1","unstructured":"Y. Saito, S. Aihara, M. Matsutani, and Y. Narita. 2021. Open Bandit Dataset and Pipeline: Towards Realistic and Reproducible Off-Policy Evaluation. In Proc. of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS '21)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3473320"},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"19122","author":"Saito Yuta","year":"2022","unstructured":"Yuta Saito and Thorsten Joachims. 2022. Off-Policy Evaluation for Large Action Spaces via Embeddings. In Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 162). PMLR, 19089--19122."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159700"},{"key":"e_1_3_2_1_34_1","volume-title":"International Conference on Machine Learning. PMLR, 9422--9433","author":"Sen Rajat","year":"2021","unstructured":"Rajat Sen, Alexander Rakhlin, Lexing Ying, Rahul Kidambi, Dean Foster, Daniel N Hill, and Inderjit S Dhillon. 2021. Top-k extreme contextual bandits with arm hierarchy. In International Conference on Machine Learning. PMLR, 9422--9433."},{"key":"e_1_3_2_1_35_1","volume-title":"A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science 362, 6419","author":"Silver David","year":"2018","unstructured":"David Silver, Thomas Hubert, Julian Schrittwieser, Ioannis Antonoglou, Matthew Lai, Arthur Guez, Marc Lanctot, Laurent Sifre, Dharshan Kumaran, Thore Graepel, Timothy Lillicrap, Karen Simonyan, and Demis Hassabis. 2018. A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play. Science 362, 6419 (2018), 1140--1144."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000068"},{"key":"e_1_3_2_1_37_1","unstructured":"R. S. Sutton and A. G. Barto. 1998. Introduction to reinforcement learning. Vol. 135."},{"volume-title":"Proc. of the 32nd International Conference on International Conference on Machine Learning","author":"Swaminathan A.","key":"e_1_3_2_1_38_1","unstructured":"A. Swaminathan and T. Joachims. 2015. Counterfactual Risk Minimization: Learning from Logged Bandit Feedback. In Proc. of the 32nd International Conference on International Conference on Machine Learning (Lille, France) (ICML'15). JMLR.org, 814--823."},{"key":"e_1_3_2_1_39_1","volume-title":"Off-policy evaluation for slate recommendation. Advances in Neural Information Processing Systems 30","author":"Swaminathan Adith","year":"2017","unstructured":"Adith Swaminathan, Akshay Krishnamurthy, Alekh Agarwal, Miro Dudik, John Langford, Damien Jose, and Imed Zitouni. 2017. Off-policy evaluation for slate recommendation. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_2_1_40_1","volume-title":"Practical Bandits: An Industry Perspective. arxiv: 2302.01223 [cs.LG]","author":"van den Akker Bram","year":"2023","unstructured":"Bram van den Akker, Olivier Jeunen, Ying Li, Ben London, Zahra Nazari, and Devesh Parekh. 2023. Practical Bandits: An Industry Perspective. arxiv: 2302.01223 [cs.LG]"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Bram van den Akker Ilya Markov and Maarten de Rijke. 2019. ViTOR: learning to rank webpages based on visual features. In The world wide web conference. 3279--3285.","DOI":"10.1145\/3308558.3313419"},{"volume-title":"Proc. of the CONSEQUENCES+REVEAL Workshop at the 16th ACM Conference on Recommender Systems (CONSEQUENCES+REVEAL '22)","author":"van den Akker B.","key":"e_1_3_2_1_42_1","unstructured":"B. van den Akker, N. Weber, F. Moraes, and D. Goldenberg. 2022. Extending Open Bandit Pipeline to Simulate Industry Challenges. In Proc. of the CONSEQUENCES+REVEAL Workshop at the 16th ACM Conference on Recommender Systems (CONSEQUENCES+REVEAL '22). ACM."},{"volume-title":"Proc. of the 28th ACM Conference on User Modeling, Adaptation and Personalization (UMAP '20)","author":"Vasile F.","key":"e_1_3_2_1_43_1","unstructured":"F. Vasile, D. Rohde, O. Jeunen, and A. Benhalloum. 2020. A Gentle Introduction to Recommendation as Counterfactual Policy Learning. In Proc. of the 28th ACM Conference on User Modeling, Adaptation and Personalization (UMAP '20). ACM, 392--393."},{"volume-title":"Companion Proceedings of the Web Conference 2021 (WWW '21)","author":"Vasile F.","key":"e_1_3_2_1_44_1","unstructured":"F. Vasile, D. Rohde, O. Jeunen, A. Benhalloum, and O. Sakhi. 2021. Recommender Systems through the Lens of Decision Theory. In Companion Proceedings of the Web Conference 2021 (WWW '21). ACM, 727--733."},{"key":"e_1_3_2_1_45_1","volume-title":"Vinyals","author":"Oriol","year":"2019","unstructured":"Oriol et al. Vinyals. 2019. Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575, 7782 (2019), 350--354."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911537"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133025"}],"event":{"name":"WSDM '24: The 17th ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Merida Mexico","acronym":"WSDM '24"},"container-title":["Proceedings of the 17th ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3636449","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3616855.3636449","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:47:01Z","timestamp":1755823621000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3636449"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,4]]},"references-count":47,"alternative-id":["10.1145\/3616855.3636449","10.1145\/3616855"],"URL":"https:\/\/doi.org\/10.1145\/3616855.3636449","relation":{},"subject":[],"published":{"date-parts":[[2024,3,4]]},"assertion":[{"value":"2024-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}