{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T06:00:17Z","timestamp":1757311217825,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T00:00:00Z","timestamp":1694649600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,14]]},"DOI":"10.1145\/3604915.3608855","type":"proceedings-article","created":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T22:40:23Z","timestamp":1694731223000},"page":"963-970","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Deep Exploration for Recommendation Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1162-106X","authenticated-orcid":false,"given":"Zheqing","family":"Zhu","sequence":"first","affiliation":[{"name":"Applied Reinforcement Learning, Meta AI, USA and Stanford University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8364-3746","authenticated-orcid":false,"given":"Benjamin","family":"Van Roy","sequence":"additional","affiliation":[{"name":"Stanford University, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,9,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"e_1_3_2_1_2_1","unstructured":"Peter\u00a0L Bartlett Dylan\u00a0J Foster and Matus\u00a0J Telgarsky. 2017. Spectrally-Normalized Margin Bounds for Neural Networks. In Advances in Neural Information Processing Systems. 6240\u20136249."},{"key":"e_1_3_2_1_3_1","volume-title":"Online Recommender Systems\u2013How Does a Website Know What I Want?American Mathematical Society. Retrieved October 31","author":"Blanda Stephanie","year":"2016","unstructured":"Stephanie Blanda. 2016. Online Recommender Systems\u2013How Does a Website Know What I Want?American Mathematical Society. Retrieved October 31 (2016)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172967"},{"key":"e_1_3_2_1_5_1","unstructured":"Olivier Chapelle and Lihong Li. 2011. An Empirical Evaluation of Thompson Sampling. In Advances in Neural Information Processing Systems. 2249\u20132257."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546758"},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Machine Learning. PMLR, 1052\u20131061","author":"Chen Xinshi","year":"2019","unstructured":"Xinshi Chen, Shuang Li, Hui Li, Shaohua Jiang, Yuan Qi, and Le Song. 2019. Generative Adversarial User Model for Reinforcement Learning based Recommendation System. In International Conference on Machine Learning. PMLR, 1052\u20131061."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2019.00101"},{"key":"e_1_3_2_1_11_1","first-page":"42","article-title":"An Overview of Classification Algorithms for Imbalanced Datasets","volume":"2","author":"Ganganwar Vaishali","year":"2012","unstructured":"Vaishali Ganganwar. 2012. An Overview of Classification Algorithms for Imbalanced Datasets. International Journal of Emerging Technology and Advanced Engineering 2, 4 (2012), 42\u201347.","journal-title":"International Journal of Emerging Technology and Advanced Engineering"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 24th annual conference on learning theory. JMLR Workshop and Conference Proceedings, 359\u2013376","author":"Garivier Aur\u00e9lien","year":"2011","unstructured":"Aur\u00e9lien Garivier and Olivier Capp\u00e9. 2011. The KL-UCB Algorithm for Bounded Stochastic Bandits and Beyond. In Proceedings of the 24th annual conference on learning theory. JMLR Workshop and Conference Proceedings, 359\u2013376."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics. 249\u2013256","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the Difficulty of Training Deep Feedforward Neural Networks. In Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics. 249\u2013256."},{"key":"e_1_3_2_1_14_1","volume-title":"Reinforcement Learning Architecture for Web Recommendations. In International Conference on Information Technology: Coding and Computing, 2004. Proceedings. ITCC 2004., Vol.\u00a01. IEEE, 398\u2013402","author":"Golovin Nick","year":"2004","unstructured":"Nick Golovin and Erhard Rahm. 2004. Reinforcement Learning Architecture for Web Recommendations. In International Conference on Information Technology: Coding and Computing, 2004. Proceedings. ITCC 2004., Vol.\u00a01. IEEE, 398\u2013402."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/308"},{"key":"e_1_3_2_1_17_1","volume-title":"Playlist Recommendation Based on Reinforcement Learning. In International Conference on Intelligence Science. Springer, 172\u2013182","author":"Hu Binbin","year":"2017","unstructured":"Binbin Hu, Chuan Shi, and Jian Liu. 2017. Playlist Recommendation Based on Reinforcement Learning. In International Conference on Intelligence Science. Springer, 172\u2013182."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/360"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2008.55"},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Machine Learning. 1453\u20131461","author":"Joulani Pooria","year":"2013","unstructured":"Pooria Joulani, Andras Gyorgy, and Csaba Szepesv\u00e1ri. 2013. Online Learning under Delayed Feedback. In International Conference on Machine Learning. 1453\u20131461."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2039320.2039329"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3347002"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.im.2004.01.008"},{"key":"e_1_3_2_1_25_1","unstructured":"Xiuyuan Lu and Benjamin Van\u00a0Roy. 2017. Ensemble Sampling. In Advances in Neural Information Processing Systems. 3258\u20133266."},{"key":"e_1_3_2_1_26_1","volume-title":"Asian Conference on Machine Learning. 315\u2013329","author":"Nakamura Atsuyoshi","year":"2015","unstructured":"Atsuyoshi Nakamura. 2015. A UCB-Like Strategy of Collaborative Filtering. In Asian Conference on Machine Learning. 315\u2013329."},{"key":"e_1_3_2_1_27_1","volume-title":"Recommendation System-Based Upper Confidence Bound for Online Advertising. In REVEAL","author":"Nguyen-Thanh Nhan","year":"2019","unstructured":"Nhan Nguyen-Thanh, Dana Marinca, Kinda Khawam, David Rohde, Flavian Vasile, Elena Lohan, Steven Martin, and Dominique Quadri. 2019. Recommendation System-Based Upper Confidence Bound for Online Advertising. In REVEAL 2019."},{"key":"e_1_3_2_1_28_1","unstructured":"Ian Osband John Aslanides and Albin Cassirer. 2018. Randomized Prior Functions for Deep Reinforcement Learning. In Advances in Neural Information Processing Systems. 8617\u20138629."},{"key":"e_1_3_2_1_29_1","unstructured":"Ian Osband Charles Blundell Alexander Pritzel and Benjamin Van\u00a0Roy. 2016. Deep Exploration via Bootstrapped DQN. In Advances in Neural information processing systems."},{"key":"e_1_3_2_1_30_1","first-page":"1","article-title":"Deep Exploration via Randomized Value Functions","volume":"20","author":"Osband Ian","year":"2019","unstructured":"Ian Osband, Benjamin Van\u00a0Roy, Daniel\u00a0J Russo, and Zheng Wen. 2019. Deep Exploration via Randomized Value Functions. Journal of Machine Learning Research 20, 124 (2019), 1\u201362.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_31_1","volume-title":"Epistemic Neural Networks. arXiv preprint arXiv:2107.08924","author":"Osband Ian","year":"2021","unstructured":"Ian Osband, Zheng Wen, Mohammad Asghari, Morteza Ibrahimi, Xiyuan Lu, and Benjamin Van\u00a0Roy. 2021. Epistemic Neural Networks. arXiv preprint arXiv:2107.08924 (2021)."},{"key":"e_1_3_2_1_32_1","unstructured":"Ian Osband Zhegn Wen Seyed\u00a0Mohammad Asghari Vikranth Dwaracherla Xiuyuan Lu Morteza Ibrahimi Dieterich Lawson Botao Hao Brendan O\u2019Donoghue and Benjamin Van\u00a0Roy. 2022. The Neural Testbed: Evaluating Joint Predictions. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_33_1","unstructured":"Chao Qin Zheng Wen Xiuyuan Lu and Benjamin Van\u00a0Roy. 2022. An Analysis of Ensemble Sampling. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_34_1","volume-title":"SP 3","author":"Rojanavasu Pornthep","year":"2005","unstructured":"Pornthep Rojanavasu, Phaitoon Srinil, and Ouen Pinngern. 2005. New Recommendation System Using Reinforcement Learning. Special Issue of the Intl. J. Computer, the Internet and Management 13, SP 3 (2005)."},{"key":"e_1_3_2_1_35_1","volume-title":"Collaborative Filtering Recommender Systems. The Adaptive Web","author":"Schafer J\u00a0Ben","year":"2007","unstructured":"J\u00a0Ben Schafer, Dan Frankowski, Jon Herlocker, and Shilad Sen. 2007. Collaborative Filtering Recommender Systems. The Adaptive Web (2007), 291\u2013324."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2007.07.055"},{"key":"e_1_3_2_1_38_1","unstructured":"Jiaming Song Hongyu Ren Dorsa Sadigh and Stefano Ermon. 2018. Multi-Agent Generative Adversarial Imitation Learning. In Advances in neural information processing systems."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1111\/bmsp.12144"},{"key":"e_1_3_2_1_40_1","volume-title":"Personalized Ad Recommendation Systems for Life-Time Value Optimization with Guarantees. In Twenty-Fourth International Joint Conference on Artificial Intelligence.","author":"Theocharous Georgios","year":"2015","unstructured":"Georgios Theocharous, Philip\u00a0S Thomas, and Mohammad Ghavamzadeh. 2015. Personalized Ad Recommendation Systems for Life-Time Value Optimization with Guarantees. In Twenty-Fourth International Joint Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/25.3-4.285"},{"key":"e_1_3_2_1_42_1","volume-title":"RL4RS: A Real-World Benchmark for Reinforcement Learning based Recommender System. arXiv preprint arXiv:2110.11073","author":"Wang Kai","year":"2021","unstructured":"Kai Wang, Zhene Zou, Qilin Deng, Yue Shang, Minghao Zhao, Runze Wu, Xudong Shen, Tangjie Lyu, and Changjie Fan. 2021. RL4RS: A Real-World Benchmark for Reinforcement Learning based Recommender System. arXiv preprint arXiv:2110.11073 (2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1671"},{"key":"e_1_3_2_1_44_1","volume-title":"Neural Contextual Bandits with Deep Representation and Shallow Exploration. In International Conference on Learning Representations.","author":"Xu Pan","year":"2021","unstructured":"Pan Xu, Zheng Wen, Handong Zhao, and Quanquan Gu. 2021. Neural Contextual Bandits with Deep Representation and Shallow Exploration. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_45_1","volume-title":"Understanding Deep Learning Requires Rethinking Generalization Int. In Conf. on Learning Representations.","author":"Zhang C","year":"2017","unstructured":"C Zhang, S Bengio, M Hardt, B Recht, and O Vinyals. 2017. Understanding Deep Learning Requires Rethinking Generalization Int. In Conf. on Learning Representations."},{"key":"e_1_3_2_1_46_1","volume-title":"Neural Thompson Sampling. In International Conference on Learning Representations.","author":"Zhang Weitong","year":"2020","unstructured":"Weitong Zhang, Dongruo Zhou, Lihong Li, and Quanquan Gu. 2020. Neural Thompson Sampling. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_48_1","volume-title":"Neural Contextual Bandits with UCB-Based Exploration. In International Conference on Machine Learning. PMLR, 11492\u201311502","author":"Zhou Dongruo","year":"2020","unstructured":"Dongruo Zhou, Lihong Li, and Quanquan Gu. 2020. Neural Contextual Bandits with UCB-Based Exploration. In International Conference on Machine Learning. PMLR, 11492\u201311502."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"}],"event":{"name":"RecSys '23: Seventeenth ACM Conference on Recommender Systems","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGecom Special Interest Group on Economics and Computation"],"location":"Singapore Singapore","acronym":"RecSys '23"},"container-title":["Proceedings of the 17th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3604915.3608855","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3604915.3608855","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:34Z","timestamp":1750178794000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3604915.3608855"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,14]]},"references-count":49,"alternative-id":["10.1145\/3604915.3608855","10.1145\/3604915"],"URL":"https:\/\/doi.org\/10.1145\/3604915.3608855","relation":{},"subject":[],"published":{"date-parts":[[2023,9,14]]},"assertion":[{"value":"2023-09-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}