{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T14:27:07Z","timestamp":1775831227345,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T00:00:00Z","timestamp":1644537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The National Key R&D Program of China","award":["2020YFB1406704"],"award-info":[{"award-number":["2020YFB1406704"]}]},{"name":"Natural Science Foundation of China","award":["62072279"],"award-info":[{"award-number":["62072279"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,11]]},"DOI":"10.1145\/3488560.3498471","type":"proceedings-article","created":{"date-parts":[[2022,2,15]],"date-time":"2022-02-15T21:42:57Z","timestamp":1644961377000},"page":"957-965","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":38,"title":["Choosing the Best of Both Worlds"],"prefix":"10.1145","author":[{"given":"Dusan","family":"Stamenkovic","sequence":"first","affiliation":[{"name":"University of Novi Sad, Novi Sad, Serbia"}]},{"given":"Alexandros","family":"Karatzoglou","sequence":"additional","affiliation":[{"name":"Google Research, London, United Kingdom"}]},{"given":"Ioannis","family":"Arapakis","sequence":"additional","affiliation":[{"name":"Telefonica Research, Barcelona, Spain"}]},{"given":"Xin","family":"Xin","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}]},{"given":"Kleomenis","family":"Katevas","sequence":"additional","affiliation":[{"name":"Telefonica Research, Barcelona, Spain"}]}],"member":"320","published-online":{"date-parts":[[2022,2,15]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380281"},{"key":"e_1_3_2_2_2_1","first-page":"1742","article-title":"Optimal Greedy Diversity for Recommendation","volume":"15","author":"Ashkan Azin","year":"2015","unstructured":"Azin Ashkan, Branislav Kveton, Shlomo Berkovsky, and Zheng Wen. 2015. Optimal Greedy Diversity for Recommendation.. In IJCAI, Vol. 15. 1742--1748.","journal-title":"IJCAI"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICECA49313.2020.9297559"},{"key":"e_1_3_2_2_4_1","volume-title":"Fast greedy map inference for determinantal point process to improve recommendation diversity. arXiv preprint arXiv:1709.05135","author":"Chen Laming","year":"2017","unstructured":"Laming Chen, Guoxin Zhang, and Hanning Zhou. 2017. Fast greedy map inference for determinantal point process to improve recommendation diversity. arXiv preprint arXiv:1709.05135 (2017)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_2_6_1","volume-title":"International Conference on Machine Learning . PMLR, 1052--1061","author":"Chen Xinshi","year":"2019","unstructured":"Xinshi Chen, Shuang Li, Hui Li, Shaohua Jiang, Yuan Qi, and Le Song. 2019 b. Generative adversarial user model for reinforcement learning based recommendation system. In International Conference on Machine Learning . PMLR, 1052--1061."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3038912.3052585"},{"key":"e_1_3_2_2_8_1","volume-title":"Dzmitry Bahdanau, and Yoshua Bengio.","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho, Bart Van Merri\u00ebnboer, Dzmitry Bahdanau, and Yoshua Bengio. 2014. On the properties of neural machine translation: Encoder-decoder approaches. arXiv preprint arXiv:1409.1259 (2014)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1250910.1250939"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441775"},{"key":"e_1_3_2_2_11_1","unstructured":"Hado V Hasselt. 2010. Double Q-learning. In Advances in neural information processing systems. 2613--2621."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/963770.963772"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271761"},{"key":"e_1_3_2_2_14_1","volume-title":"Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939","author":"Hidasi Bal\u00e1zs","year":"2015","unstructured":"Bal\u00e1zs Hidasi, Alexandros Karatzoglou, Linas Baltrunas, and Domonkos Tikk. 2015. Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939 (2015)."},{"key":"e_1_3_2_2_15_1","unstructured":"Rong Hu and Pearl Pu. 2011. Helping Users Perceive Recommendation Diversity."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219846"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1037\/0022-3514.76.3.349"},{"key":"e_1_3_2_2_18_1","volume-title":"ACM Transactions on Information Systems (TOIS)","volume":"20","author":"Jaana Kalervo","year":"2002","unstructured":"Kalervo J\"arvelin and Jaana Kek\"al\"ainen. 2002. Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems (TOIS) , Vol. 20, 4 (2002), 422--446."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_2_20_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1835449.1835486"},{"key":"e_1_3_2_2_22_1","volume-title":"Determinantal point process models and statistical inference. Journal of the Royal Statistical Society: Series B: Statistical Methodology","author":"Lavancier Fr\u00e9d\u00e9ric","year":"2015","unstructured":"Fr\u00e9d\u00e9ric Lavancier, Jesper M\u00f8ller, and Ege Rubak. 2015. Determinantal point process models and statistical inference. Journal of the Royal Statistical Society: Series B: Statistical Methodology (2015), 853--877."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1490"},{"key":"e_1_3_2_2_24_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_2_25_1","volume-title":"Multi-gradient descent for multi-objective recommender systems. arXiv preprint arXiv:2001.00846","author":"Milojkovic Nikola","year":"2019","unstructured":"Nikola Milojkovic, Diego Antognini, Giancarlo Bergamin, Boi Faltings, and Claudiu Musat. 2019. Multi-gradient descent for multi-objective recommender systems. arXiv preprint arXiv:2001.00846 (2019)."},{"key":"e_1_3_2_2_26_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_2_27_1","volume-title":"Multi-objective deep reinforcement learning. arXiv preprint arXiv:1610.02707","author":"Mossalam Hossam","year":"2016","unstructured":"Hossam Mossalam, Yannis M Assael, Diederik M Roijers, and Shimon Whiteson. 2016. Multi-objective deep reinforcement learning. arXiv preprint arXiv:1610.02707 (2016)."},{"key":"e_1_3_2_2_28_1","volume-title":"The filter bubble: What the Internet is hiding from you","author":"Pariser Eli","unstructured":"Eli Pariser. 2011. The filter bubble: What the Internet is hiding from you .Penguin UK."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/2540128.2540517"},{"key":"e_1_3_2_2_30_1","volume-title":"BPR: Bayesian personalized ranking from implicit feedback. arXiv preprint arXiv:1205.2618","author":"Rendle Steffen","year":"2012","unstructured":"Steffen Rendle, Christoph Freudenthaler, Zeno Gantner, and Lars Schmidt-Thieme. 2012. BPR: Bayesian personalized ranking from implicit feedback. arXiv preprint arXiv:1205.2618 (2012)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2365952.2365962"},{"key":"e_1_3_2_2_32_1","volume-title":"The paradox of choice: Why less is more","author":"Schwartz Barry","year":"2004","unstructured":"Barry Schwartz. 2004. The paradox of choice: Why less is more. New York: Ecco (2004)."},{"key":"e_1_3_2_2_33_1","first-page":"3868","article-title":"A Framework for Recommending Relevant and Diverse Items","volume":"16","author":"Sha Chaofeng","year":"2016","unstructured":"Chaofeng Sha, Xiaowei Wu, and Junyu Niu. 2016. A Framework for Recommending Relevant and Diverse Items.. In IJCAI, Vol. 16. 3868--3874.","journal-title":"IJCAI"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330933"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1086\/233202"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159656"},{"key":"e_1_3_2_2_37_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in neural information processing systems. 5998--6008."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330952"},{"key":"e_1_3_2_2_39_1","volume-title":"CC III WHITE, and KIM KW","author":"White C Ch","year":"1980","unstructured":"C Ch White, CC III WHITE, and KIM KW. 1980. Solution procedures for vector criterion Markov decision processes. (1980)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3272018"},{"key":"e_1_3_2_2_41_1","volume-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine learning","author":"Williams Ronald J","year":"1992","unstructured":"Ronald J Williams. 1992. Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine learning , Vol. 8, 3--4 (1992), 229--256."},{"key":"e_1_3_2_2_42_1","volume-title":"Self-Supervised Reinforcement Learning forRecommender Systems. arXiv preprint arXiv:2006.05779","author":"Xin Xin","year":"2020","unstructured":"Xin Xin, Alexandros Karatzoglou, Ioannis Arapakis, and Joemon M Jose. 2020. Self-Supervised Reinforcement Learning forRecommender Systems. arXiv preprint arXiv:2006.05779 (2020)."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290975"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2124295.2124300"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"WSDM '22: The Fifteenth ACM International Conference on Web Search and Data Mining","location":"Virtual Event AZ USA","acronym":"WSDM '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498471","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488560.3498471","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:19Z","timestamp":1750188679000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498471"}},"subtitle":["Diverse and Novel Recommendations through Multi-Objective Reinforcement Learning"],"short-title":[],"issued":{"date-parts":[[2022,2,11]]},"references-count":47,"alternative-id":["10.1145\/3488560.3498471","10.1145\/3488560"],"URL":"https:\/\/doi.org\/10.1145\/3488560.3498471","relation":{},"subject":[],"published":{"date-parts":[[2022,2,11]]},"assertion":[{"value":"2022-02-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}