{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T15:54:30Z","timestamp":1771602870541,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376275,62377044"],"award-info":[{"award-number":["62376275,62377044"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0114802"],"award-info":[{"award-number":["2022ZD0114802"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657714","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"1850-1860","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Reinforcing Long-Term Performance in Recommender Systems with User-Oriented Exploration Policy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8481-9421","authenticated-orcid":false,"given":"Changshuo","family":"Zhang","sequence":"first","affiliation":[{"name":"Gaoling School of AI, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7740-508X","authenticated-orcid":false,"given":"Sirui","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7397-5632","authenticated-orcid":false,"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Gaoling School of AI, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7549-0860","authenticated-orcid":false,"given":"Sunhao","family":"Dai","sequence":"additional","affiliation":[{"name":"Gaoling School of AI, Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5676-4339","authenticated-orcid":false,"given":"Weijie","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Information Technology and Management, University of International Business and Economics, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7170-111X","authenticated-orcid":false,"given":"Jun","family":"Xu","sequence":"additional","affiliation":[{"name":"Gaoling School of AI, Renmin University of China, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Finite-time analysis of the multiarmed bandit problem. Machine learning","author":"Auer Peter","year":"2002","unstructured":"Peter Auer, Nicolo Cesa-Bianchi, and Paul Fischer. 2002. Finite-time analysis of the multiarmed bandit problem. Machine learning , Vol. 47 (2002), 235--256."},{"key":"e_1_3_2_1_2_1","volume-title":"Unifying count-based exploration and intrinsic motivation. Advances in neural information processing systems","author":"Bellemare Marc","year":"2016","unstructured":"Marc Bellemare, Sriram Srinivasan, Georg Ostrovski, Tom Schaul, David Saxton, and Remi Munos. 2016. Unifying count-based exploration and intrinsic motivation. Advances in neural information processing systems , Vol. 29 (2016)."},{"key":"e_1_3_2_1_3_1","volume-title":"International conference on machine learning. PMLR, 449--458","author":"Bellemare Marc G","year":"2017","unstructured":"Marc G Bellemare, Will Dabney, and R\u00e9mi Munos. 2017. A distributional perspective on reinforcement learning. In International conference on machine learning. PMLR, 449--458."},{"key":"e_1_3_2_1_4_1","volume-title":"Large-scale study of curiosity-driven learning. arXiv preprint arXiv:1808.04355","author":"Burda Yuri","year":"2018","unstructured":"Yuri Burda, Harri Edwards, Deepak Pathak, Amos Storkey, Trevor Darrell, and Alexei A Efros. 2018. Large-scale study of curiosity-driven learning. arXiv preprint arXiv:1808.04355 (2018)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474236"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599796"},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. PMLR, 1096--1105","author":"Dabney Will","year":"2018","unstructured":"Will Dabney, Georg Ostrovski, David Silver, and R\u00e9mi Munos. 2018a. Implicit quantile networks for distributional reinforcement learning. In International conference on machine learning. PMLR, 1096--1105."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"e_1_3_2_1_10_1","volume-title":"Modeling User Attention in Music Recommendation. In 2024 IEEE 40th International Conference on Data Engineering (ICDE).","author":"Dai Sunhao","year":"2024","unstructured":"Sunhao Dai, Ninglu Shao, Jieming Zhu, Xiao Zhang, Zhenhua Dong, Jun Xu, Quanyu Dai, and Ji-Rong Wen. 2024. Modeling User Attention in Music Recommendation. In 2024 IEEE 40th International Conference on Data Engineering (ICDE)."},{"key":"e_1_3_2_1_11_1","volume-title":"ACM SIGIR Forum","volume":"56","author":"Deffayet Romain","year":"2023","unstructured":"Romain Deffayet, Thibaut Thonet, Jean-Michel Renders, and Maarten De Rijke. 2023. Offline evaluation for reinforcement learning-based recommendation: a critical issue and some alternatives. In ACM SIGIR Forum, Vol. 56. ACM New York, NY, USA, 1--14."},{"key":"e_1_3_2_1_12_1","volume-title":"Yangang Ren, Qi Sun, and Bo Cheng.","author":"Duan Jingliang","year":"2021","unstructured":"Jingliang Duan, Yang Guan, Shengbo Eben Li, Yangang Ren, Qi Sun, and Bo Cheng. 2021. Distributional soft actor-critic: Off-policy reinforcement learning for addressing value estimation errors. IEEE transactions on neural networks and learning systems, Vol. 33, 11 (2021), 6584--6598."},{"key":"e_1_3_2_1_13_1","volume-title":"Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679","author":"Dulac-Arnold Gabriel","year":"2015","unstructured":"Gabriel Dulac-Arnold, Richard Evans, Hado van Hasselt, Peter Sunehag, Timothy Lillicrap, Jonathan Hunt, Timothy Mann, Theophane Weber, Thomas Degris, and Ben Coppin. 2015. Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679 (2015)."},{"key":"e_1_3_2_1_14_1","volume-title":"International conference on machine learning. PMLR, 1587--1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587--1596."},{"key":"e_1_3_2_1_15_1","first-page":"32639","article-title":"Efficient risk-averse reinforcement learning","volume":"35","author":"Greenberg Ido","year":"2022","unstructured":"Ido Greenberg, Yinlam Chow, Mohammad Ghavamzadeh, and Shie Mannor. 2022. Efficient risk-averse reinforcement learning. Advances in Neural Information Processing Systems , Vol. 35 (2022), 32639--32652.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","volume-title":"Filip De Turck, and Pieter Abbeel","author":"Houthooft Rein","year":"2016","unstructured":"Rein Houthooft, Xi Chen, Yan Duan, John Schulman, Filip De Turck, and Pieter Abbeel. 2016. Vime: Variational information maximizing exploration. Advances in neural information processing systems , Vol. 29 (2016)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219846"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Guangda Huzhang Zhenjia Pang Yongqing Gao Yawen Liu Weijie Shen Wen-Ji Zhou Qing Da Anxiang Zeng Han Yu Yang Yu et al. 2021. AliExpress Learning-To-Rank: Maximizing online model performance without going online. IEEE Transactions on Knowledge and Data Engineering (2021).","DOI":"10.1109\/TKDE.2021.3098898"},{"key":"e_1_3_2_1_19_1","unstructured":"Eugene Ie Vihan Jain Jing Wang Sanmit Narvekar Ritesh Agarwal Rui Wu Heng-Tze Cheng Tushar Chandra and Craig Boutilier. 2019. SlateQ: A tractable decomposition for reinforcement learning with recommendation sets. (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the Genetic and Evolutionary Computation Conference Companion. Association for Computing Machinery.","author":"Ethan","unstructured":"Ethan C. Jackson and Mark Daley. 2019. Novelty Search for Deep Reinforcement Learning Policy Network Weights by Action Sequence Edit Metric Distance. In Proceedings of the Genetic and Evolutionary Computation Conference Companion. Association for Computing Machinery."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474247"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_23_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems , Vol. 33 (2020), 1179--1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 5556--5566","author":"Kuznetsov Arsenii","year":"2020","unstructured":"Arsenii Kuznetsov, Pavel Shvechikov, Alexander Grishin, and Dmitry Vetrov. 2020. Controlling overestimation bias with truncated mixture of continuous distributional quantile critics. In International Conference on Machine Learning. PMLR, 5556--5566."},{"key":"e_1_3_2_1_25_1","volume-title":"Abandoning objectives: Evolution through the search for novelty alone. Evolutionary computation","author":"Lehman Joel","year":"2011","unstructured":"Joel Lehman and Kenneth O Stanley. 2011. Abandoning objectives: Evolution through the search for novelty alone. Evolutionary computation, Vol. 19, 2 (2011), 189--223."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1935826.1935878"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8852397"},{"key":"e_1_3_2_1_29_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap Timothy P","year":"2015","unstructured":"Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_1_30_1","volume-title":"Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027","author":"Liu Feng","year":"2018","unstructured":"Feng Liu, Ruiming Tang, Xutao Li, Weinan Zhang, Yunming Ye, Haokun Chen, Huifeng Guo, and Yuzhou Zhang. 2018. Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027 (2018)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583244"},{"key":"e_1_3_2_1_32_1","volume-title":"Foundations and Trends\u00ae in Information Retrieval","volume":"3","author":"Tie-Yan","year":"2009","unstructured":"Tie-Yan Liu et al. 2009. Learning to rank for information retrieval. Foundations and Trends\u00ae in Information Retrieval, Vol. 3, 3 (2009), 225--331."},{"key":"e_1_3_2_1_33_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928--1937."},{"key":"e_1_3_2_1_34_1","volume-title":"Variational information maximisation for intrinsically motivated reinforcement learning. Advances in neural information processing systems","author":"Mohamed Shakir","year":"2015","unstructured":"Shakir Mohamed and Danilo Jimenez Rezende. 2015. Variational information maximisation for intrinsically motivated reinforcement learning. Advances in neural information processing systems , Vol. 28 (2015)."},{"key":"e_1_3_2_1_35_1","first-page":"18050","article-title":"Effective diversity in population based reinforcement learning","volume":"33","author":"Parker-Holder Jack","year":"2020","unstructured":"Jack Parker-Holder, Aldo Pacchiano, Krzysztof M Choromanski, and Stephen J Roberts. 2020. Effective diversity in population based reinforcement learning. Advances in Neural Information Processing Systems , Vol. 33 (2020), 18050--18062.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3347000"},{"key":"e_1_3_2_1_37_1","volume-title":"Parameter space noise for exploration. arXiv preprint arXiv:1706.01905","author":"Plappert Matthias","year":"2017","unstructured":"Matthias Plappert, Rein Houthooft, Prafulla Dhariwal, Szymon Sidor, Richard Y Chen, Xi Chen, Tamim Asfour, Pieter Abbeel, and Marcin Andrychowicz. 2017. Parameter space noise for exploration. arXiv preprint arXiv:1706.01905 (2017)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539193"},{"key":"e_1_3_2_1_39_1","article-title":"An MDP-based recommender system","volume":"6","author":"Shani Guy","year":"2005","unstructured":"Guy Shani, David Heckerman, Ronen I Brafman, and Craig Boutilier. 2005. An MDP-based recommender system. Journal of Machine Learning Research, Vol. 6, 9 (2005).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614921"},{"key":"e_1_3_2_1_41_1","volume-title":"International conference on machine learning. PMLR, 5779--5788","author":"Shyam Pranav","year":"2019","unstructured":"Pranav Shyam, Wojciech Ja'skowski, and Faustino Gomez. 2019. Model-based active exploration. In International conference on machine learning. PMLR, 5779--5788."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1017\/S0263574799281520"},{"key":"e_1_3_2_1_43_1","volume-title":"Worst cases policy gradients. arXiv preprint arXiv:1911.03618","author":"Tang Yichuan Charlie","year":"2019","unstructured":"Yichuan Charlie Tang, Jian Zhang, and Ruslan Salakhutdinov. 2019. Worst cases policy gradients. arXiv preprint arXiv:1911.03618 (2019)."},{"key":"e_1_3_2_1_44_1","volume-title":"Safe Collaborative Filtering. arXiv preprint arXiv:2306.05292","author":"Togashi Riku","year":"2023","unstructured":"Riku Togashi, Tatsushi Oka, Naoto Ohsaka, and Tetsuro Morimura. 2023. Safe Collaborative Filtering. arXiv preprint arXiv:2306.05292 (2023)."},{"key":"e_1_3_2_1_45_1","volume-title":"Risk-averse offline reinforcement learning. arXiv preprint arXiv:2102.05371","author":"Urp'i N\u00faria Armengol","year":"2021","unstructured":"N\u00faria Armengol Urp'i, Sebastian Curi, and Andreas Krause. 2021. Risk-averse offline reinforcement learning. arXiv preprint arXiv:2102.05371 (2021)."},{"key":"e_1_3_2_1_46_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3594633"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463069"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.331"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.3048414"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Qitian Wu Hengrui Zhang Xiaofeng Gao Peng He Paul Weng Han Gao and Guihai Chen. 2019. Dual graph attention networks for deep latent representation of multifaceted social effects in recommender systems. In The world wide web conference. 2091--2102.","DOI":"10.1145\/3308558.3313442"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210181"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6125"},{"key":"e_1_3_2_1_55_1","volume-title":"Teach and Explore: A Multiplex Information-guided Effective and Efficient Reinforcement Learning for Sequential Recommendation. ACM Transactions on Information Systems","author":"Yan Surong","year":"2023","unstructured":"Surong Yan, Chenglong Shi, Haosen Wang, Lei Chen, Ling Jiang, Ruilin Guo, and Kwei-Jay Lin. 2023. Teach and Explore: A Multiplex Information-guided Effective and Efficient Reinforcement Learning for Sequential Recommendation. ACM Transactions on Information Systems (2023)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Yang Yu. 2018. Towards Sample Efficient Reinforcement Learning.. In IJCAI. 5739--5743.","DOI":"10.24963\/ijcai.2018\/820"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539393"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462892"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_61_1","volume-title":"Latent contextual bandits and their application to personalized recommendations for new users. arXiv preprint arXiv:1604.06743","author":"Zhou Li","year":"2016","unstructured":"Li Zhou and Emma Brunskill. 2016. Latent contextual bandits and their application to personalized recommendations for new users. arXiv preprint arXiv:1604.06743 (2016)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3608855"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657714","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657714","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:41:00Z","timestamp":1755841260000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657714"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":62,"alternative-id":["10.1145\/3626772.3657714","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657714","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}