{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:04:59Z","timestamp":1775815499251,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":81,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Research Impact Fund","award":["No.R1015-23"],"award-info":[{"award-number":["No.R1015-23"]}]},{"name":"Hong Kong Environmental and Conservation Fund","award":["No. 88\/2022"],"award-info":[{"award-number":["No. 88\/2022"]}]},{"name":"Hong KongITC Innovation and Technology Fund Midstream Research Programme for Universities Project","award":["No.ITS\/034\/22MS"],"award-info":[{"award-number":["No.ITS\/034\/22MS"]}]},{"name":"Kuaishou"},{"name":"APRC - CityU New Research Initiatives","award":["No.9610565"],"award-info":[{"award-number":["No.9610565"]}]},{"name":"CityU - HKIDS Early Career Research Grant","award":["No.9360163"],"award-info":[{"award-number":["No.9360163"]}]},{"name":"SIRG - CityU Strategic Interdisciplinary Research Grant","award":["No.7020046, No.7020074"],"award-info":[{"award-number":["No.7020046, No.7020074"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657829","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"1872-1882","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Sequential Recommendation for Optimizing Both Immediate Feedback and Long-term Retention"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6654-2329","authenticated-orcid":false,"given":"Ziru","family":"Liu","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1440-911X","authenticated-orcid":false,"given":"Shuchang","family":"Liu","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1194-8334","authenticated-orcid":false,"given":"Zijian","family":"Zhang","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6451-9299","authenticated-orcid":false,"given":"Qingpeng","family":"Cai","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7187-3381","authenticated-orcid":false,"given":"Kesen","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0697-8985","authenticated-orcid":false,"given":"Lantao","family":"Hu","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9266-0780","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Unaffiliated, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jii.2020.100129"},{"key":"e_1_3_2_1_2_1","volume-title":"Reinforcement learning based recommender systems: A survey. ACM Computing Surveys (CSUR)","author":"Afsar M Mehdi","year":"2021","unstructured":"M Mehdi Afsar, Trafford Crump, and Behrouz Far. 2021. Reinforcement learning based recommender systems: A survey. ACM Computing Surveys (CSUR) (2021)."},{"key":"e_1_3_2_1_3_1","volume-title":"Incremental natural actor-critic algorithms. Advances in neural information processing systems 20","author":"Bhatnagar Shalabh","year":"2007","unstructured":"Shalabh Bhatnagar, Mohammad Ghavamzadeh, Mark Lee, and Richard S Sutton. 2007. Incremental natural actor-critic algorithms. Advances in neural information processing systems 20 (2007)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3584640"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583259"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539359"},{"key":"e_1_3_2_1_8_1","volume-title":"Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems 34","author":"Chen Lili","year":"2021","unstructured":"Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Misha Laskin, Pieter Abbeel, Aravind Srinivas, and Igor Mordatch. 2021. Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems 34 (2021), 15084--15097."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_10_1","volume-title":"International Conference on Machine Learning. PMLR, 1052--1061","author":"Chen Xinshi","year":"2019","unstructured":"Xinshi Chen, Shuang Li, Hui Li, Shaohua Jiang, Yuan Qi, and Le Song. 2019. Gen-erative adversarial user model for reinforcement learning based recommendation system. In International Conference on Machine Learning. PMLR, 1052--1061."},{"key":"e_1_3_2_1_11_1","volume-title":"A survey of deep reinforcement learning in recommender systems: A systematic review and future directions. arXiv preprint arXiv:2109.03540","author":"Chen Xiaocong","year":"2021","unstructured":"Xiaocong Chen, Lina Yao, Julian McAuley, Guanglin Zhou, and Xianzhi Wang. 2021. A survey of deep reinforcement learning in recommender systems: A systematic review and future directions. arXiv preprint arXiv:2109.03540 (2021)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482347"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE55515.2023.00260"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_15_1","volume-title":"Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio.","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho, Bart Van Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2012.6315022"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3109859.3109877"},{"key":"e_1_3_2_1_18_1","volume-title":"Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679","author":"Dulac-Arnold Gabriel","year":"2015","unstructured":"Gabriel Dulac-Arnold, Richard Evans, Hado van Hasselt, Peter Sunehag, Timothy Lillicrap, Jonathan Hunt, Timothy Mann, Theophane Weber, Thomas Degris, and Ben Coppin. 2015. Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679 (2015)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-2139"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637871"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240406"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959167"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/360"},{"key":"e_1_3_2_1_24_1","unstructured":"Eugene Ie Vihan Jain Jing Wang Sanmit Narvekar Ritesh Agarwal Rui Wu Heng-Tze Cheng Tushar Chandra and Craig Boutilier. 2019. SlateQ: A tractable decomposition for reinforcement learning with recommendation sets. (2019)."},{"key":"e_1_3_2_1_25_1","volume-title":"Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier.","author":"Ie Eugene","year":"2019","unstructured":"Eugene Ie, Chih wei Hsu, Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier. 2019. RecSim: A Configurable Simulation Platform for Recommender Systems. (2019). arXiv:1909.04847 [cs.LG]"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_1_28_1","volume-title":"MLP4Rec: A pure MLP architecture for sequential recommendations. arXiv preprint arXiv:2204.11510","author":"Li Muyang","year":"2022","unstructured":"Muyang Li, Xiangyu Zhao, Chuan Lyu, Minghao Zhao, Runze Wu, and Ruocheng Guo. 2022. MLP4Rec: A pure MLP architecture for sequential recommendations. arXiv preprint arXiv:2204.11510 (2022)."},{"key":"e_1_3_2_1_29_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74--81. https:\/\/aclanthology.org\/W04-1013"},{"key":"e_1_3_2_1_30_1","volume-title":"Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027","author":"Liu Feng","year":"2018","unstructured":"Feng Liu, Ruiming Tang, Xutao Li, Weinan Zhang, Yunming Ye, Haokun Chen, Huifeng Guo, and Yuzhou Zhang. 2018. Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027 (2018)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106170"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591717"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615134"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-023-01951-1"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583467"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186158"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1282100.1282114"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.433"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2365952.2365971"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/336597.336662"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Huashan Pan Xiulin Li and Zhiqiang Huang. 2019. A Mandarin Prosodic Boundary Prediction Model Based on Multi-Task Learning.. In Interspeech. 4485--4488.","DOI":"10.21437\/Interspeech.2019-1400"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313404"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"e_1_3_2_1_47_1","volume-title":"Dragan","author":"Ratner Ellis","year":"2018","unstructured":"Ellis Ratner, Dylan Hadfield-Menell, and Anca D. Dragan. 2018. Simplifying Reward Design through Divide-and-Conquer. arXiv:1806.02501 [cs.RO]"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772773"},{"key":"e_1_3_2_1_49_1","article-title":"An MDP-based recommender system","volume":"6","author":"Shani Guy","year":"2005","unstructured":"Guy Shani, David Heckerman, Ronen I Brafman, and Craig Boutilier. 2005. An MDP-based recommender system. Journal of Machine Learning Research 6, 9 (2005).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357895"},{"key":"e_1_3_2_1_51_1","volume-title":"The 41st international acm sigir conference on research & development in information retrieval. 235--244.","author":"Sun Yueming","unstructured":"Yueming Sun and Yi Zhang. 2018. Conversational recommender system. In The 41st international acm sigir conference on research & development in information retrieval. 235--244."},{"key":"e_1_3_2_1_52_1","volume-title":"Reinforcement learning: An intro-duction","author":"Sutton Richard S","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An intro-duction. MIT press."},{"key":"e_1_3_2_1_53_1","volume-title":"Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12","author":"Sutton Richard S","year":"1999","unstructured":"Richard S Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12 (1999)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/1297231.1297250"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988452"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_1_57_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All You Need. https:\/\/arxiv.org\/pdf\/1706.03762.pdf"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2016.7860393"},{"key":"e_1_3_2_1_59_1","volume-title":"Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang.","author":"Wang Yuhao","year":"2023","unstructured":"Yuhao Wang, Ha Tsz Lam, Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang. 2023. Multi-task deep recommender systems: A survey. arXiv preprint arXiv:2302.03525 (2023)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539073"},{"key":"e_1_3_2_1_61_1","unstructured":"Yining Wang Liwei Wang Yuanzhi Li Di He Tie-Yan Liu and Wei Chen. 2013. A Theoretical Analysis of NDCG Type Ranking Measures. arXiv:1304.6480 [cs.LG]"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133025"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401147"},{"key":"e_1_3_2_1_64_1","volume-title":"Meta-gradient reinforcement learning. Advances in neural information processing systems 31","author":"Xu Zhongwen","year":"2018","unstructured":"Zhongwen Xu, Hado P van Hasselt, and David Silver. 2018. Meta-gradient reinforcement learning. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_65_1","volume-title":"Deep multi-task representation learning: A tensor factorisation approach. arXiv preprint arXiv:1605.06391","author":"Yang Yongxin","year":"2016","unstructured":"Yongxin Yang and Timothy Hospedales. 2016. Deep multi-task representation learning: A tensor factorisation approach. arXiv preprint arXiv:1605.06391 (2016)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/2645710.2645724"},{"key":"e_1_3_2_1_67_1","volume-title":"SSDRec: Self-Augmented Sequence Denoising for Sequential Recommendation. arXiv preprint arXiv:2403.04278","author":"Zhang Chi","year":"2024","unstructured":"Chi Zhang, Qilong Han, Rui Chen, Xiangyu Zhao, Peng Tang, and Hongtao Song. 2024. SSDRec: Self-Augmented Sequence Denoising for Sequential Recommendation. arXiv preprint arXiv:2403.04278 (2024)."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539040"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3158369"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"e_1_3_2_1_71_1","volume-title":"KuaiSim: A Comprehensive Simulator for Recommender Systems. arXiv preprint arXiv:2309.12645","author":"Zhao Kesen","year":"2023","unstructured":"Kesen Zhao, Shuchang Liu, Qingpeng Cai, Xiangyu Zhao, Ziru Liu, Dong Zheng, Peng Jiang, and Kun Gai. 2023. KuaiSim: A Comprehensive Simulator for Recommender Systems. arXiv preprint arXiv:2309.12645 (2023)."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583418"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_1_74_1","volume-title":"Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter Spring","author":"Zhao Xiangyu","year":"2019","unstructured":"Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin. 2019. \"Deep reinforcement learning for search, recommendation, and online advertising: a survey\" by Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter Spring (2019), 1--15."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412044"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403384"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1541-0420.2011.01572.x"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA","acronym":"SIGIR 2024","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657829","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657829","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:37:07Z","timestamp":1755841027000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657829"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":81,"alternative-id":["10.1145\/3626772.3657829","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657829","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}