{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T01:22:54Z","timestamp":1777339374694,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"APRC - CityU New Research Initiatives","award":["No.9610565"],"award-info":[{"award-number":["No.9610565"]}]},{"name":"Ant Group (CCF-Ant Research Fund)"},{"name":"Huawei Innovation Research Program"},{"name":"SIRG - CityU Strategic Interdisciplinary Research Grant","award":["No.7020046, No.7020074"],"award-info":[{"award-number":["No.7020046, No.7020074"]}]},{"name":"HKIDS Early Career Research Grant","award":["No.9360163"],"award-info":[{"award-number":["No.9360163"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583467","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"1273-1282","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":40,"title":["Multi-Task Recommendations with Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6654-2329","authenticated-orcid":false,"given":"Ziru","family":"Liu","sequence":"first","affiliation":[{"name":"City University of Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7251-9782","authenticated-orcid":false,"given":"Jiejie","family":"Tian","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6451-9299","authenticated-orcid":false,"given":"Qingpeng","family":"Cai","sequence":"additional","affiliation":[{"name":"Kuaishou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4470-5972","authenticated-orcid":false,"given":"Jingtong","family":"Gao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1440-911X","authenticated-orcid":false,"given":"Shuchang","family":"Liu","sequence":"additional","affiliation":[{"name":"Kuaishou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9957-3244","authenticated-orcid":false,"given":"Dayou","family":"Chen","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2230-4625","authenticated-orcid":false,"given":"Tonghao","family":"He","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0424-9658","authenticated-orcid":false,"given":"Dong","family":"Zheng","sequence":"additional","affiliation":[{"name":"Kuaishou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9266-0780","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Unaffiliated, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jii.2020.100129"},{"key":"e_1_3_2_1_2_1","volume-title":"Reinforcement learning based recommender systems: A survey. ACM Computing Surveys (CSUR)","author":"Afsar M\u00a0Mehdi","year":"2021","unstructured":"M\u00a0Mehdi Afsar, Trafford Crump, and Behrouz Far. 2021. Reinforcement learning based recommender systems: A survey. ACM Computing Surveys (CSUR) (2021)."},{"key":"e_1_3_2_1_3_1","volume-title":"A model-based reinforcement learning with adversarial training for online recommendation. Advances in Neural Information Processing Systems 32","author":"Bai Xueying","year":"2019","unstructured":"Xueying Bai, Jian Guan, and Hongning Wang. 2019. A model-based reinforcement learning with adversarial training for online recommendation. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_4_1","volume-title":"Incremental natural actor-critic algorithms. Advances in neural information processing systems 20","author":"Bhatnagar Shalabh","year":"2007","unstructured":"Shalabh Bhatnagar, Mohammad Ghavamzadeh, Mark Lee, and Richard\u00a0S Sutton. 2007. Incremental natural actor-critic algorithms. Advances in neural information processing systems 20 (2007)."},{"key":"e_1_3_2_1_5_1","volume-title":"Multitask learning. Machine learning 28, 1","author":"Caruana Rich","year":"1997","unstructured":"Rich Caruana. 1997. Multitask learning. Machine learning 28, 1 (1997), 41\u201375."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441764"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2012.6315022"},{"key":"e_1_3_2_1_11_1","volume-title":"Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679","author":"Dulac-Arnold Gabriel","year":"2015","unstructured":"Gabriel Dulac-Arnold, Richard Evans, Hado van Hasselt, Peter Sunehag, Timothy Lillicrap, Jonathan Hunt, Timothy Mann, Theophane Weber, Thomas Degris, and Ben Coppin. 2015. Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679 (2015)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-2139"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186165"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-02165-7"},{"key":"e_1_3_2_1_15_1","unstructured":"Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li and Xiuqiang He. 2017. DeepFM: a factorization-machine based neural network for CTR prediction. arXiv preprint arXiv:1703.04247 (2017)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240406"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2903743"},{"key":"e_1_3_2_1_18_1","volume-title":"Validation Set Evaluation can be Wrong: An Evaluator-Generator Approach for Maximizing Online Performance of Ranking in E-commerce. CoRR abs\/2003.11941","author":"Huzhang Guangda","year":"2020","unstructured":"Guangda Huzhang, Zhen-Jia Pang, Yongqing Gao, Wen-Ji Zhou, Qing Da, Anxiang Zeng, and Yang Yu. 2020. Validation Set Evaluation can be Wrong: An Evaluator-Generator Approach for Maximizing Online Performance of Ranking in E-commerce. CoRR abs\/2003.11941 (2020). https:\/\/arxiv.org\/abs\/2003.11941"},{"key":"e_1_3_2_1_19_1","unstructured":"Eugene Ie Vihan Jain Jing Wang Sanmit Narvekar Ritesh Agarwal Rui Wu Heng-Tze Cheng Tushar Chandra and Craig Boutilier. 2019. SlateQ: A tractable decomposition for reinforcement learning with recommendation sets. (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier.","author":"Ie Eugene","year":"2019","unstructured":"Eugene Ie, Chih wei Hsu, Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier. 2019. RecSim: A Configurable Simulation Platform for Recommender Systems. (2019). arxiv:1909.04847\u00a0[cs.LG]"},{"key":"e_1_3_2_1_21_1","volume-title":"Webwatcher: A tour guide for the world wide web. In IJCAI (1). Citeseer, 770\u2013777.","author":"Joachims Thorsten","year":"1997","unstructured":"Thorsten Joachims, Dayne Freitag, Tom Mitchell, 1997. Webwatcher: A tour guide for the world wide web. In IJCAI (1). Citeseer, 770\u2013777."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_1_23_1","volume-title":"Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027","author":"Liu Feng","year":"2018","unstructured":"Feng Liu, Ruiming Tang, Xutao Li, Weinan Zhang, Yunming Ye, Haokun Chen, Huifeng Guo, and Yuzhou Zhang. 2018. Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027 (2018)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106170"},{"key":"e_1_3_2_1_25_1","first-page":"9228","article-title":"Kalman filtering attention for user behavior modeling in ctr prediction","volume":"33","author":"Liu Hu","year":"2020","unstructured":"Hu Liu, Jing Lu, Xiwei Zhao, Sulong Xu, Hao Peng, Yutong Liu, Zehua Zhang, Jian Li, Junsheng Jin, Yongjun Bao, 2020. Kalman filtering attention for user behavior modeling in ctr prediction. Advances in Neural Information Processing Systems 33 (2020), 9228\u20139238.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","volume-title":"Learning to rank for information retrieval. Foundations and Trends\u00ae in Information Retrieval 3, 3","author":"Tie-Yan","year":"2009","unstructured":"Tie-Yan Liu 2009. Learning to rank for information retrieval. Foundations and Trends\u00ae in Information Retrieval 3, 3 (2009), 225\u2013331."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186158"},{"key":"e_1_3_2_1_28_1","volume-title":"Multi-task sequence to sequence learning. arXiv preprint arXiv:1511.06114","author":"Luong Minh-Thang","year":"2015","unstructured":"Minh-Thang Luong, Quoc\u00a0V Le, Ilya Sutskever, Oriol Vinyals, and Lukasz Kaiser. 2015. Multi-task sequence to sequence learning. arXiv preprint arXiv:1511.06114 (2015)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1282100.1282114"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.433"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2365952.2365971"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/336597.336662"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Huashan Pan Xiulin Li and Zhiqiang Huang. 2019. A Mandarin Prosodic Boundary Prediction Model Based on Multi-Task Learning.. In Interspeech. 4485\u20134488.","DOI":"10.21437\/Interspeech.2019-1400"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313404"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"e_1_3_2_1_38_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_39_1","volume-title":"An MDP-based recommender system.Journal of Machine Learning Research 6, 9","author":"Shani Guy","year":"2005","unstructured":"Guy Shani, David Heckerman, Ronen\u00a0I Brafman, and Craig Boutilier. 2005. An MDP-based recommender system.Journal of Machine Learning Research 6, 9 (2005)."},{"key":"e_1_3_2_1_40_1","volume-title":"International conference on machine learning. Pmlr, 387\u2013395","author":"Silver David","year":"2014","unstructured":"David Silver, Guy Lever, Nicolas Heess, Thomas Degris, Daan Wierstra, and Martin Riedmiller. 2014. Deterministic policy gradient algorithms. In International conference on machine learning. Pmlr, 387\u2013395."},{"key":"e_1_3_2_1_41_1","volume-title":"The 41st international acm sigir conference on research & development in information retrieval. 235\u2013244.","author":"Sun Yueming","unstructured":"Yueming Sun and Yi Zhang. 2018. Conversational recommender system. In The 41st international acm sigir conference on research & development in information retrieval. 235\u2013244."},{"key":"e_1_3_2_1_42_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton S","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_43_1","volume-title":"Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12","author":"Sutton S","year":"1999","unstructured":"Richard\u00a0S Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems 12 (1999)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/1297231.1297250"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9091363"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539073"},{"key":"e_1_3_2_1_48_1","volume-title":"Deep multi-task representation learning: A tensor factorisation approach. arXiv preprint arXiv:1605.06391","author":"Yang Yongxin","year":"2016","unstructured":"Yongxin Yang and Timothy Hospedales. 2016. Deep multi-task representation learning: A tensor factorisation approach. arXiv preprint arXiv:1605.06391 (2016)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539040"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3158369"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_1_53_1","volume-title":"Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletterSpring","author":"Zhao Xiangyu","year":"2019","unstructured":"Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin. 2019. \" Deep reinforcement learning for search, recommendation, and online advertising: a survey\" by Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletterSpring (2019), 1\u201315."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_1_56_1","volume-title":"Deep Reinforcement Learning for List-wise Recommendations. arXiv preprint arXiv:1801.00209","author":"Zhao Xiangyu","year":"2017","unstructured":"Xiangyu Zhao, Liang Zhang, Zhuoye Ding, Dawei Yin, Yihong Zhao, and Jiliang Tang. 2017. Deep Reinforcement Learning for List-wise Recommendations. arXiv preprint arXiv:1801.00209 (2017)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1541-0420.2011.01572.x"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583467","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583467","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:53Z","timestamp":1750178873000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583467"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":59,"alternative-id":["10.1145\/3543507.3583467","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583467","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}